#!/usr/bin/perl (more or less)

# This file contains the Parse::RecDescent grammar used by the parser to
# deconstruct imperative sentences.
#
# The resulting parser builds and returns a parse tree.
# The form of the tree is a list of hashes (sentences).
# The sentence hashes can have keys named verb, direct_object,
# indirect_object, do_preposition, io_preposition, and quote
# (and a couple more weird ones). 
#
# Thank god for HyperGrammar!
# <http://www.uottawa.ca/academic/arts/writcent/hypergrammar/grammar.html>

# Top-level rule. Handles compound sentences, and multiple sentences too:
# one sentence, then any number of "separator sentence" pairs, then optional
# trailing punctuation. The result is a list ref of the sentence hashes in
# the order they appeared.
input: sentence (sentence_separator sentence)(s?) sentence_punct(?)
	{
		# The (s?) repetition always produces an array ref (empty when
		# nothing matched), so it can be dereferenced unconditionally.
		[ $item[1], @{ $item[2] } ]
	}
# What may stand between two sentences (and is also used as a lookahead
# terminator by many sentence forms): the /$/ pattern, a run of coordinating
# conjunctions with optional punctuation in front, or punctuation alone.
sentence_separator: /$/
	| sentence_punct(?) coordinating_conjunction(s)
	| sentence_punct

# All the sentence forms. The ordering is quite important. I've tried to
# put the most commonly used forms first, so they'll be faster. Note that
# the use of lookahead is important in getting those fast, commonly-used
# forms to not match on subsets of longer sentences.
#
# Once each sentence is parsed, a call to main::recent_obj() is made,
# passing in any recently referred to objects. This is generally used to
# set up the 'it' and 'them' pronouns, or similar.

# Talking is quick to match: a verb followed by quoted text.
sentence: verb quote ...sentence_separator
	{
		+{
			verb  => $item{verb},
			quote => $item{quote},
		}
	}
# A bare object. This form is used to invoke the name of an exit to use it.
# (It can also be used to answer some questions.) It needs to come before
# the verb direct_object form. main::recent_obj() is deliberately not called
# here; doing so would probably just be confusing.
sentence: object ...sentence_separator
	{ +{ direct_object => $item{object} } }
# "sit down", "get up", etc. Has to come before the verb direct_object form.
sentence: verb preposition ...sentence_separator
	{
		+{
			verb        => $item{verb},
			preposition => $item{preposition},
		}
	}
# Probably the most common sentence form: verb plus direct object(s).
# The matched objects are reported to main::recent_obj() before the
# sentence hash is returned.
sentence: verb direct_object ...sentence_separator
	{
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb          => $item{verb},
			direct_object => $item{direct_object},
		}
	}
# This form is used to "pick up foo", etc.
sentence: verb do_preposition direct_object ...sentence_separator
	{
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb           => $item{verb},
			do_preposition => $item{do_preposition},
			direct_object  => $item{direct_object},
		}
	}
# This form is used in eg, "put it down" or "wind it up"; the preposition
# trails the object but is stored under the same do_preposition key.
sentence: verb direct_object do_preposition ...sentence_separator
	{
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb           => $item{verb},
			do_preposition => $item{do_preposition},
			direct_object  => $item{direct_object},
		}
	}
# "put blah in foo", etc is quite common. Both prepositions are optional;
# an optional subrule yields an array ref, so element 0 is the matched
# preposition, or undef when it was absent.
sentence: verb do_preposition(?) direct_object io_preposition(?) indirect_object
	{
		# Only the direct object is recorded as recent. Which object
		# should be? Dunno. :-/
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb            => $item{verb},
			do_preposition  => $item{'do_preposition(?)'}[0],
			direct_object   => $item{direct_object},
			io_preposition  => $item{'io_preposition(?)'}[0],
			indirect_object => $item{indirect_object},
		}
	}
# Not exactly sentences per se, but support answers to recently asked
# questions. That generally involves picking a choice from a list of
# answers, either by name or number. Or it might involve referring to a
# particular object, or be a prepositional phrase.
#
# One or more answers, optionally preceded by an article.
sentence: article(?) answer(s) ...sentence_separator
	{ +{ answer => $item{'answer(s)'} } }
# A number, optionally preceded by an article.
sentence: article(?) number ...sentence_separator
	{ +{ number => $item{number} } }
# A prepositional phrase: preposition(s) followed by an object.
sentence: do_preposition object ...sentence_separator
	{
		+{
			direct_object  => $item{object},
			do_preposition => $item{do_preposition},
		}
	}
# Simple one-word commands are way up there too (but must come after the
# simple question answer forms).
sentence: verb ...sentence_separator
	{ +{ verb => $item{verb} } }
	
# This is a gross special case for a few commands that take a field as
# their last argument. The verb list is hard-coded here rather than taken
# from $::verbs; it is matched case-insensitively and must end at a word
# boundary.
fieldverb: /(show|showall|set|unset|edit|delete|usage|help|go|list)\b/i
# Sentence forms for the field-taking verbs. A special terminator (the
# sentence_separator lookahead) is needed to disambiguate from things like
# "show ball then drop it", where "then" could be misinterpreted as a
# field.
# Must come before the verb quote direct_object form.
#
# fieldverb [preposition] object's field
sentence: fieldverb do_preposition(?) possessive_object field ...sentence_separator
	{
		&::recent_obj(@{ $item{possessive_object} });
		+{
			verb           => $item{fieldverb},
			do_preposition => $item{'do_preposition(?)'}[0],
			direct_object  => $item{possessive_object},
			field          => $item{field},
		}
	}
# fieldverb [preposition] object's number field
sentence: fieldverb do_preposition(?) possessive_object number field ...sentence_separator
	{
		&::recent_obj(@{ $item{possessive_object} });
		+{
			verb           => $item{fieldverb},
			do_preposition => $item{'do_preposition(?)'}[0],
			direct_object  => $item{possessive_object},
			number         => $item{number},
			field          => $item{field},
		}
	}
# Used for the help command: fieldverb [preposition] field, with no object.
sentence: fieldverb do_preposition(?) field ...sentence_separator
	{
		+{
			verb           => $item{fieldverb},
			do_preposition => $item{'do_preposition(?)'}[0],
			field          => $item{field},
		}
	}

# "say "blah" to him", "derive a "ball" from foo", etc.
# The object is, strictly speaking, an indirect object, not a direct object.
# However, it simplifies processing to treat it like a direct object, so the
# preposition in front of it is stored under the do_preposition key. The
# optional preposition and article before the quote are matched but not
# kept.
sentence: verb do_preposition(?) article(?) quote io_preposition direct_object
	{
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb           => $item{verb},
			quote          => $item{quote},
			do_preposition => $item{io_preposition},
			direct_object  => $item{direct_object},
		}
	}

# Now some declarative sentence forms. Matching a possessive object is
# expensive, so do it only once here and let the declaration rule handle
# every variant of what follows. The hash returned by declaration is merged
# into the sentence hash alongside the direct object.
sentence: possessive_object declaration
	{
		&::recent_obj(@{ $item{possessive_object} });
		+{
			direct_object => $item{possessive_object},
			%{ $item{declaration} },
		}
	}

# Stuff like "it's not hidden". The verb is fixed to "is" and the negation
# is flagged with negated_verb.
declaration: ess /\bnot\b/i field ...sentence_separator
	{
		+{
			verb         => "is",
			field        => $item{field},
			negated_verb => 1,
		}
	}
# "it's hidden", etc
declaration: ess field ...sentence_separator
	{ +{ verb => "is", field => $item{field} } }
# "I'm not benchmarked". Same as above, but the verb is fixed to "am".
declaration: /'?m?\b/i /\bnot\b/i field ...sentence_separator
	{
		+{
			verb         => "am",
			field        => $item{field},
			negated_verb => 1,
		}
	}
# "I'm benchmarked"
declaration: /'?m?\b/i field ...sentence_separator
	{ +{ verb => "am", field => $item{field} } }
# Used, for example, to just say what a field's value is, to set it.
declaration: field verb quote
	{
		+{
			field => $item{field},
			verb  => $item{verb},
			quote => $item{quote},
		}
	}
# Similar form can be used (by builders) to say that an object's field is a
# reference to another object.
declaration: field verb indirect_object
	{
		+{
			field           => $item{field},
			verb            => $item{verb},
			indirect_object => $item{indirect_object},
		}
	}
# This is used to set metadata about fields. The production contains two
# field subrules, so they are picked out by position: the first is the
# field itself, the second is the metadata name.
declaration: field verb field number
	{
		+{
			field    => $item[1],
			verb     => $item{verb},
			metadata => $item[3],
			number   => $item{number},
		}
	}
# Even a list of references could be set.
declaration: number field verb indirect_object
	{
		+{
			number          => $item{number},
			field           => $item{field},
			verb            => $item{verb},
			indirect_object => $item{indirect_object},
		}
	}
# A number can also be given, if there are multiple values of a field.
declaration: number field verb quote
	{
		+{
			number => $item{number},
			field  => $item{field},
			verb   => $item{verb},
			quote  => $item{quote},
		}
	}
# These two are used to set and unset boolean fields; the negated form is
# tried first and flags the result with negated_verb.
declaration: negated_verb field
	{
		+{
			verb         => $item{negated_verb},
			field        => $item{field},
			negated_verb => 1,
		}
	}
declaration: verb field
	{ +{ verb => $item{verb}, field => $item{field} } }

# These forms are used by the signal command. The optional preposition
# before the number is matched but not stored.
sentence: verb direct_object preposition(?) number
	{
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb          => $item{verb},
			direct_object => $item{direct_object},
			number        => $item{number},
		}
	}
# Same, with quoted text between the object and the number.
sentence: verb direct_object quote preposition(?) number
	{
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb          => $item{verb},
			direct_object => $item{direct_object},
			quote         => $item{quote},
			number        => $item{number},
		}
	}

# And this is used for dialing telephones. I suppose it could be used
# for signals too.. Like the verb quote direct_object form, the object is
# really indirect, but we'll call it the direct object for simplicity; the
# preposition in front of it is therefore stored as do_preposition, and the
# optional preposition before the number is not kept.
sentence: verb do_preposition(?) number io_preposition direct_object
	{
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb           => $item{verb},
			number         => $item{number},
			do_preposition => $item{io_preposition},
			direct_object  => $item{direct_object},
		}
	}

# "call me "Fred"", "rename me to "Fred"", etc. Must come after the
# declarative forms, otherwise the quote matches too freely. Both
# prepositions are optional; element 0 of each optional match is the
# preposition, or undef when it was absent.
sentence: verb do_preposition(?) direct_object io_preposition(?) quote
	{
		&::recent_obj(@{ $item{direct_object} });
		+{
			verb           => $item{verb},
			do_preposition => $item{'do_preposition(?)'}[0],
			direct_object  => $item{direct_object},
			io_preposition => $item{'io_preposition(?)'}[0],
			quote          => $item{quote},
		}
	}

# This wacky form is used for digging. There are two quote subrules in the
# production, so they have to be picked out by position.
sentence: verb quote io_preposition quote
	{
		+{
			verb           => $item{verb},
			quote          => $item[2],
			io_preposition => $item{io_preposition},
			# XXX there must be a better name than "quote2"?
			quote2         => $item[4],
		}
	}

# These forms are used to do stuff with fields. In all three the
# io_preposition is required by the grammar but not stored in the result.
#
# verb object's field preposition "text"
sentence: verb possessive_object field io_preposition quote
	{
		&::recent_obj(@{ $item{possessive_object} });
		+{
			verb          => $item{verb},
			direct_object => $item{possessive_object},
			field         => $item{field},
			quote         => $item{quote},
		}
	}
# verb object's field preposition object
sentence: verb possessive_object field io_preposition indirect_object
	{
		&::recent_obj(@{ $item{possessive_object} });
		+{
			verb            => $item{verb},
			direct_object   => $item{possessive_object},
			field           => $item{field},
			indirect_object => $item{indirect_object},
		}
	}
# verb object's number field preposition "text"
sentence: verb possessive_object number field io_preposition quote
	{
		&::recent_obj(@{ $item{possessive_object} });
		+{
			verb          => $item{verb},
			direct_object => $item{possessive_object},
			number        => $item{number},
			field         => $item{field},
			quote         => $item{quote},
		}
	}

# For the eval command: verb "text" preposition field.
sentence: verb quote io_preposition field ...sentence_separator
	{
		+{
			verb           => $item{verb},
			quote          => $item{quote},
			io_preposition => $item{io_preposition},
			field          => $item{field},
		}
	}

# This is a repeat of the simple sentence form, but it does not require an
# obvious separator (there is no lookahead). The only reason for this is to
# make reinjection work for stuff like "say hi" -- this parses the verb,
# then the "hi" is quoted and the lot is re-injected.
# This should be the last sentence type listed.
sentence: verb
	{ +{ verb => $item{verb} } }

# End of the sentences, now on to the parts of speech..

# A direct object is an objectlist, so it may name several objects at once.
# Neither rule has an action of its own, so each yields whatever its single
# subrule yields.
direct_object: objectlist
# An indirect object is a single object.
indirect_object: object
# Allows for multiple prepositions to be used before a direct object. They
# are joined together into one space-separated string.
do_preposition: preposition(s)
	{ join " ", @{ $item{'preposition(s)'} } }
# Before an indirect object exactly one preposition is accepted.
io_preposition: preposition
# One object, optionally followed by more objects separated by "and", ",",
# or ", and". The word boundary after "and" matters: without it, the
# separator would swallow the start of a following word such as "android"
# (", android" would match as ", and"), and the list would fail to parse.
#
# Each object subrule is dereferenced as a list ref; the nested lists are
# flattened into one list ref. The (s?) repetition always yields an array
# ref (possibly empty), so no existence check is needed.
objectlist: object (/(?:(?:,\s*)?and\b|,)/ object)(s?)
	{ [ @{ $item[1] }, map { @{$_} } @{ $item[2] } ] }

# "foo's bar". The leading basic_object is passed as the last argument to
# main::is_obj_in_obj() and the trailing object as the first, with an empty
# preposition.
object: basic_object ess object
	{ &::is_obj_in_obj($item{object}, "", $item{basic_object}) }
# "my bar". Same call as above, just without the possessive marker.
object: basic_object object
	{ &::is_obj_in_obj($item{object}, "", $item{basic_object}) }
# "bar in foo". Note that multiple prepositions might be used; all must
# match. Here the leading basic_object goes first and the list of
# prepositions is passed through as matched.
object: basic_object preposition(s) object
	{
		&::is_obj_in_obj(
			$item{basic_object},
			$item{'preposition(s)'},
			$item{object},
		)
	}
# Quantifying the number of objects expected can resolve possible
# ambiguities. An optional leading "a" and an optional "of" are allowed
# around the quantifier.
object: /(a\b)?/ quantifier /(of\b)?/ object
	{
		&::check_quantification(
			$item{quantifier},
			$item{object},
		)
	}
# Must come after the quantified object test, because "all" could be part
# of a quantification, or a preposition.
object: basic_object
# Another form of quantification, a trifle expensive: a number, an optional
# "of", then a basic object. The "of" must end at a word boundary (as in the
# quantifier production for this rule); otherwise it would eat the first
# two letters of a following word such as "office" and make the production
# fail.
object: number /(of\b)?/ basic_object
	{ &::check_quantification($item{number}, $item{basic_object}) }

# An object followed by the possessive marker ("foo's"). Only the object is
# kept; the marker is discarded.
possessive_object: object ess
	{ $item{object} }

# This is the set of simple ways to refer to an object, and is used as the
# base for both regular and possessive forms of objects. The productions
# are tried in the order listed; each hands the matched text to a lookup
# function in main and yields whatever that function returns.

# A pronoun, resolved via main::lookup_pronoun().
basic_object: pronoun
	{ &::lookup_pronoun($item{pronoun}) }
# An explicit "mooix:..." reference, optionally preceded by an article. The
# text after "mooix:" (up to the next space or tab) is captured by the
# regex and passed to main::lookup_reference() as $1.
basic_object: article(?) /mooix:([^ 	]+)/
	{ &::lookup_reference($1) }
# A noun qualified by one or more adjectives.
basic_object: article(?) adjectivelist noun
	{ &::lookup_noun($item{noun}, $item{adjectivelist}) }
# This version is needed for cases like 'red guest', where red is a known
# adjective, but it's actually being used as part of the noun instead.
basic_object: article(?) noun
	{ &::lookup_noun($item{noun}) }
# A production without the article in front, in case the noun seems to start
# with an article (probably due to user confusion).
basic_object: noun
	{ &::lookup_noun($item{noun}) }

# One or more adjectives, with an optional comma between consecutive ones
# (the separator pattern /,?/ also matches nothing).
adjectivelist: <leftop: adjective /,?/ adjective>

# Anything that might be a number: a run of word characters, signs and
# dots, ending at a word boundary. The matched text is handed to
# main::lookup_number(), which is passed a textual representation of a
# number and should return the number so represented, or undef on error.
# An undef result makes this rule fail to match.
#
# The action is the call itself, so its value is the action's value; it is
# not wrapped in a nested bare block, whose value as a final statement is
# not something to rely on.
number: /[-+.\w]+\b/
	{ &::lookup_number($item[1]) }

# Double quoted text. Allow the closing quote to be left off, if the text
# extends to end of string without one.  This also recognizes stuff
# bracketed by {..} as a quote. This special style is used by the shortcuts
# substitutions, to unambiguously quote text that may contain other quote
# characters. Quotes can have a comma before them.
# (Single-quoted text is not recognized by this pattern.)
#
# The literal braces are backslash-escaped so the pattern does not depend
# on how a given perl treats a bare "{" in a regex. Exactly one of the two
# capture groups is set by a successful match, so the defined one is
# returned rather than concatenating both (which would always involve an
# undef value).
quote: /,?\s*(?:"([^"]*)(?:"|$)|\{(.*)\})/
	{ defined $1 ? $1 : $2 }
# Things like object field names. Note that they cannot end in a period;
# that would be ambiguous with a period at the end of a sentence. Periods
# are still allowed anywhere before the last character, which must be a
# letter, digit, "-", "_" or "+".
field: /[-_.+A-Za-z0-9]*[-_+A-Za-z0-9]/

# This only works for verbs like 'is' in declarative sentence forms.
# Either form yields just the verb; flagging the negation is left to the
# production that uses this rule.
#
# "is not": a verb followed by the word "not".
negated_verb: verb /not\b/i
	{ $item{verb} }
# "isn't": a verb with "n't" attached. The verb part is the regex's first
# capture group.
negated_verb: /($::verbs)n't\b/i
	{
		$1
	}

# Some of the parts of speech are broken out into variables in main;
# these variables must be defined before asking the parser to parse
# something, and can be changed as needed between parsings w/o rebuilding
# the whole parser. This makes it easy to eg, populate $::nouns with all
# the names of all the objects the user could refer to. Set the variables
# to compiled regexp's, that | together the possibilities. Like:
# 	$::nouns=qr/cat|dog/;
# All of these are matched case-insensitively.
preposition: /($::prepositions)\b/i
adjective: /($::adjectives)\b/i
noun: /($::nouns)\b/i
verb: /($::verbs)\b/i
# The \b is necessary, since "i" is a pronoun, and that could match at the
# start of other words.
pronoun: /($::pronouns)\b/i
# Matches answers to a recent question.
# NOTE(review): unlike the rules above, answer and quantifier have no
# trailing \b, so they can match a prefix of a longer word -- confirm that
# this is intentional.
answer: /($::answers)/i
quantifier: /($::quantifiers)/i

# Fixed vocabulary that does not depend on variables in main.

# "an", "a" or "the", case-insensitive, as a whole word.
article: /(an|a|the)\b/i
# Words that join two sentences together.
coordinating_conjunction: /(and|then|next)\b/i
# Possessive / contraction marker: an optional apostrophe and an optional
# "s", followed by a word boundary. Both parts are optional, so the pattern
# itself can match an empty string at a word boundary.
ess: /'?s?\b/i
# A run of one or more punctuation characters that can end a sentence.
sentence_punct: /[,;.!]+/
