#ifndef __PATTERN_H__
#define __PATTERN_H__

#include <vector>
#include <string>
#include <map>

class Matcher;
class NFANode;
class NFAQuantifierNode;

/**
  This pattern class is very similar in functionality to Java's
  java.util.regex.Pattern class. The pattern class represents an immutable
  regular expression object. Rather than having a single object contain both
  the regular expression and the matching state, the two are split apart.
  The {@link Matcher Matcher} class represents the matching object.

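  The Pattern class works primarily off of "compiled" patterns. A typical
  instantiation of a regular expression looks like:

  <code>
  Pattern * p = Pattern::compile("a*b");<br>
  Matcher * m = p->createMatcher("aaaaab");<br>
  </code>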

  @author    Jeffery Stuart
  @since     March 2003, Stable Since November 2004
  @version   0.02a
  @memo      A class used to represent "PERL 5"-ish regular expressions
 */
class Pattern
{
  friend class Matcher;
  friend class NFANode;
  friend class NFAQuantifierNode;
  private:
    /**
      This constructor should not be called directly. Those wishing to use the
      Pattern class should instead use the {@link compile compile} method.
      It is declared <code>explicit</code> so that a string is never silently
      converted into a Pattern.

      @param rhs The pattern to compile
      @memo Creates a new pattern from the regular expression in <code>rhs</code>.
     */
    explicit Pattern(const std::string & rhs);
    /**
      Copy construction is disabled: declared private and intentionally left
      undefined. A Pattern holds raw pointers to its NFA nodes and to a
      Matcher, and the destructor deletes the nodes, so a member-wise copy
      would end up deleting the same nodes twice.
     */
    Pattern(const Pattern & rhs);
    /**
      Assignment is disabled for the same reason as copy construction:
      declared private and intentionally left undefined.
     */
    Pattern & operator=(const Pattern & rhs);
  protected:
    /**
      This currently is not used, so don't try to do anything with it.
      @memo Holds all the compiled patterns for quick access.
     */
    static std::map<std::string, Pattern *> compiledPatterns;
    /**
      Holds all of the registered patterns as strings. Due to certain problems
      with compilation of patterns, especially with capturing groups, this seemed
      to be the best way to do it.
     */
    static std::map<std::string, std::pair<std::string, unsigned long> > registeredPatterns;
  protected:
    /**
      Holds all the NFA nodes used. This makes deletion of a pattern, as well as
      clean-up from an unsuccessful compile much easier and faster.
     */
    std::map<NFANode*, bool> nodes;
    /**
      Used when methods like split are called. The matcher class uses a lot of
      dynamic memory, so keeping an instance around speeds up certain
      operations.
     */
    Matcher * matcher;
    /**
      The front node of the NFA.
     */
    NFANode * head;
    /**
      The actual regular expression we represent
     */
    std::string pattern;
    /**
      Flag used during compilation. Once the pattern is successfully compiled,
      <code>error</code> is no longer used.
     */
    bool error;
    /**
      Used during compilation to keep track of the current index into
      <code>{@link pattern pattern}</code>.  Once the pattern is successfully
      compiled, <code>curInd</code> is no longer used.
     */
    int curInd;
    /**
      The number of capture groups this contains.
     */
    int groupCount;
    /**
      The number of non-capture groups this contains.
     */
    int nonCapGroupCount;
    /**
      The flags specified when this was compiled.
     */
    unsigned long flags;
  protected:
    /**
      Raises an error during compilation. Compilation will cease at that point
      and compile will return <code>NULL</code>.
     */
    void raiseError();
    /**
      Convenience function for registering a node in <code>nodes</code>.
      @param node The node to register
      @return The registered node
     */
    NFANode * registerNode(NFANode * node);

    /**
      Calculates the union of two strings. This function will first sort the
      strings and then use a simple selection algorithm to find the union.
      @param s1 The first "class" to union
      @param s2 The second "class" to union
      @return A new string containing all unique characters. Each character
              must have appeared in one or both of <code>s1</code> and
              <code>s2</code>.
     */
    std::string classUnion      (std::string s1, std::string s2)  const;
    /**
      Calculates the intersection of two strings. This function will first sort
      the strings and then use a simple selection algorithm to find the
      intersection.
      @param s1 The first "class" to intersect
      @param s2 The second "class" to intersect
      @return A new string containing all unique characters. Each character
              must have appeared in both <code>s1</code> and <code>s2</code>.
     */
    std::string classIntersect  (std::string s1, std::string s2)  const;
    /**
      Calculates the negation of a string. The negation is the set of all
      characters between <code>\x00</code> and <code>\xFF</code> not
      contained in <code>s1</code>.
      @param s1 The "class" to be negated.
      @return A new string containing every character between
              <code>\x00</code> and <code>\xFF</code> that does not appear in
              <code>s1</code>.
     */
    std::string classNegate     (std::string s1)                  const;
    /**
      Creates a new "class" representing the range from <code>low</code> thru
      <code>hi</code>. This function will wrap if <code>low</code> &gt;
      <code>hi</code>. This is a feature, not a bug. Sometimes it is useful
      to be able to say [\x70-\x10] instead of [\x70-\x7F\x00-\x10].
      @param low The beginning character
      @param hi  The ending character
      @return A new string containing all the characters from low thru hi.
     */
    std::string classCreateRange(char low,       char hi)         const;

    /**
      Extracts a decimal number from the substring of member-variable
      <code>{@link pattern pattern}</code> starting at <code>start</code> and
      ending at <code>end</code>.
      @param start The starting index in <code>{@link pattern pattern}</code>
      @param end The last index in <code>{@link pattern pattern}</code>
      @return The decimal number in <code>{@link pattern pattern}</code>
     */
    int getInt(int start, int end);
    /**
      Parses a <code>{n,m}</code> string out of the member-variable
      <code>{@link pattern pattern}</code> and stores the result in
      <code>sNum</code> and <code>eNum</code>.
      @param sNum Output parameter. The minimum number of matches required
                  by the curly quantifier is stored here.
      @param eNum Output parameter. The maximum number of matches allowed
                  by the curly quantifier is stored here.
      @return Success/Failure. Fails when the curly does not have the proper
              syntax
     */
    bool quantifyCurly(int & sNum, int & eNum);
    /**
      Tries to quantify the currently parsed group. If the group being parsed
      is indeed quantified in the member-variable
      <code>{@link pattern pattern}</code>, then the NFA is modified accordingly.
      @param start  The starting node of the current group being parsed
      @param stop   The ending node of the current group being parsed
      @param gn     The group number of the current group being parsed
      @return       The node representing the starting node of the group. If the
                    group becomes quantified, then this node is not necessarily
                    a GroupHead node.
     */
    NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn);

    /**
      Tries to quantify the last parsed expression. If the character was indeed
      quantified, then the NFA is modified accordingly.
      @param newNode The recently created expression node
      @return The node representing the last parsed expression. If the
              expression was quantified, <code>return value != newNode</code>
     */
    NFANode * quantify(NFANode * newNode);
    /**
      Parses the current class being examined in
      <code>{@link pattern pattern}</code>.
      @return A string of unique characters contained in the current class being
              parsed
     */
    std::string parseClass();
    /**
      Parses the current POSIX class being examined in
      <code>{@link pattern pattern}</code>.
      @return A string of unique characters representing the POSIX class being
              parsed
     */
    std::string parsePosix();
    /**
      Returns a string containing the octal character being parsed
      @return The string containing the octal value being parsed
     */
    std::string parseOctal();
    /**
      Returns a string containing the hex character being parsed
      @return The string containing the hex value being parsed
     */
    std::string parseHex();
    /**
      Returns a new node representing the back reference being parsed
      @return The new node representing the back reference being parsed
     */
    NFANode *   parseBackref();
    /**
      Parses the escape sequence currently being examined. Determines if the
      escape sequence is a class, a single character, or the beginning of a
      quotation sequence.
      @param inv Output parameter. Whether or not to invert the returned class
      @param quo Output parameter. Whether or not this sequence starts a
                 quotation.
      @return The characters represented by the class
     */
    std::string parseEscape(bool & inv, bool & quo);
    /**
      Parses a supposed registered pattern currently under compilation. If the
      sequence of characters does point to a registered pattern, then the
      registered pattern is appended to <code>*end</code>. The registered pattern
      is parsed with the current compilation flags.
      @param end The ending node of the thus-far compiled pattern
      @return The new end node of the current pattern
     */
    NFANode * parseRegisteredPattern(NFANode ** end);
    /**
      Parses a lookbehind expression. Appends the necessary nodes to
      <code>*end</code>.
      @param pos Positive or negative look behind
      @param end The ending node of the current pattern
      @return The new end node of the current pattern
     */
    NFANode * parseBehind(const bool pos, NFANode ** end);
    /**
      Parses the current expression and tacks on nodes until a \E is found.
      @return The end of the current pattern
     */
    NFANode * parseQuote();
    /**
      Parses <code>{@link pattern pattern}</code>. This function is called
      recursively when an or (<code>|</code>) or a group is encountered.
      @param inParen Are we currently parsing inside a group
      @param inOr Are we currently parsing one side of an or (<code>|</code>)
      @param end The end of the current expression
      @return The starting node of the NFA constructed from this parse
     */
    NFANode * parse(const bool inParen = false, const bool inOr = false, NFANode ** end = NULL);
  public:
    /// We should match regardless of case
    const static unsigned long CASE_INSENSITIVE;
    /// We are implicitly quoted
    const static unsigned long LITERAL;
    /// @memo We should treat a <code><b>.</b></code> as [\x00-\x7F]
    const static unsigned long DOT_MATCHES_ALL;
    /** <code>^</code> and <code>$</code> should anchor to the beginning and
        ending of lines, not all input
     */
    const static unsigned long MULTILINE_MATCHING;
    /** When enabled, only instances of <code>\n</code> are recognized as
        line terminators
     */
    const static unsigned long UNIX_LINE_MODE;
    /// The absolute minimum number of matches a quantifier can match (0)
    const static int MIN_QMATCH;
    /// The absolute maximum number of matches a quantifier can match (0x7FFFFFFF)
    const static int MAX_QMATCH;
  public:
    /**
      Call this function to compile a regular expression into a
      <code>Pattern</code> object. Special values can be assigned to
      <code>mode</code> when certain non-standard behaviors are expected from
      the <code>Pattern</code> object.
      @param pattern The regular expression to compile
      @param mode    A bitwise or of flags signalling what special behaviors are
                     wanted from this <code>Pattern</code> object
      @return If successful, <code>compile</code> returns a <code>Pattern</code>
              pointer. Upon failure, <code>compile</code> returns
              <code>NULL</code>
     */
    static Pattern                    * compile        (const std::string & pattern,
                                                        const unsigned long mode = 0);
    /**
      Don't use this function. This function will compile a pattern, and cache
      the result. This will eventually be used as an optimization when people
      just want to call static methods using the same pattern over and over
      instead of first compiling the pattern and then using the compiled
      instance for matching.
      @param pattern The regular expression to compile
      @param mode    A bitwise or of flags signalling what special behaviors are
                     wanted from this <code>Pattern</code> object
      @return If successful, <code>compileAndKeep</code> returns a
              <code>Pattern</code> pointer. Upon failure,
              <code>compileAndKeep</code> returns <code>NULL</code>.
     */
    static Pattern                    * compileAndKeep (const std::string & pattern,
                                                        const unsigned long mode = 0);

    /**
      Searches through <code>str</code> and replaces all substrings matched
      by <code>pattern</code> with <code>replaceWith</code>. <code>replaceWith</code> may
      contain backreferences (e.g. <code>\1</code>) to capture groups. A typical
      invocation looks like:
      <p>
      <code>
      Pattern::replace("(a+)b(c+)", "abcccbbabcbabc", "\\2b\\1");
      </code>
      <p>
      which would replace <code>abcccbbabcbabc</code> with
      <code>cccbabbcbabcba</code>.
      @param pattern      The regular expression
      @param str          The string in which to perform replacements
      @param replaceWith  The replacement text
      @param mode         The special mode requested of the <code>Pattern</code>
                          during the replacement process
      @return             The text with the replacement string substituted where necessary
     */
    static std::string                  replace        (const std::string & pattern,
                                                        const std::string & str,
                                                        const std::string & replaceWith,
                                                        const unsigned long mode = 0);

    /**
      Splits the specified string over occurrences of the specified pattern.
      Empty strings can be optionally ignored. The number of strings returned is
      configurable. A typical invocation looks like:
      <p>
      <code>
      std::string str(strSize, '\0');<br>
      FILE * fp = fopen(fileName, "r");<br>
      fread((char*)str.data(), strSize, 1, fp);<br>
      fclose(fp);<br>
      <br>
      std::vector&lt;std::string&gt; lines = Pattern::split("[\r\n]+", str, true);<br>
      <br>
      </code>

      @param pattern    The regular expression
      @param str        The string to split
      @param keepEmptys Whether or not to keep empty strings
      @param limit      The maximum number of splits to make
      @param mode       The special mode requested of the <code>Pattern</code>
                        during the split process
      @return All substrings of <code>str</code> split across <code>pattern</code>.
     */
    static std::vector<std::string>     split          (const std::string & pattern,
                                                        const std::string & str,
                                                        const bool keepEmptys = false,
                                                        const unsigned long limit = 0,
                                                        const unsigned long mode = 0);

    /**
      Finds all the instances of the specified pattern within the string. You
      should be careful to only pass patterns with a minimum length of one. For
      example, the pattern <code>a*</code> can be matched by an empty string, so
      instead you should pass <code>a+</code> since at least one character must
      be matched. A typical invocation of <code>findAll</code> looks like:
      <p>
      <code>
      std::vector&lt;std::string&gt; numbers = Pattern::findAll("\\d+", string);
      </code>
      <p>

      @param pattern  The pattern for which to search
      @param str      The string to search
      @param mode     The special mode requested of the <code>Pattern</code>
                      during the find process
      @return All instances of <code>pattern</code> in <code>str</code>
     */
    static std::vector<std::string>     findAll        (const std::string & pattern,
                                                        const std::string & str,
                                                        const unsigned long mode = 0);

    /**
      Determines if an entire string matches the specified pattern

      @param pattern  The pattern to match
      @param str      The string to match
      @param mode     The special mode requested of the <code>Pattern</code>
                      during the matching process
      @return True if <code>str</code> is recognized by <code>pattern</code>
     */
    static bool                         matches        (const std::string & pattern,
                                                        const std::string & str,
                                                        const unsigned long mode = 0);

    /**
      Registers a pattern under a specific name for use in later compilations.
      A typical invocation and later use looks like:
      <p>
      <code>
      Pattern::registerPattern("ip", "(?:\\d{1,3}\\.){3}\\d{1,3}");<br>
      Pattern * p1 = Pattern::compile("{ip}:\\d+");<br>
      Pattern * p2 = Pattern::compile("Connection from ({ip}) on port \\d+");<br>
      </code>
      <p>
      Multiple calls to <code>registerPattern</code> with the same
      <code>name</code> will result in the pattern getting overwritten.

      @param name     The name to give to the pattern
      @param pattern  The pattern to register
      @param mode     Any special flags to use when compiling pattern
      @return Success/Failure. Fails only if <code>pattern</code> has invalid
              syntax
     */
    static bool                         registerPattern(const std::string & name,
                                                        const std::string & pattern,
                                                        const unsigned long mode = 0);

    /**
      Clears the pattern registry
     */
    static void                         unregisterPatterns();
    /**
      Don't use
     */
    static void                         clearPatternCache();

    /**
      Searches through a string for the <code>n<sup>th</sup></code> match of the
      given pattern in the string. Match indices start at zero, not one.
      A typical invocation looks like this:
      <p>
      <code>
      std::pair&lt;std::string, int&gt; match = Pattern::findNthMatch("\\d{1,3}", "192.168.1.101:22", 1);<br>
      printf("%s %i\n", match.first.c_str(), match.second);<br>
      <br>
      Output: 168 4<br>
      <br>
      </code>

      @param pattern  The pattern for which to search
      @param str      The string to search
      @param matchNum Which match to find
      @param mode     Any special flags to use during the matching process
      @return A string and an integer. The string is the string matched. The
              integer is the starting location of the matched string in
              <code>str</code>. You can check for success/failure by making sure
              that the integer returned is greater than or equal to zero.
     */
    static std::pair<std::string, int>  findNthMatch   (const std::string & pattern,
                                                        const std::string & str,
                                                        const int matchNum,
                                                        const unsigned long mode = 0);
  public:
    /**
      Deletes all NFA nodes allocated during compilation
     */
    ~Pattern();

    /**
      Instance form of the static {@link replace replace}: takes the same
      <code>str</code> and <code>replaceWith</code> arguments but no pattern
      or mode. NOTE(review): presumably applies this already-compiled pattern
      and its flags -- confirm against the implementation.
      @param str          The string in which to perform replacements
      @param replaceWith  The replacement text
     */
    std::string               replace        (const std::string & str,
                                              const std::string & replaceWith);
    /**
      Instance form of the static {@link split split}: takes the same
      <code>str</code>, <code>keepEmptys</code> and <code>limit</code>
      arguments but no pattern or mode. NOTE(review): presumably applies this
      already-compiled pattern -- confirm against the implementation.
      @param str        The string to split
      @param keepEmptys Whether or not to keep empty strings
      @param limit      The maximum number of splits to make
     */
    std::vector<std::string>  split          (const std::string & str, const bool keepEmptys = false,
                                              const unsigned long limit = 0);
    /**
      Instance form of the static {@link findAll findAll}: takes the string to
      search but no pattern or mode. NOTE(review): presumably applies this
      already-compiled pattern -- confirm against the implementation.
      @param str The string to search
     */
    std::vector<std::string>  findAll        (const std::string & str);
    /**
      Instance form of the static {@link matches matches}: takes the string to
      match but no pattern or mode. NOTE(review): presumably applies this
      already-compiled pattern -- confirm against the implementation.
      @param str The string to match
     */
    bool                      matches        (const std::string & str);
    /**
      Returns the flags used during compilation of this pattern
      @return The flags used during compilation of this pattern
     */
    unsigned long             getFlags       () const;
    /**
      Returns the regular expression this pattern represents
      @return The regular expression this pattern represents
     */
    std::string               getPattern     () const;
    /**
      Creates a matcher object using the specified string and this pattern.
      @param str The string to match against
      @return A new matcher object using this pattern and the specified
              string
     */
    Matcher                 * createMatcher  (const std::string & str);
};

/**
  Base class of the NFA node types declared in this header. Because
  <code>match</code> is pure virtual, only subclasses can be instantiated.
  Matcher is granted friend access.
 */
class NFANode {
  friend class Matcher;

public:
  /// Link to another node. NOTE(review): presumably the successor node in
  /// the NFA -- confirm against the implementation.
  NFANode* next;

  NFANode();
  virtual ~NFANode();

  /**
    @param soFar Map keyed by node pointer, passed by non-const reference.
                 NOTE(review): presumably filled with every node reachable
                 from this one -- confirm against the implementation.
   */
  virtual void findAllNodes(std::map<NFANode*, bool>& soFar);

  /**
    Must be overridden by every concrete node type.
    @param str     The string being matched against
    @param matcher The matcher taking part in this match
    @param curInd  Index into <code>str</code>; defaults to 0
    @return An int whose meaning is defined by the implementation (not
            visible in this header)
   */
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const = 0;
};
/**
  NFA node carrying a single character (<code>ch</code>).
  NOTE(review): presumably <code>match</code> compares that character with
  <code>str</code> at <code>curInd</code> -- confirm in the implementation.
 */
class NFACharNode : public NFANode {
protected:
  /// Character held by this node.
  char ch;

public:
  /**
    Declared <code>explicit</code> so that a bare <code>char</code> is never
    implicitly converted into a node object.
    @param c The character for this node
   */
  explicit NFACharNode(const char c);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a single character (<code>ch</code>).
  NOTE(review): the "CI" prefix presumably stands for case-insensitive
  (cf. Pattern::CASE_INSENSITIVE) -- confirm in the implementation.
 */
class NFACICharNode : public NFANode {
protected:
  /// Character held by this node.
  char ch;

public:
  /**
    Declared <code>explicit</code> so that a bare <code>char</code> is never
    implicitly converted into a node object.
    @param c The character for this node
   */
  explicit NFACICharNode(const char c);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node type with no data members of its own; it declares only a default
  constructor and an implementation of NFANode::match.
  NOTE(review): judging by the name, this marks where a match attempt
  starts -- confirm in the implementation.
 */
class NFAStartNode : public NFANode {
public:
  NFAStartNode();

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node type with no data members of its own; it declares only a default
  constructor and an implementation of NFANode::match.
  NOTE(review): judging by the name, this marks the end of the NFA --
  confirm in the implementation.
 */
class NFAEndNode : public NFANode {
public:
  NFAEndNode();

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  Common base of the greedy, lazy and possessive quantifier nodes declared
  in this header. Holds a pair of bounds and a pointer to an inner node, and
  overrides both <code>findAllNodes</code> and <code>match</code>.
 */
class NFAQuantifierNode : public NFANode {
public:
  /// Lower bound. NOTE(review): presumably set from <code>minMatch</code>.
  int min;
  /// Upper bound. NOTE(review): presumably set from <code>maxMatch</code>.
  int max;
  /// Inner node. NOTE(review): presumably the quantified sub-expression,
  /// set from <code>internal</code>.
  NFANode* inner;

  /// Overrides NFANode::findAllNodes.
  virtual void findAllNodes(std::map<NFANode*, bool>& soFar);

  /**
    @param pat      The pattern this node is created for
    @param internal The node handed in as the inner node
    @param minMatch Defaults to Pattern::MIN_QMATCH
    @param maxMatch Defaults to Pattern::MAX_QMATCH
   */
  NFAQuantifierNode(Pattern* pat,
                    NFANode* internal,
                    const int minMatch = Pattern::MIN_QMATCH,
                    const int maxMatch = Pattern::MAX_QMATCH);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  Quantifier node derived from NFAQuantifierNode. In addition to
  <code>match</code> it declares a virtual <code>matchInternal</code> that
  takes an extra <code>soFar</code> argument.
  NOTE(review): presumably implements greedy quantifier semantics, with
  <code>soFar</code> counting repetitions matched so far -- confirm in the
  implementation.
 */
class NFAGreedyQuantifierNode : public NFAQuantifierNode {
public:
  /**
    Takes the same arguments, with the same defaults, as the
    NFAQuantifierNode constructor.
   */
  NFAGreedyQuantifierNode(Pattern* pat,
                          NFANode* internal,
                          const int minMatch = Pattern::MIN_QMATCH,
                          const int maxMatch = Pattern::MAX_QMATCH);

  /// Overrides NFAQuantifierNode::match.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;

  /// Helper declared alongside match(); unlike match() it has no default
  /// arguments.
  virtual int matchInternal(const std::string& str,
                            Matcher* matcher,
                            const int curInd,
                            const int soFar) const;
};
/**
  Quantifier node derived from NFAQuantifierNode that supplies its own
  <code>match</code>.
  NOTE(review): presumably implements lazy (reluctant) quantifier
  semantics -- confirm in the implementation.
 */
class NFALazyQuantifierNode : public NFAQuantifierNode {
public:
  /**
    Takes the same arguments, with the same defaults, as the
    NFAQuantifierNode constructor.
   */
  NFALazyQuantifierNode(Pattern* pat,
                        NFANode* internal,
                        const int minMatch = Pattern::MIN_QMATCH,
                        const int maxMatch = Pattern::MAX_QMATCH);

  /// Overrides NFAQuantifierNode::match.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  Quantifier node derived from NFAQuantifierNode that supplies its own
  <code>match</code>.
  NOTE(review): presumably implements possessive quantifier semantics --
  confirm in the implementation.
 */
class NFAPossessiveQuantifierNode : public NFAQuantifierNode {
public:
  /**
    Takes the same arguments, with the same defaults, as the
    NFAQuantifierNode constructor.
   */
  NFAPossessiveQuantifierNode(Pattern* pat,
                              NFANode* internal,
                              const int minMatch = Pattern::MIN_QMATCH,
                              const int maxMatch = Pattern::MAX_QMATCH);

  /// Overrides NFAQuantifierNode::match.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node type with no data members of its own; it declares only a default
  constructor and an implementation of NFANode::match.
  NOTE(review): judging by the name, this is the accepting state of the
  NFA -- confirm in the implementation.
 */
class NFAAcceptNode : public NFANode {
public:
  NFAAcceptNode();

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node holding a set of characters (<code>vals</code>) together with an
  inversion flag (<code>inv</code>).
  NOTE(review): presumably matches one character that is (or, when
  inverted, is not) in the set -- confirm in the implementation.
 */
class NFAClassNode : public NFANode {
public:
  /// Inversion flag.
  bool inv;
  /// Characters belonging to this class, keyed by character.
  std::map<char, bool> vals;

  /**
    Declared <code>explicit</code>: a <code>bool</code> parameter would
    otherwise let booleans, integers and pointers convert implicitly into a
    node object.
    @param invert Inversion flag; defaults to false
   */
  explicit NFAClassNode(const bool invert = false);

  /**
    @param clazz  String of characters for this class
    @param invert Inversion flag
   */
  NFAClassNode(const std::string& clazz, const bool invert);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node holding a set of characters (<code>vals</code>) together with an
  inversion flag (<code>inv</code>).
  NOTE(review): the "CI" prefix presumably stands for case-insensitive
  (cf. Pattern::CASE_INSENSITIVE) -- confirm in the implementation.
 */
class NFACIClassNode : public NFANode {
public:
  /// Inversion flag.
  bool inv;
  /// Characters belonging to this class, keyed by character.
  std::map<char, bool> vals;

  /**
    Declared <code>explicit</code>: a <code>bool</code> parameter would
    otherwise let booleans, integers and pointers convert implicitly into a
    node object.
    @param invert Inversion flag; defaults to false
   */
  explicit NFACIClassNode(const bool invert = false);

  /**
    @param clazz  String of characters for this class
    @param invert Inversion flag
   */
  NFACIClassNode(const std::string& clazz, const bool invert);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node type with no data members of its own; it declares only a default
  constructor and an implementation of NFANode::match.
  NOTE(review): judging by the name, this starts a sub-expression --
  confirm in the implementation.
 */
class NFASubStartNode : public NFANode {
public:
  NFASubStartNode();

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node holding pointers to two other nodes (<code>one</code> and
  <code>two</code>); overrides both <code>findAllNodes</code> and
  <code>match</code>.
  NOTE(review): presumably the two alternatives of an or (<code>|</code>)
  expression -- confirm in the implementation.
 */
class NFAOrNode : public NFANode {
public:
  /// First node. NOTE(review): presumably set from <code>first</code>.
  NFANode* one;
  /// Second node. NOTE(review): presumably set from <code>second</code>.
  NFANode* two;

  /**
    @param first  The first node
    @param second The second node
   */
  NFAOrNode(NFANode* first, NFANode* second);

  /// Overrides NFANode::findAllNodes.
  virtual void findAllNodes(std::map<NFANode*, bool>& soFar);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a string (<code>qStr</code>).
  NOTE(review): presumably matches the quoted text literally -- confirm in
  the implementation.
 */
class NFAQuoteNode : public NFANode {
public:
  /// String held by this node.
  std::string qStr;

  /**
    Declared <code>explicit</code> so that a string is never implicitly
    converted into a node object.
    @param quoted The string for this node
   */
  explicit NFAQuoteNode(const std::string& quoted);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a string (<code>qStr</code>).
  NOTE(review): the "CI" prefix presumably stands for case-insensitive
  (cf. Pattern::CASE_INSENSITIVE) -- confirm in the implementation.
 */
class NFACIQuoteNode : public NFANode {
public:
  /// String held by this node.
  std::string qStr;

  /**
    Declared <code>explicit</code> so that a string is never implicitly
    converted into a node object.
    @param quoted The string for this node
   */
  explicit NFACIQuoteNode(const std::string& quoted);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node holding a flag (<code>pos</code>) and a pointer to an inner node;
  overrides both <code>findAllNodes</code> and <code>match</code>.
  NOTE(review): presumably a positive or negative look-ahead over the inner
  expression -- confirm in the implementation.
 */
class NFALookAheadNode : public NFANode {
public:
  /// Flag. NOTE(review): presumably set from <code>positive</code>.
  bool pos;
  /// Inner node. NOTE(review): presumably set from <code>internal</code>.
  NFANode* inner;

  /**
    @param internal The node handed in as the inner node
    @param positive Positive or negative flag
   */
  NFALookAheadNode(NFANode* internal, const bool positive);

  /// Overrides NFANode::findAllNodes.
  virtual void findAllNodes(std::map<NFANode*, bool>& soFar);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node holding a flag (<code>pos</code>) and a string
  (<code>mStr</code>). Unlike NFALookAheadNode it stores a string rather than
  an inner node.
  NOTE(review): presumably a positive or negative look-behind against that
  string -- confirm in the implementation.
 */
class NFALookBehindNode : public NFANode {
public:
  /// Flag. NOTE(review): presumably set from <code>positive</code>.
  bool pos;
  /// String. NOTE(review): presumably set from the constructor's
  /// <code>str</code>.
  std::string mStr;

  /**
    @param str      The string for this node
    @param positive Positive or negative flag
   */
  NFALookBehindNode(const std::string& str, const bool positive);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node type with no data members of its own; it declares only a default
  constructor and an implementation of NFANode::match.
  NOTE(review): judging by the name, this anchors to the start of a line --
  confirm in the implementation.
 */
class NFAStartOfLineNode : public NFANode {
public:
  NFAStartOfLineNode();

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node type with no data members of its own; it declares only a default
  constructor and an implementation of NFANode::match.
  NOTE(review): judging by the name, this anchors to the end of a line --
  confirm in the implementation.
 */
class NFAEndOfLineNode : public NFANode {
public:
  NFAEndOfLineNode();

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a group index (<code>gi</code>).
  NOTE(review): presumably a back reference to that capture group --
  confirm in the implementation.
 */
class NFAReferenceNode : public NFANode {
public:
  /// Group index held by this node.
  int gi;

  /**
    Declared <code>explicit</code> so that an integer is never implicitly
    converted into a node object.
    @param groupIndex The group index for this node
   */
  explicit NFAReferenceNode(const int groupIndex);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node type with no data members of its own; it declares only a default
  constructor and an implementation of NFANode::match.
  NOTE(review): judging by the name, this anchors to the start of the
  input -- confirm in the implementation.
 */
class NFAStartOfInputNode : public NFANode {
public:
  NFAStartOfInputNode();

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a single flag (<code>term</code>).
  NOTE(review): presumably anchors to the end of the input, with the flag
  controlling whether a trailing line terminator is allowed -- confirm in the
  implementation.
 */
class NFAEndOfInputNode : public NFANode {
public:
  /// Flag held by this node.
  bool term;

  /**
    Declared <code>explicit</code>: a <code>bool</code> parameter would
    otherwise let booleans, integers and pointers convert implicitly into a
    node object.
    @param lookForTerm The flag for this node
   */
  explicit NFAEndOfInputNode(const bool lookForTerm);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a single flag (<code>pos</code>).
  NOTE(review): presumably tests for a word boundary, or for its absence
  when the flag is false -- confirm in the implementation.
 */
class NFAWordBoundaryNode : public NFANode {
public:
  /// Flag held by this node.
  bool pos;

  /**
    Declared <code>explicit</code>: a <code>bool</code> parameter would
    otherwise let booleans, integers and pointers convert implicitly into a
    node object.
    @param positive The flag for this node
   */
  explicit NFAWordBoundaryNode(const bool positive);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node type with no data members of its own; it declares only a default
  constructor and an implementation of NFANode::match.
  NOTE(review): judging by the name, this anchors to the end of the
  previous match -- confirm in the implementation.
 */
class NFAEndOfMatchNode : public NFANode {
public:
  NFAEndOfMatchNode();

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a group index (<code>gi</code>).
  NOTE(review): presumably marks where the group with that index begins --
  confirm in the implementation.
 */
class NFAGroupHeadNode : public NFANode {
public:
  /// Group index held by this node.
  int gi;

  /**
    Declared <code>explicit</code> so that an integer is never implicitly
    converted into a node object.
    @param groupIndex The group index for this node
   */
  explicit NFAGroupHeadNode(const int groupIndex);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a group index (<code>gi</code>).
  NOTE(review): presumably marks where the group with that index ends --
  confirm in the implementation.
 */
class NFAGroupTailNode : public NFANode {
public:
  /// Group index held by this node.
  int gi;

  /**
    Declared <code>explicit</code> so that an integer is never implicitly
    converted into a node object.
    @param groupIndex The group index for this node
   */
  explicit NFAGroupTailNode(const int groupIndex);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node carrying a group index (<code>gi</code>).
  NOTE(review): presumably performs set-up before an NFAGroupLoopNode for
  the same group runs -- confirm in the implementation.
 */
class NFAGroupLoopPrologueNode : public NFANode {
public:
  /// Group index held by this node.
  int gi;

  /**
    Declared <code>explicit</code> so that an integer is never implicitly
    converted into a node object.
    @param groupIndex The group index for this node
   */
  explicit NFAGroupLoopPrologueNode(const int groupIndex);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;
};
/**
  NFA node holding a group index, a pair of bounds, a type code and a
  pointer to an inner node. Besides the virtual <code>match</code> it
  declares three non-virtual helpers: <code>matchGreedy</code>,
  <code>matchLazy</code> and <code>matchPossessive</code>.
  NOTE(review): presumably <code>match</code> dispatches to one of the
  helpers according to <code>type</code> -- confirm in the implementation.
 */
class NFAGroupLoopNode : public NFANode {
public:
  /// Group index. NOTE(review): presumably set from <code>groupIndex</code>.
  int gi;
  /// Lower bound. NOTE(review): presumably set from <code>minMatch</code>.
  int min;
  /// Upper bound. NOTE(review): presumably set from <code>maxMatch</code>.
  int max;
  /// Type code. NOTE(review): presumably set from <code>matchType</code>;
  /// the meaning of its values is not visible in this header.
  int type;
  /// Inner node. NOTE(review): presumably set from <code>internal</code>.
  NFANode* inner;

  /**
    @param internal   The node handed in as the inner node
    @param minMatch   Lower bound
    @param maxMatch   Upper bound
    @param groupIndex The group index for this node
    @param matchType  Type code for this node
   */
  NFAGroupLoopNode(NFANode* internal,
                   const int minMatch,
                   const int maxMatch,
                   const int groupIndex,
                   const int matchType);

  /// Overrides NFANode::findAllNodes.
  virtual void findAllNodes(std::map<NFANode*, bool>& soFar);

  /// Implements NFANode::match for this node type.
  virtual int match(const std::string& str,
                    Matcher* matcher,
                    const int curInd = 0) const;

  /// Non-virtual helper with the same parameters as match().
  int matchGreedy(const std::string& str,
                  Matcher* matcher,
                  const int curInd = 0) const;

  /// Non-virtual helper with the same parameters as match().
  int matchLazy(const std::string& str,
                Matcher* matcher,
                const int curInd = 0) const;

  /// Non-virtual helper with the same parameters as match().
  int matchPossessive(const std::string& str,
                      Matcher* matcher,
                      const int curInd = 0) const;
};

#endif

