#ifndef JavadocParser_h
#include "JavadocParser.h"
#endif

#ifndef std_vector
#define std_vector
#include <vector>
#endif

#ifndef std_iostream
#define std_iostream
#include <iostream>
#endif

#include <stdlib.h>

// yes, string.h (old C library) and string (STL)
#include <string.h>
#include <string>

#include <ctype.h>

using namespace std;
using namespace doctorj;

/**
 * Returns whether the given character is comment "whitespace", which includes
 * the comment characters themselves (asterisks and slashes) in addition to
 * ordinary whitespace.
 *
 * @param ch the character to classify
 * @return true if ch is whitespace, '*' or '/'
 */
static bool iscmtws(char ch) 
{
    // isspace() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char (e.g. a non-ASCII byte) is undefined.
    return isspace(static_cast<unsigned char>(ch)) || ch == '*' || ch == '/';
}

/**
 * Remembers the character range to scan and parses it right away. The range
 * is treated as [start, end): parsing stops before reaching end.
 */
JavadocParser::JavadocParser(char* start, char* end)
        : start_(start),
          end_(end),
          pos_(NULL),
          descStart_(NULL),
          descEnd_(NULL)
{
    // All results are computed up front, during construction.
    parse();
}

// The destructor body is empty: nothing is released here.
JavadocParser::~JavadocParser() {}

/**
 * Walks backward from the character just before the current position,
 * skipping comment whitespace, and stops no earlier than from.
 *
 * @param from lower bound for the backward walk; may be NULL
 * @return NULL when from is NULL; otherwise a pointer to the character the
 *         walk stopped on (the result is not advanced past that character)
 */
char* JavadocParser::rewind(char* const from)
{
    if (!from) {
        return NULL;
    }

    char* cursor = pos_ - 1;
    for (; cursor > from && iscmtws(*cursor); --cursor) {
        // skipping trailing comment whitespace
    }
    return cursor;
}

/**
 * The states the parser moves through while scanning a comment.
 */
enum ParseStateType {
    PRE_DESCRIPTION     = 0,  // nothing but comment whitespace seen yet
    IN_DESCRIPTION      = 1,  // inside the leading description text
    IN_TAG              = 2,  // inside a tag name (started by '@')
    PRE_TAG_DESCRIPTION = 3,  // tag name ended, its description not yet seen
    IN_TAG_DESCRIPTION  = 4   // inside the description of the current tag
};

/**
 * Copies every tag comment collected so far into the caller's vector,
 * replacing whatever it held before.
 *
 * @param tags destination vector; it is dereferenced unconditionally
 */
void JavadocParser::getTagCmts(vector<TagCmt>* const tags) const
{
    vector<TagCmt>& destination = *tags;
    destination = tags_;
}

void JavadocParser::addTagCmt(char* const start, char* const end,
                              char* const descStart, char* const descEnd)
{
    TagCmt tc;
    tc.start     = start;
    tc.end       = end;
    tc.descStart = descStart;
    tc.descEnd   = descEnd;
    tags_.push_back(tc);
}

/**
 * Reports the bounds recorded for the leading description.
 *
 * @param start receives descStart_ (NULL unless a description was recorded)
 * @param end   receives descEnd_ (NULL unless a description was recorded)
 */
void JavadocParser::getDescription(char** start, char** end) const
{
    char*& first = *start;
    char*& last  = *end;

    first = descStart_;
    last  = descEnd_;
}

/**
 * Scans the characters in [start_, end_) once and splits them into a leading
 * description and a sequence of tags.
 *
 * The description bounds are stored in descStart_ and descEnd_; each tag is
 * recorded through addTagCmt() with the bounds of the tag name and, when
 * present, the bounds of its description (NULL otherwise). End pointers are
 * whatever rewind() returns, i.e. they refer to the last character that is
 * not comment whitespace.
 *
 * A '@' starts a new tag only where nothing but comment whitespace precedes
 * it on its line; elsewhere it is treated as ordinary text.
 */
void JavadocParser::parse()
{
    // why are parsing functions always so long?

    // true while only comment whitespace has been seen since the last newline
    bool           atbol  = true;
    ParseStateType state  = PRE_DESCRIPTION;
    char*          tagst  = NULL;   // first character ('@') of the current tag
    char*          tagend = NULL;   // last character of the current tag name
    char*          tdst   = NULL;   // start of the current tag's description
    char*          tdend  = NULL;   // end of the current tag's description

    for (pos_ = start_; pos_ < end_; ++pos_) {
        switch (state) {
            case PRE_DESCRIPTION:
                if (iscmtws(*pos_)) {
                    // nothing
                }
                else {
                    if (*pos_ == '@') {
                        // we have a tag, no description
                        state = IN_TAG;
                        tagst = pos_;
                        tdst  = NULL;
                    }
                    else {
                        // we have a description
                        state      = IN_DESCRIPTION;
                        descStart_ = pos_;
                    }
                    atbol = false;
                }
                break;
                
            case IN_DESCRIPTION:
                if (*pos_ == '\n') {
                    atbol = true;
                }
                else if (atbol) {
                    if (iscmtws(*pos_)) {
                        // proceeding through noncode
                    }
                    else if (*pos_ == '@') {
                        // tag found
                        descEnd_ = rewind(descStart_);
                        state    = IN_TAG;
                        tagst    = pos_;
                        atbol    = false;
                    }
                    else {
                        // proceeding through description
                        atbol = false;
                    }
                }
                break;
                
            case IN_TAG:
                // isspace() must not be handed a negative plain char.
                if (isspace(static_cast<unsigned char>(*pos_))) {
                    // tag ending
                    tagend = rewind(tagst);
                    state  = PRE_TAG_DESCRIPTION;
                    // The character ending the tag may itself be the newline.
                    // Record that, so a tag on the following line is seen as
                    // a new tag rather than as this tag's description; any
                    // other whitespace means we are still mid-line.
                    atbol  = (*pos_ == '\n');
                }
                else {
                    // still in tag
                }
                break;
                
            case PRE_TAG_DESCRIPTION:
                if (*pos_ == '\n') {
                    atbol = true;
                }
                else if (iscmtws(*pos_)) {
                    // we still haven't seen the description
                }
                else if (atbol && *pos_ == '@') {
                    // we hit another tag without a description for the current one
                    addTagCmt(tagst, tagend, NULL, NULL);
                    tagst = pos_;
                    state = IN_TAG;
                }
                else {
                    // found the tag description
                    tdst = pos_;
                    state = IN_TAG_DESCRIPTION;
                    atbol = false;
                }
                break;

            case IN_TAG_DESCRIPTION:
                // in tag description
                if (*pos_ == '\n') {
                    atbol = true;
                }
                else if (atbol) {
                    if (iscmtws(*pos_)) {
                        // proceeding through noncode
                    }
                    else if (*pos_ == '@') {
                        // new tag found
                        tdend = rewind(tdst);
                        addTagCmt(tagst, tagend, tdst, tdend);
                        
                        tagst = pos_;
                        state = IN_TAG;
                        atbol = false;
                    }
                    else {
                        // entering the description
                        atbol = false;
                    }
                }
                break;
        }
    }

    // any unfinished business? (pos_ == end_ here, so rewind() trims the
    // trailing comment whitespace, including a closing "*/")
    switch (state) {
        case PRE_DESCRIPTION:
            // nothing
            break;
        case IN_DESCRIPTION:
            // wrap up the description, but we have no tags
            descEnd_ = rewind(descStart_);
            break;
        case IN_TAG:
            // wrap up the tag, but we have no tag description
            tagend = rewind(tagst);
            addTagCmt(tagst, tagend, NULL, NULL);
            break;
        case PRE_TAG_DESCRIPTION:
            // tag, but no tag description
            tagend = rewind(tagst);
            addTagCmt(tagst, tagend, NULL, NULL);
            break;
        case IN_TAG_DESCRIPTION:
            // wrap up the tag description
            tdend = rewind(tdst);
            addTagCmt(tagst, tagend, tdst, tdend);
            break;
    }

}
