/***************************************************************************
                          codeparser.cpp  -  description
                             -------------------
    begin                : Die Jul 9 2002
    copyright            : (C) 2002 by André Simon
    email                : andre.simon1@gmx.de
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "codeparser.h"

using namespace std;

namespace highlight {

/**
 * Default constructor.
 *
 * Puts every pointer, flag and counter into a defined state. The destructor
 * deletes lineWrapper and formatter unconditionally, so leaving these
 * pointers uninitialised made destroying a default-constructed parser
 * undefined behaviour. The members are listed in the same order as in the
 * parameterised constructor; styleName is default-constructed (empty).
 */
CodeParser::CodeParser()
   :in(NULL),
    out(NULL),
    maskWs(false),
    excludeWs(false),
    fragmentOutput(false),
    showLineNumbers(false),
    lineNumber(0),
    numberSpaces(0),
    lineIndex(0),
    formatter(NULL),
    lineWrapper(NULL)
{}

/**
 * Creates a parser and loads the requested colour theme.
 *
 * @param printLineNumbers   stored in showLineNumbers; also narrows the
 *                           wrapping width by LINE_NUMBER_WIDTH
 * @param colourTheme        name passed to docStyle.load()
 * @param numSpaces          stored in numberSpaces (used when replacing tabs)
 * @param lineWrappingStyle  WRAP_DISABLED creates no line wrapper; any other
 *                           value creates one
 * @param fragment           stored in fragmentOutput (header and footer are
 *                           omitted by printOutput when true)
 */
CodeParser::CodeParser( bool printLineNumbers,
                        const string &colourTheme,
                        int numSpaces,
                        WrapMode lineWrappingStyle,
                        bool fragment)
   :in(NULL),
    out(NULL),
    maskWs(false),
    excludeWs(false),
    fragmentOutput(fragment),
    showLineNumbers (printLineNumbers),
    lineNumber(0),
    styleName(colourTheme),
    numberSpaces(numSpaces),
    lineIndex(0),
    formatter(NULL),
    lineWrapper(NULL)
{
  // Pre-allocate the two work buffers.
  token.reserve(30);
  line.reserve(150);

  docStyle.load(styleName);

  // Without wrapping there is nothing more to set up.
  if (lineWrappingStyle==WRAP_DISABLED) {
    return;
  }

  // The second argument tells the wrapper whether the default mode was chosen.
  const bool defaultWrapping = (lineWrappingStyle==WRAP_DEFAULT);
  lineWrapper=new LineWrapper( (printLineNumbers)?80-LINE_NUMBER_WIDTH:80,
                               defaultWrapping );
}

/**
 * Destroys the line wrapper and the formatter, if they were created.
 */
CodeParser::~CodeParser()
{
  if (lineWrapper!=NULL) {
    delete lineWrapper;
  }
  if (formatter!=NULL) {
    delete formatter;
  }
}

/**
 * Rewinds the read position: line counter and column index go back to zero
 * and the buffered input line is emptied.
 */
void CodeParser::reset()
{
  lineNumber = 0;
  lineIndex = 0;
  line.clear();
}

/**
 * Creates, keeps or destroys the astyle formatter.
 *
 * - FORMAT_DISABLED: an existing formatter is deleted.
 * - any other style: a formatter is created and configured, but only if none
 *   exists yet; an already existing formatter is left untouched.
 *
 * @param style  requested reformatting style
 */
void CodeParser::setFormatter( FormatStyle style)
{
  // Reformatting switched off: drop the formatter if there is one.
  if (style==FORMAT_DISABLED) {
    if (formatter!=NULL) {
      delete formatter;
      formatter=NULL;
    }
    return;
  }

  // A formatter that already exists keeps its current configuration.
  if (formatter!=NULL) {
    return;
  }

  formatter=new astyle::ASFormatter();

  switch (style) {

    case FORMAT_ANSI:
      formatter->setBracketIndent(false);
      formatter->setSpaceIndentation(4);
      formatter->setBracketFormatMode(astyle::BREAK_MODE);
      formatter->setClassIndent(false);
      formatter->setSwitchIndent(false);
      formatter->setNamespaceIndent(false);
      break;

    case FORMAT_GNU:
      formatter->setBlockIndent(true);
      formatter->setSpaceIndentation(2);
      formatter->setBracketFormatMode(astyle::BREAK_MODE);
      formatter->setClassIndent(false);
      formatter->setSwitchIndent(false);
      formatter->setNamespaceIndent(false);
      break;

    case FORMAT_JAVA:
      formatter->setJavaStyle();
      formatter->setBracketIndent(false);
      formatter->setSpaceIndentation(4);
      formatter->setBracketFormatMode(astyle::ATTACH_MODE);
      formatter->setSwitchIndent(false);
      break;

    case FORMAT_KR:
      formatter->setBracketIndent(false);
      formatter->setSpaceIndentation(4);
      formatter->setBracketFormatMode(astyle::ATTACH_MODE);
      formatter->setClassIndent(false);
      formatter->setSwitchIndent(false);
      formatter->setNamespaceIndent(false);
      break;

    case FORMAT_LINUX:
      formatter->setBracketIndent(false);
      formatter->setSpaceIndentation(8);
      formatter->setBracketFormatMode(astyle::BDAC_MODE);
      formatter->setClassIndent(false);
      formatter->setSwitchIndent(false);
      formatter->setNamespaceIndent(false);
      break;

    default:
      // Any other style: the formatter keeps its defaults.
      break;
  }
}

/*void  CodeParser::setLanguageDefinition(const LanguageDefinition &langDef)
{
  langInfo=langDef;   
}*/

/**
 * Scans s forward for the next run of decimal digits and returns its value
 * as a State.
 *
 * The scan starts two characters after searchPos. In the symbol string the
 * constant assigned to a symbol always follows that symbol.
 *
 * @param s          string to scan (the symbol string)
 * @param searchPos  position of the symbol whose constant is wanted
 * @return the number converted to State, or _UNKNOWN if it is zero or no
 *         digits were found
 */
State CodeParser::getState(const string &s, unsigned int searchPos)
{
  const string::size_type length = s.length();

  // Skip ahead to the first digit.
  unsigned int pos = searchPos+2;
  while ((pos<length) && !isdigit(s[pos]))
    {
      ++pos;
    }

  // Accumulate the decimal number.
  unsigned int value = 0;
  for (; (pos<length) && isdigit(s[pos]); ++pos)
    {
      value = value *10 + (s[pos]-'0');
    }

  if (value==0)
    {
      return _UNKNOWN;
    }
  return (State)value;
}

/**
 * Extracts an identifier from the current line.
 *
 * lineIndex is first moved back by one, so the identifier starts at the
 * character before the current read position (callers must have consumed at
 * least one character). The identifier extends over every following
 * character that is alphabetic, a digit, or one of the language's allowed
 * characters. On return lineIndex points just past the identifier.
 *
 * Fix: the original condition parsed as
 * `(inBounds && (alpha || digit)) || allowed`, so the bounds check did not
 * apply to allowed characters. The check now guards all three tests, and the
 * character is only read while lineIndex is inside the line.
 *
 * @return the identifier text (may be empty)
 */
string CodeParser::getIdentifier()
{
  --lineIndex;
  const unsigned int startPos=lineIndex;

  while (lineIndex < line.length())
    {
      const char c= line[lineIndex];
      // ctype functions require a value representable as unsigned char
      const bool identifierChar =    StringTools::isAlpha(c)
                                  || isdigit(static_cast<unsigned char>(c))
                                  || isAllowedChar(c);
      if (!identifierChar)
        {
          break;
        }
      ++lineIndex;
    }
  return string(line, startPos, lineIndex - startPos);
}

/**
 * Extracts a numeric literal from the current line.
 *
 * lineIndex is first moved back by one, so the literal starts at the
 * character before the current read position. On return lineIndex points
 * just past the literal.
 *
 * Fixes compared to the original:
 *  - the look-behind `line[lineIndex-1]` is only evaluated when lineIndex > 0;
 *  - the look-ahead `line[lineIndex+1]` is only evaluated inside the line;
 *  - characters are cast to unsigned char before being passed to the ctype
 *    functions (a negative char is undefined behaviour there).
 *
 * @return the literal text (may be empty)
 */
string CodeParser::getNumber()
{
  --lineIndex;
  const unsigned int startPos=lineIndex;

  while (lineIndex < line.length())
    {
      const char c=line[lineIndex];
      const unsigned char uc=static_cast<unsigned char>(c);
      const bool hasNext = (lineIndex+1 < line.length());

      const bool partOfNumber =
             isdigit(uc)
          // do not treat operations on literals as part of the number,
          // e.g. Ruby: 3.xxx()
          || (c == '.' && hasNext
              && isdigit(static_cast<unsigned char>(line[lineIndex+1])))
          // a minus sign is accepted as the first character only
          || (c == '-' && lineIndex == startPos)
          // tolerate prefixes such as 0x, 0u, ...
          || (lineIndex > 0 && StringTools::isAlpha(c)
              && line[lineIndex-1]=='0')
          // tolerate suffixes as in 0.3f, 100l and exponent notation
          || (isxdigit(uc) || c=='L' || c=='U' || c=='l' || c=='u');

      if (!partOfNumber)
        {
          break;
        }
      ++lineIndex;
    }
  return string(line,startPos, lineIndex-startPos);
}

unsigned int CodeParser::getLineNumber()
{
  return lineNumber;
}

/**
 * Reads the next input line into newLine.
 *
 * Before reading, the character just before lineIndex in newLine is stored
 * in terminatingChar (processDirectiveState compares it with the language's
 * continuation character).
 *
 * Fix: the original evaluated `newLine[lineIndex-1]` unconditionally. With
 * lineIndex == 0 (first read, or after an empty line) the index wraps around
 * and reads out of bounds. In that case terminatingChar is now set to '\0'.
 *
 * The line comes from the input stream, or from the formatter when
 * reformatting is enabled.
 *
 * @param newLine  in: the line just finished; out: the line read
 * @return true if no further line could be read
 */
bool CodeParser::readNewLine(string &newLine){
  terminatingChar = (lineIndex > 0 && lineIndex <= newLine.length())
                      ? newLine[lineIndex-1]
                      : '\0';

  // Reformatting not enabled: read straight from the input stream.
  if (formatter==NULL)
    {
      return ! getline( *in, newLine);
    }

  // Otherwise take the next line from the formatter.
  if (!formatter->hasMoreLines())
    {
      return true;
    }
  newLine = formatter->nextLine();
  return false;
}

/**
 * Returns the next character of the input.
 *
 * While lineIndex is inside the current line, the character at lineIndex is
 * returned and lineIndex advances. When lineIndex equals the line length, a
 * new line is fetched (through the line wrapper if one exists), lineIndex is
 * reset, lineNumber is incremented and the new line is right-trimmed.
 *
 * @return the next character, '\n' when a new line was fetched, or '\0' when
 *         readNewLine reported the end of the input
 */
unsigned char CodeParser::getInputChar()
{
  // Common case: still inside the current line.
  if (lineIndex != line.length())
    {
      return line[lineIndex++];
    }

  // End of line reached: fetch the next one.
  bool inputExhausted=false;
  if (lineWrapper==NULL)
    {
      inputExhausted=readNewLine(line);
    }
  else
    {
      // Only read a fresh line once the wrapper has handed out all its pieces.
      if (!lineWrapper->hasMoreLines())
        {
          inputExhausted=readNewLine(line);
          lineWrapper->setLine(line);
        }
      line = lineWrapper->getNextLine();
    }

  lineIndex=0;
  ++lineNumber;
  line=StringTools::trimRight(line);

  if (inputExhausted)
    {
      return '\0';
    }
  return '\n';
}

/**
 * @return the character two positions before lineIndex in the current line,
 *         or '\0' if lineIndex is smaller than 2
 */
unsigned char CodeParser::getLastChar(){
  if (lineIndex>=2)
    {
      return line[lineIndex-2];
    }
  return '\0';
}

/**
 * Determines the state of the next token in the input and stores the token
 * text in the member `token`.
 *
 * If `token` is empty, the next input character is fetched; otherwise the
 * existing token is examined again (lineIndex is rewound accordingly).
 * The checks run in this order: end of line / end of file, whitespace,
 * numbers, symbols from the language's symbol string, and finally
 * identifiers (keywords and types).
 *
 * @param lastStateWasNumber  true if the previously recognised state was a
 *                            number; a '-' is then not treated as a sign
 * @return the recognised state; STANDARD if nothing else matched
 */
State CodeParser::getCurrentState (bool lastStateWasNumber)
{
  unsigned char c;

  // If token is empty, fetch the next character
  if (token.length()==0)
    {
      c=getInputChar();
    }
  // otherwise examine the token once more
  else
    {
      lineIndex-= (token.length()-1); // rewind the string index
      c=token[0];
    }

  if (c=='\n'){    
    return _EOL;   // End of line
  }

  if (c=='\0') {    
    return _EOF;   // End of file
  }

  // Whitespace found; replace tabs by spaces if the -t option is set
  // (numberSpaces != 0). For a tab the spacers are appended to token,
  // in the other cases token is overwritten.
  if (isspace(c))
    {
      if (c=='\t'){
         if (numberSpaces){
            for (int space=0; space < numberSpaces; space++){
              token += spacer;
            }
         } else {
           token=c;
         }
       } else {
          token=spacer;
       }
      return _WS;
    }

  // Number found. Numbers MUST be parsed before searching the symbol string,
  // because otherwise they would be found in the symbol string!
  // If c is a digit or '-', extract the number
  if (isdigit(c) || langInfo.isNumberPrefix(c)
      // highlight floats like .5
      || (c=='.' && isdigit(line[lineIndex]))
      // test whether '-' belongs to a number or to a term a-b:
      || ((c == '-')
          // if a number precedes the '-', the '-' belongs to a term a-b
          && (!lastStateWasNumber) 
          && isdigit(StringTools::getNextNonWs(line, lineIndex))) )
    {
      if (langInfo.isNumberPrefix(c))
        {
          // keep the prefix and let getNumber() start behind it
          token = c;
          ++lineIndex;
          token += getNumber();
        }
      else
        {
          token = getNumber();
        }
      
      return NUMBER;
    }

  // Search for symbols (e.g. comment start, directives).
  // Symbols are searched BEFORE keywords, because symbols may contain
  // letters as well, e.g. in Forth
  // Length of the symbol found in the symbol string:
  unsigned int symbTokenLength,
  symbolPos;
  bool found=false;

  // look up the current character in the symbol string
  symbolPos = langInfo.getSymbolString().find( c );

  while ((symbolPos!= string::npos) && (!found))
    {
      // determine the length of the symbol (up to the next space)
      symbTokenLength=langInfo.getSymbolString().find(' ', symbolPos)-symbolPos;

      // extract the symbol; note that token is overwritten here even if the
      // candidate turns out not to match
      token = langInfo.getSymbolString().substr(symbolPos, symbTokenLength);

      // Checking for a preceding space in the symbol string prevents
      // matching parts of symbols:
      if (   token == line.substr(lineIndex-1, symbTokenLength)
          && (langInfo.getSymbolString()[symbolPos-1] == ' '))
        {
          found = true;
          // skip the remaining characters of the symbol
          lineIndex += (symbTokenLength-1);
        }
      // continue with the next space-separated entry of the symbol string
      else
        {
          symbolPos = langInfo.getSymbolString().find_first_not_of(' ', 
                      langInfo.getSymbolString().find(' ',symbolPos));
        }
    } 

  if (found)
    {
      // the state constant follows the symbol in the symbol string
      State foundState = getState(langInfo.getSymbolString(), symbolPos);
      if (langInfo.isForth()) {
        if (foundState==MULTI_LINE_COMMENT_BEGIN)
        {
          // Forth workaround: if no whitespace follows the multi-line comment
          // begin delimiter, it is not a comment
          c=getInputChar();
          token += c;
          // recognise " (" correctly
          if (!isspace(c))
            {
               foundState=((c=='\"')?STRING_END:STANDARD);
            }
        }
        // do not recognise .(_ as a string
        if ( (token==".(") && (getInputChar()=='_')){
            lineIndex--;              
            return STANDARD;              
         }
         return foundState;
       }
       // Workaround: member attributes are treated like tags
       // as long as there is no separate member attribute style
       if (isFirstNonWsChar() && foundState==MEMBER_ATTRIBUTE_BEGIN){
          return TAG_BEGIN;          
       }
       else if (foundState==MEMBER_ATTRIBUTE_END) {
         return  TAG_END;         
       }
       // With full-line comments the comment symbol only counts when
       // lineIndex is 1; otherwise execution falls through to the
       // identifier check below.
       if (foundState==SINGLE_LINE_COMMENT && langInfo.isFullLineComment()){
          if (lineIndex==1)
          return SINGLE_LINE_COMMENT;      
       } else { 
         return foundState;          
       }
    }

  // Parse an alphanumeric token and recognise it as keyword or type
  if (StringTools::isAlpha(c) || langInfo.isPrefix(c))
    {
      if (langInfo.isPrefix(c))
        {
          // keep the prefix and let getIdentifier() start behind it
          token = c;
          ++lineIndex;
          token += getIdentifier();
        }
      else
        {
          token = getIdentifier();
        }
     
      // Copy of the keyword/type, lower-cased for case-insensitive languages
      string reservedWord=(langInfo.isCaseSensitive())?
                            token:StringTools::lowerCase(token);
          
      bool isKeyword=langInfo.isKeyword(reservedWord);
      if (isKeyword) {
        return KEYWORD;
      }
      else {
         return langInfo.isType(reservedWord)? TYPE: STANDARD;
      }     
    }

  // Character was not recognised, go to the standard state
  token = c;  
  return STANDARD;
}

/**
 * Builds a copy of s in which every character has been passed through
 * maskCharacter().
 *
 * @param s  text to mask
 * @return the concatenated results of maskCharacter() for each character
 */
string CodeParser::maskString(const string & s)
{
  ostringstream masked;
  for (string::const_iterator it=s.begin(); it!=s.end(); ++it)
    {
      masked << maskCharacter(*it);
    }
  return masked.str();
}

void CodeParser::printMaskedToken()
{
  *out << maskString(token);
  token.clear();
}

bool CodeParser::isAllowedChar(char c)
{
  string allowedChars=langInfo.getAllowedChars();
  return ( allowedChars.find(c)!= string::npos);
}

/**
 * Runs one conversion from input to output.
 *
 * @param inFileName   input file; empty means standard input
 * @param outFileName  output file; empty means standard output
 * @return BAD_STYLE if the document style was not found (nothing is opened),
 *         BAD_INPUT / BAD_OUTPUT if a stream is in a failed state
 *         (BAD_OUTPUT wins if both fail), PARSE_OK otherwise
 */
ParseError CodeParser::printOutput (const string & inFileName,
                                    const string &outFileName)
{
  if (!docStyle.found()){
    return BAD_STYLE;
  }

  const bool readFromFile = !inFileName.empty();
  const bool writeToFile  = !outFileName.empty();

  // Open the streams; file streams are owned by this function.
  if (readFromFile) {
    in = new ifstream (inFileName.c_str());
  } else {
    in = &cin;
  }
  if (writeToFile) {
    out = new ofstream (outFileName.c_str());
  } else {
    out = &cout;
  }

  ParseError error=PARSE_OK;
  if ( in->fail()){
    error=BAD_INPUT;
  }
  if ( out->fail()){
    error=BAD_OUTPUT;
  }

  if (error==PARSE_OK) {
    // With reformatting enabled the formatter reads from the input stream.
    if (formatter != NULL){
      formatter->init(new astyle::ASStreamIterator(in));
    }
    // Header and footer are left out for fragment output.
    if (! fragmentOutput){
      *out << getHeader(inFileName);
    }
    printBody();
    if (! fragmentOutput){
      *out << getFooter();
    }
  }

  // Destroy the file streams created above (closes the files).
  if (writeToFile) {
    delete out;
    out=NULL;
  }
  if (readFromFile) {
    delete in;
    in=NULL;
  }
  return error;
}

/**
 * Writes the closing tag registered for state s and marks the current state
 * as _UNKNOWN.
 */
void CodeParser::closeTag(State s){
  (*out) << styleTagClose[s];
  this->currentState = _UNKNOWN;
}

/**
 * Writes the opening tag registered for state s and records s as the
 * current state.
 */
void CodeParser::openTag(State s){
  (*out) << styleTagOpen[s];
  this->currentState = s;
}

///////////////////////////////////////////////////////////////////////////////
////////////////////PARSER STATES//////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

void CodeParser::processStandardState()
{
  // falls highlighting deaktiviert (zB bei txt.lang),
  // Eingabe nicht parsen, sondern lediglich Escapesequenzen einsetzen
  if (langInfo.highlightingDisabled()){
     string line;
     while (getline(*in, line)){
       *out << maskString(line) << getNewLine();
     }
     *out << flush;
  }
  else {
    // ID des erkannten Zustands (z.B Keyword, Type, Comment...)
    State state=STANDARD;
    
    // End of file-Flag
    bool eof=false, 
         firstLine=true; //newline vor erster Zeile verhindern    
    openTag(STANDARD);
    do {     
      // naechsten Zustand erkennen
      state= getCurrentState(state==NUMBER);

      switch(state)
        {                   // in entsprchenden Zustand verzweigen
        case TYPE:        
          closeTag(STANDARD);
          eof=processTypeState();
          openTag(STANDARD);
          break;
        case KEYWORD:        
          closeTag(STANDARD);
          eof=processKeywordState();
          openTag(STANDARD);
          break;
        case TYPE_BEGIN:
          closeTag(STANDARD);
          eof=processTypeState(TYPE_BEGIN);
          openTag(STANDARD);
          break;
        case KEYWORD_BEGIN:
          closeTag(STANDARD);
          eof=processKeywordState(KEYWORD_BEGIN);
          openTag(STANDARD);
          break;
        case NUMBER:
          closeTag(STANDARD);
          eof=processNumberState();
          openTag(STANDARD);
          break;
        case MULTI_LINE_COMMENT_BEGIN:
          closeTag(STANDARD);
          eof=processMultiLineCommentState( );
          openTag(STANDARD);
          break;
        case SINGLE_LINE_COMMENT:
          closeTag(STANDARD);
          eof=processSingleLineCommentState();
          openTag(STANDARD);
          break;
        case STRING:
          closeTag(STANDARD);
          eof=processStringState( STANDARD);
          openTag(STANDARD);
          break;
        case DIRECTIVE_LINE:
          closeTag(STANDARD);
          eof=processDirectiveState();
          openTag(STANDARD);
          break;
        case TAG_BEGIN:
          closeTag(STANDARD);
          eof=processTagState();
          openTag(STANDARD);
          break;
        case ESC_CHAR:
          closeTag(STANDARD);
          eof=processEscapeCharState();
          openTag(STANDARD);
          break;
        case SYMBOL:
          closeTag(STANDARD);
          eof=processSymbolState();
          openTag(STANDARD);
          break;
        case _EOL:               // ende der zeile erreicht                    
          insertLineNumber(!firstLine);
          firstLine=false;
          break;
        case _EOF:
          eof=true;
          break;
        case _WS:
          processWsState();
          break;
        default:
          printMaskedToken();
          break;
        }
      }
    while (!eof);
    closeTag(STANDARD);
    *out << getNewLine();
    *out << flush;
  }
}

/**
 * Outputs a keyword.
 *
 * @param myState  KEYWORD_BEGIN for a delimited keyword, which only ends at
 *                 KEYWORD_END; for any other value the state ends as soon as
 *                 a token with a state different from myState is found
 * @return true if the end of the input was reached
 */
bool CodeParser::processKeywordState(State myState){
  bool reachedEof=false;
  bool leaveState=false;
  const bool delimited = (myState==KEYWORD_BEGIN);

  openTag(KEYWORD);
  while (!leaveState && !reachedEof)
    {
      printMaskedToken();
      const State next = getCurrentState();

      if (next==_WS) {
        processWsState();
      }
      else if (next==_EOL) {
        insertLineNumber();
      }
      else if (next==_EOF) {
        reachedEof = true;
      }
      else if (next==KEYWORD_END) {
        // the end delimiter only matters for delimited keywords
        if (delimited) {
          printMaskedToken();
          leaveState=true;
        }
      }
      else {
        // check whether a new state has been entered
        leaveState=(!delimited && myState!=next);
      }
    }

  closeTag(KEYWORD);
  return reachedEof;
}

/**
 * Outputs a type name.
 *
 * @param myState  TYPE_BEGIN for a delimited type, which only ends at
 *                 TYPE_END; for any other value the state ends as soon as a
 *                 token with a state different from myState is found
 * @return true if the end of the input was reached
 */
bool CodeParser::processTypeState(State myState){
  bool reachedEof=false;
  bool leaveState=false;
  const bool delimited = (myState==TYPE_BEGIN);

  openTag(TYPE);
  while (!leaveState && !reachedEof)
    {
      printMaskedToken();
      const State next = getCurrentState();

      if (next==_WS) {
        processWsState();
      }
      else if (next==_EOL) {
        insertLineNumber();
      }
      else if (next==_EOF) {
        reachedEof = true;
      }
      else if (next==TYPE_END) {
        // the end delimiter only matters for delimited types
        if (delimited) {
          printMaskedToken();
          leaveState=true;
        }
      }
      else {
        // check whether a new state has been entered
        leaveState=(!delimited && myState!=next);
      }
    }

  closeTag(TYPE);
  return reachedEof;
}

bool CodeParser::processNumberState(){
  State newState;
  bool eof=false,
       exitState=false; 
   
  openTag(NUMBER);
  do
    {  
      printMaskedToken();
      newState= getCurrentState(true);
      switch(newState)
        {
        case _WS: 
          processWsState();
          break;
        case _EOL: 
          insertLineNumber();
          break;
        case _EOF:  
          eof = true;
          break;                
        default: 
          exitState=newState!=NUMBER;
          break;
        }
    }
  while ((!exitState) && (!eof));

  closeTag(NUMBER);
  return eof;

}

/**
 * Outputs a multi-line comment up to its closing delimiter. Nested comments
 * are counted if the language allows them.
 *
 * @return true if the end of the input was reached
 */
bool CodeParser::processMultiLineCommentState()
{
  // number of comments currently open
  int openComments=1;
  bool reachedEof=false;
  bool leaveState=false;

  openTag(MULTI_LINE_COMMENT_BEGIN);
  while (!leaveState && !reachedEof)
    {
      printMaskedToken();

      switch(getCurrentState())
        {
        case _EOF:
          reachedEof = true;
          break;
        case _EOL:
          insertLineNumber();
          break;
        case _WS:
          processWsState();
          break;
        case MULTI_LINE_COMMENT_END:
          --openComments;
          if (openComments==0)
            {
              printMaskedToken();
              leaveState=true;
            }
          break;
        case MULTI_LINE_COMMENT_BEGIN:
          if (langInfo.allowNestedMLComments())
            {
              ++openComments;
            }
          break;
        default:
          break;
        }
    }

  closeTag(MULTI_LINE_COMMENT_BEGIN);
  return reachedEof;
}

/**
 * Outputs a single-line comment; the state ends at the end of the line.
 *
 * @return true if the end of the input was reached
 */
bool CodeParser::processSingleLineCommentState()
{
  bool reachedEof=false;
  bool leaveState=false;

  openTag(SINGLE_LINE_COMMENT);
  while (!leaveState && !reachedEof)
    {
      printMaskedToken();
      const int next = getCurrentState();

      if (next==_WS) {
        processWsState();
      }
      else if (next==_EOL) {
        // the comment ends with the line
        printMaskedToken();
        leaveState=true;
        insertLineNumber();
      }
      else if (next==_EOF) {
        reachedEof = true;
      }
      // every other token simply belongs to the comment
    }

  closeTag(SINGLE_LINE_COMMENT);
  return reachedEof;
}

/**
 * Outputs a directive line. Comments and strings inside the directive are
 * handled by their own states.
 *
 * @return true if the end of the input was reached
 */
bool CodeParser::processDirectiveState()
{
  bool reachedEof=false;
  bool leaveState=false;

  openTag(DIRECTIVE_LINE);
  while (!leaveState && !reachedEof)
    {
      printMaskedToken();

      switch(getCurrentState())
        {
        case _EOF:
          reachedEof = true;
          break;
        case _WS:
          processWsState();
          break;
        case _EOL:
          printMaskedToken();
          // Forth ends directives with a delimiter; elsewhere the directive
          // ends unless the line ended with the continuation character
          if (!langInfo.isForth()
               && (terminatingChar!=langInfo.getContinuationChar()))
            {
              leaveState=true;
            }
          insertLineNumber();
          break;
        case DIRECTIVE_LINE_END:
          printMaskedToken();
          leaveState=true;
          break;
        case STRING:
          closeTag(DIRECTIVE_LINE);
          reachedEof=processStringState(DIRECTIVE_LINE);
          openTag(DIRECTIVE_LINE);
          break;
        case MULTI_LINE_COMMENT_BEGIN:
          closeTag(DIRECTIVE_LINE);
          reachedEof= processMultiLineCommentState();
          openTag(DIRECTIVE_LINE);
          break;
        case SINGLE_LINE_COMMENT:
          // a single-line comment also terminates the directive
          closeTag(DIRECTIVE_LINE);
          reachedEof= processSingleLineCommentState();
          openTag(DIRECTIVE_LINE);
          leaveState=true;
          break;
        default:
          break;
        }
    }

  closeTag(DIRECTIVE_LINE);
  return reachedEof;
}

/**
 * Outputs a string literal up to its closing delimiter.
 *
 * Escape characters (unless disabled for raw strings) and, if the language
 * enables it, types inside the string are handled by their own states.
 *
 * Fix: the TYPE branch used to close and reopen the STRING tag even when the
 * string had been opened as DIRECTIVE_STRING. It now uses myState, like the
 * ESC_CHAR branch, so the tags written always match the one opened here.
 *
 * @param oldState  state the string was found in; DIRECTIVE_LINE selects the
 *                  DIRECTIVE_STRING tag, everything else the STRING tag
 * @return true if the end of the input was reached
 */
bool CodeParser::processStringState(State oldState)
{
  State  newState;
  bool eof=false, exitState=false,
       returnedFromOtherState=false ,
       // raw strings: getLastChar() equals the language's raw string prefix
       disableEscCharHighlighting=getLastChar()==langInfo.getRawStringPrefix();
  // remember which delimiter opened the string
  string openStringDelimiter=token;

  State myState= (oldState==DIRECTIVE_LINE) ? DIRECTIVE_STRING : STRING;
  openTag(myState);
  do
    {
      // After returning from a nested state the token still has to be
      // examined, so it must not be printed here
      if (!returnedFromOtherState)
        {
           printMaskedToken();
        }
      returnedFromOtherState=false;
      newState= getCurrentState();

      switch(newState)
        {
        case _WS:
          processWsState();
          break;
        case _EOL:
          insertLineNumber();
          break;
        case MULTI_LINE_COMMENT_END:

          // In Forth the string end and the multi-line comment end
          // delimiter are the same
          if (langInfo.isForth())
            {
              newState=STRING_END;
            }
          else
            {
              printMaskedToken();
              break;
            }
          // Forth only: deliberate fall through to the string end handling
        case STRING:
        case STRING_END:
           // Workaround: Forth strings that start with ." may also end
           // with ."
           // FIXME: bug with the C# raw string notation:  @" "
           if (langInfo.isForth() && (newState==STRING) && (token!="\"")) {
             exitState= ( openStringDelimiter==token)
                        || (openStringDelimiter=="\"");
           } else {
            // the string ends when the closing delimiter matches the
            // opening delimiter
            if (langInfo.isForth() && token=="(\"") {
               exitState=true;
            } else {
               exitState= langInfo.isMatchingStringDelim(openStringDelimiter,token);
            }
          }
          printMaskedToken();
          break;
        case ESC_CHAR:
              // do not highlight escape characters in raw strings (C#)
             if (!disableEscCharHighlighting){
                closeTag(myState);
                eof=processEscapeCharState();
                openTag(myState);
                returnedFromOtherState=true;
             }
          break;
        case TYPE:
          if (langInfo.typesWithinStringsEnabled()){
             closeTag(myState);
             eof=processTypeState(TYPE);
             openTag(myState);
             returnedFromOtherState=true;
           } else {
             printMaskedToken();
           }
          break;
        case _EOF:
          eof = true;
          break;
        default:
          printMaskedToken();
          break;
        }
    }
  while ((!exitState) && (!eof));

  closeTag(myState);
  return eof;
}

/**
 * Outputs a tag (written with the KEYWORD style) up to TAG_END. Strings,
 * escape characters and numbers inside the tag are handled by their own
 * states.
 *
 * @return true if the end of the input was reached
 */
bool CodeParser::processTagState()
{
  bool reachedEof=false;
  bool leaveState=false;
  // set after a nested state returned: the pending token must be examined
  // again instead of being printed
  bool tokenPending=false;

  openTag(KEYWORD);
  while (!leaveState && !reachedEof)
    {
      if (!tokenPending)
        {
          printMaskedToken();
        }
      tokenPending = false;

      switch(getCurrentState())
        {
        case _EOF:
          reachedEof = true;
          break;
        case _EOL:
          insertLineNumber();
          break;
        case _WS:
          processWsState();
          break;
        case TAG_END:
          printMaskedToken();
          leaveState=true;
          break;
        case STRING:
          closeTag(KEYWORD);
          reachedEof=processStringState(KEYWORD);
          openTag(KEYWORD);
          tokenPending = true;
          break;
        case ESC_CHAR:
          closeTag(KEYWORD);
          reachedEof=processEscapeCharState();
          openTag(KEYWORD);
          tokenPending = true;
          break;
        case NUMBER:
          closeTag(KEYWORD);
          reachedEof=processNumberState();
          openTag(KEYWORD);
          tokenPending = true;
          break;
        default:
          printMaskedToken();
          break;
        }
    }

  closeTag(KEYWORD);
  return reachedEof;
}

bool CodeParser::processSymbolState(){

  State newState;
  bool eof=false,
       exitState=false; 
   
  openTag(SYMBOL);
  do
    {  
      printMaskedToken();
      newState= getCurrentState(true);
      switch(newState)
        {
        case _WS: 
          processWsState();
          break;
        case _EOL: 
          insertLineNumber();
          break;
        case _EOF:  
          eof = true;
          break;                
        default: 
          exitState=newState!=SYMBOL;
          break;
        }
    }
  while ((!exitState) && (!eof));

  closeTag(SYMBOL);
  return eof;
}

/**
 * Outputs whitespace.
 *
 * Without maskWs the token is written unchanged. With maskWs the whole run
 * of whitespace starting one character before lineIndex is consumed, each
 * blank counting as one spacer and every other whitespace character as
 * numberSpaces spacers; runs wider than one are enclosed in
 * maskWsBegin/maskWsEnd. With excludeWs the current style tag is closed
 * around the whitespace. The token is emptied in either case.
 */
void CodeParser::processWsState()
{
  if (maskWs)
    {
      // step back onto the whitespace character just read
      lineIndex--;

      int width=0;
      for (; isspace(line[lineIndex]); ++lineIndex)
        {
          width += (line[lineIndex]==' ') ? 1 : numberSpaces;
        }

      const bool enclose = (width>1);

      if (excludeWs) {
        *out << styleTagClose[currentState];
      }
      if (enclose) {
        *out << maskWsBegin;
      }
      for (int remaining=width; remaining>0; --remaining) {
        *out << spacer;
      }
      if (enclose) {
        *out << maskWsEnd;
      }
      if (excludeWs) {
        *out << styleTagOpen[currentState];
      }
    }
  else
    {
      *out << token;
    }

  token.clear();
}


bool CodeParser::processEscapeCharState()
{
  State newState;
  bool eof=false, exitState=false;

  openTag(ESC_CHAR);
  do
    {    
      printMaskedToken();
      skipEscapeSequence();        
      newState= getCurrentState();
      switch(newState)
        {
        case _EOL: 
          insertLineNumber();
          exitState=true; 
          break;        
        case _WS:
          processWsState();          
          lineIndex--;          
          break;
        case _EOF: 
          eof = true;
          break;
        default:
          exitState=newState!=ESC_CHAR;
          break;
        }
    }
  while ((!exitState) && (!eof));

  closeTag(ESC_CHAR);
  return eof;
}

/**
 * Writes the characters of the escape sequence that starts at lineIndex and
 * advances lineIndex past them.
 *
 * Lengths: a digit starts a run of up to four digits (octal \ooo, also
 * covers \0), 'x'/'X' takes four characters (\x000), 'u'/'U' takes five
 * (\u00xx, Java unicode), everything else takes one character. The sequence
 * is cut short at the end of the line.
 *
 * Fixes compared to the original:
 *  - the digit scan no longer looks beyond the end of the line;
 *  - the output loop used `lineIndex++ < line.length()`, which incremented
 *    lineIndex even when the test failed and so pushed it past the end of
 *    the line; getInputChar() detects the end of a line by equality and
 *    would then read out of bounds;
 *  - characters are cast to unsigned char before calling ctype functions.
 */
void  CodeParser::skipEscapeSequence(){
  if (lineIndex>=line.length()){
    return;
  }

  const unsigned char first=static_cast<unsigned char>(line[lineIndex]);
  int charsToSkip=1;

  if (isdigit(first)){
    while (   charsToSkip<4
           && lineIndex+charsToSkip<line.length()
           && isdigit(static_cast<unsigned char>(line[lineIndex+charsToSkip]))) {
      ++charsToSkip;
    }
  }
  else if ( tolower(first)=='x'){
    charsToSkip=4;
  }
  else if ( tolower(first)=='u'){
    charsToSkip=5;
  }

  // never step beyond the end of the line
  while (charsToSkip-- > 0 && lineIndex<line.length()){
    *out << maskCharacter(line[lineIndex]);
    ++lineIndex;
  }
}

/**
 * Checks the characters of the current line from index lineIndex-2 down to
 * index 0.
 *
 * @return true if all of them are whitespace (or there are none),
 *         false as soon as a non-whitespace character is found
 */
bool CodeParser::isFirstNonWsChar() {
  for (unsigned int pos=lineIndex-1; pos!=0; --pos) {
    if (!isspace(line[pos-1])) {
      return false;
    }
  }
  return true;
}

/**
 * @return the value of newLineTag
 */
string CodeParser::getNewLine(){
  return this->newLineTag;
}

void CodeParser::insertLineNumber(bool insertNewLine) {

  if (insertNewLine){
    *out << getNewLine();          
  }
  if (showLineNumbers)
    {
      ostringstream os;      
      os <<setw(LINE_NUMBER_WIDTH)<<right<< lineNumber;
      
      *out << styleTagClose[currentState];
      *out << styleTagOpen[LINENUMBER] 
           << maskString(os.str()) 
           << spacer 
           << styleTagClose[LINENUMBER];      
      *out << styleTagOpen[currentState];
    }
}

/**
 * @return the result of docStyle.found()
 */
bool CodeParser::styleValid(){
  const bool styleFound = docStyle.found();
  return styleFound;
}

/**
 * @return a reference to the language definition held by this parser
 */
LanguageDefinition &CodeParser::getLanguage(){
  return this->langInfo;
}

/**
 * Loads the language definition at langDefPath.
 *
 * @param langDefPath  path of the language definition
 * @return LOAD_FAILED if loading failed; otherwise LOAD_NEW if
 *         needsReload() reported true before loading, else LOAD_NONE
 */
LoadResult CodeParser::setLanguage(const string& langDefPath){
  // must be queried before load() is called
  const bool reloadNecessary = langInfo.needsReload(langDefPath);

  if (!langInfo.load(langDefPath)) {
    return LOAD_FAILED;
  }
  if (reloadNecessary) {
    return LOAD_NEW;
  }
  return LOAD_NONE;
}

/**
 * Enables reformatting for the suffixes "c", "cs" and "java"; for every
 * other suffix the formatter is disabled.
 *
 * @param style   formatting style to use
 * @param suffix  suffix identifying the input language
 * @return true if the suffix is one of the three listed above
 */
bool CodeParser::enableReformatting(FormatStyle style, const string&suffix){
  const bool supported = (suffix=="c") || (suffix=="cs") || (suffix=="java");
  setFormatter(supported ? style : FORMAT_DISABLED);
  return supported;
}
}
