/*
 * Copyright (c) 2003-2005 The University of Wroclaw.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *    1. Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *    3. The name of the University may not be used to endorse or promote
 *       products derived from this software without specific prior
 *       written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE UNIVERSITY BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

using System.Text;
using System.Globalization;
using System;

using Nemerle.Collections;

namespace Nemerle.Compiler {

// A lexical token.  Tokens are chained through `Next'; the group options
// (RoundGroup, BracesGroup, ...) additionally hold the first token of
// their contents in `Child', and Namespace holds it in `Body'.
public variant Token : System.Collections.IEnumerable
{
  | Identifier { name : string; }
  | Keyword { name : string; }
  | Operator { name : string; }

  | StringLiteral { value : string; }
  | CharLiteral { value : char; }
  | IntegerLiteral { lit : Literal.Integer; cast_to : Parsetree.PExpr }
  | FloatLiteral { value : float; }
  | DoubleLiteral { value : Double; }
  | DecimalLiteral { value : Decimal; }

  | Comment { value : string; } 

  | Semicolon
  | Comma
  | BeginBrace    // {
  | EndBrace      // }
  | BeginRound    // (
  | EndRound      // )
  | BeginSquare   // [
  | EndSquare     // ]
  | BeginQuote    // <[
  | EndQuote      // ]>

  | RoundGroup { Child : Token; }     // ( ... )
  | BracesGroup { Child : Token; }    // { ... }
  | SquareGroup { mutable Child : Token; }    // [ ... ]
  | QuoteGroup { Child : Token; } // <[ ... ]>
  | LooseGroup { mutable Child : Token; }                                 // ; ... ;

  | Namespace { Env : GlobalEnv; Body : Token; }
  | Using { Env : GlobalEnv; }
    
  | EndOfFile
  | EndOfGroup
  
  // source position of this token
  public mutable Location : Nemerle.Compiler.Location;
  // following token in the same chain (null at the end of the chain)
  public mutable Next : Token;
  
  public this () { }
  
  public this (loc : Location) {
    this.Location = loc;
  }

  // Textual form of the token: names and punctuation are rendered as
  // written, literals are re-quoted, groups are dumped by PreParser.Dump.
  public override ToString () : string {
    match (this) {
      | Identifier (name) | Keyword (name) | Operator (name) => name

      | StringLiteral (text)     => "\"" + text + "\""
      | CharLiteral (c)          => "'" + c.ToString () + "'"
      | IntegerLiteral (lit, _)  => lit.ToString ()
      | FloatLiteral (number)    => number.ToString ()
      | DoubleLiteral (number)   => number.ToString ()
      | DecimalLiteral (number)  => number.ToString ()

      | Comment (text) => "/*" + text + "*/"

      | Semicolon      => ";"
      | Comma          => ","
      | BeginBrace     => "{"
      | EndBrace       => "}"
      | BeginRound     => "("
      | EndRound       => ")"
      | BeginSquare    => "["
      | EndSquare      => "]"
      | BeginQuote     => "<["
      | EndQuote       => "]>"

      | EndOfFile | EndOfGroup => ""

      | RoundGroup | BracesGroup | SquareGroup | QuoteGroup | LooseGroup =>
        PreParser.Dump (this, "")

      | Namespace (env, body) =>
        def ns_name = env.CurrentNamespace.Name.ToString (".");
        "namespace " + ns_name + PreParser.Dump (body, "")

      | Using => "using import;"
    }
  }

  // Enumerates the tokens contained in a group (or namespace) token,
  // starting at its first child and following the `Next' links.
  // Throws System.ArgumentException for every non-group token.
  public GetEnumerator () : System.Collections.IEnumerator {
    def first_child =
      match (this) {
        | RoundGroup (child) | BracesGroup (child) | SquareGroup (child)
        | QuoteGroup (child) | LooseGroup (child)  | Namespace (_, child) =>
          child
        | _ => throw System.ArgumentException ("this is not a group token")
      };
    TokenEnumerator (first_child)
  }
  
  // With describe == false this is the same as ToString ().  Otherwise it
  // yields a human readable description of the token kind, as used in
  // diagnostics.
  public ToString (describe : bool) : string {
    if (!describe)
      ToString ()
    else
      match (this) {
        | Identifier (x) => "identifier `" + x + "'"
        | Keyword (x)    => "keyword `" + x + "'"
        | Operator (x)   => "operator `" + x + "'"

        | StringLiteral  => "string literal"
        | CharLiteral    => "character literal"
        | IntegerLiteral => "integer number literal"
        | FloatLiteral   => "float literal"
        | DoubleLiteral  => "double literal"
        | DecimalLiteral => "decimal literal"

        | Comment    => "documentation comment"
        | EndOfFile  => "end of file"
        | EndOfGroup => "separator or closing bracket"

        // punctuation is described through its plain textual form
        | Semicolon   | Comma      | BeginBrace
        | EndBrace    | BeginRound | EndRound
        | BeginSquare | EndSquare  | BeginQuote
        | EndQuote  =>
          "operator `" + ToString () + "'"

        | RoundGroup  => "`(...)' group"
        | BracesGroup => "`{...}' group"
        | SquareGroup => "`[...]' group"
        | QuoteGroup  => "`<[...]>' group"
        | LooseGroup (body) =>
          // a loose group is described by its first token, if it has one
          if (body != null) body.ToString (true)
          else "token group"

        | Namespace => "namespace scoping"
        | Using     => "using declaration"
      }
  }
}

// Enumerator walking a chain of tokens linked through Token.Next.
public class TokenEnumerator : System.Collections.IEnumerator {
  first : Token;            // head of the chain, kept so Reset () can rewind
  mutable current : Token;  // token last returned by MoveNext (null before the first call)
  mutable next : Token;     // token the following MoveNext will step to

  public this (begin : Token) {
    first = begin;
    next = begin;
  }

  public Current : object { get { current } }

  // Advances to the following token; returns false once the chain is exhausted.
  public MoveNext () : bool {
    if (next != null) {
      current = next;
      next = current.Next;
      true
    }
    else false
  }

  // Rewinds the enumerator to the position before the first token.
  // (Previously this was a no-op, so an enumerator could never be reused.)
  public Reset () : void {
    current = null;
    next = first;
  }
}

public abstract class LexerBase : IDisposable
{
  // Kind of numeric literal being scanned; chosen and updated by get_number.
  internal enum NumberMode {
    | Float     // literal starting with `.' or containing a fractional part
    | Decimal   // plain base-10 literal (the default)
    | Octal     // literal introduced by `0o' / `0O'
    | Hex       // literal introduced by `0x' / `0X'
    | Binary    // literal introduced by `0b' / `0B'
  }

  protected mutable putback : bool;
  protected mutable putbackVal : char;
  protected mutable isPendingChar : bool;  // is there already some first char
  protected mutable pendingChar : char;
  protected mutable line : int;
  protected mutable col : int;

  protected static opchars : array [bool];  
  public static BaseKeywords : Set [GlobalEnv.string_wrap];

  public mutable Keywords : Set [GlobalEnv.string_wrap];

  protected id_buffer : StringBuilder = StringBuilder ();
  
  // Exception thrown by the lexer; `name' holds the message text that was
  // passed to the constructor.
  public class Error : System.Exception
  {
    public name : string;

    public this (name : string)
    {
      this.name = name;
    }
  }

  // Starts at line 1, column 1 with no pushed-back and no pending character.
  public this ()
  {
    isPendingChar = false;
    putback = false;
    col = 1;
    line = 1;
  }

  // Supplies the next raw character of the input.  read () and peek () call
  // it whenever no pushed-back character is available.
  abstract protected do_read () : char;

  // IDisposable implementation; provided by the concrete lexer.
  public abstract Dispose () : void;
  
  // Consumes and returns the next character (the pushed-back one, if any)
  // and updates the line/column counters.  CR and tab characters only
  // trigger warning 10002; they do not advance the column.
  protected read () : char
  {
    def c =
      match (putback) {
        | true =>
          putback = false;
          putbackVal
        | false =>
          do_read ()
      };

    if (c == '\n') {
      ++line;
      col = 1
    }
    else if (c == '\r')
      Message.Warning (10002, this.Location, "CR character found in input stream")
    else if (c == '\t')
      Message.Warning (10002, this.Location, "tab character found in input stream")
    else
      ++col;

    c
  }

  // Returns the next character without consuming it.  The character is
  // fetched once and kept in putbackVal until read () takes it.
  protected peek () : char
  {
    when (!putback) {
      putbackVal = do_read ();
      putback = true;
    }
    putbackVal
  }

  // True for characters that may start an identifier: `_' or any letter.
  public static IsIdBeginning (ch : char) : bool
  {
    ch == '_' || Char.IsLetter (ch)
  }

  // True when `ch' is one of the operator characters registered in the
  // opchars table; characters with code above 255 are never operators.
  public static IsOperatorChar (ch : char) : bool
  {
    def code = ch :> int;
    code <= 255 && opchars [code]
  }

  // Checks whether `str' belongs to this lexer's current keyword set.
  public IsKeyword (str : string) : bool
  {
    def wrapped = GlobalEnv.string_wrap (str);
    Keywords.Contains (wrapped)
  }

  // Empties the shared scratch buffer used while scanning a token.
  protected clear_id_buffer () : void {
    id_buffer.Length = 0;
  }
  
  // Scans an operator starting with `first_ch' (already consumed) and
  // returns it as Token.Operator.  A `/' inside the operator is handed to
  // comment_beginning (): it is kept only when that call answers '/',
  // otherwise the operator ends there.
  protected get_op (first_ch : char) : Token
  {
    clear_id_buffer ();
    _ = id_buffer.Append (first_ch);

    def collect () {
      when (IsOperatorChar (peek ())) {
        match (read ()) {
          | '/' =>
            when (comment_beginning () == '/') {
              _ = id_buffer.Append ('/');
              collect ()
            }
          | c =>
            _ = id_buffer.Append (c);
            collect ()
        }
      }
    };
    collect ();

    Token.Operator (id_buffer.ToString ())
  }

  // Scans a numeric literal whose first character `first_ch' (a digit or
  // `.') has already been consumed and returns the matching literal token.
  //
  // Recognised forms: decimal integers, `0x' hex, `0o' octal, `0b' binary,
  // floating point numbers with an optional exponent and an `f' / `d' / `m'
  // suffix, and integers with the `l', `s', `b', `u' type suffixes (also
  // in two-letter combinations).  Underscores between digits are skipped.
  //
  // An OverflowException raised while parsing or converting is reported
  // through Message.Error and a zero integer literal is returned instead.
  // Malformed exponents and suffixes throw LexerBase.Error.
  protected get_number (first_ch : char) : Token
  {
    clear_id_buffer ();
    mutable already_seen_type = false;  // for the case 0b0 vs 0b
    
    // pick the initial mode from the prefix; the prefix letter is consumed
    mutable mode =
      match (first_ch) {
        | '.' => NumberMode.Float
        | '0' =>
          match (peek ()) {
            | 'x' | 'X' => ignore (read ()); NumberMode.Hex
            | 'o' | 'O' => ignore (read ()); NumberMode.Octal
            | 'b' | 'B' =>
              ignore (read ());
              // `0b' not followed by a digit is the number 0 with a `b' suffix
              unless (Char.IsDigit (peek ())) already_seen_type = true;
              NumberMode.Binary
              
            | x when Char.IsDigit (x) =>
              Message.Warning (this.Location, "leading zeros look like"
                               " octal modifiers, but they are not");
              NumberMode.Decimal
              
            | _ => NumberMode.Decimal
          }
        | _ => NumberMode.Decimal
      };
    // false when a `.' was consumed that turned out not to belong to the number
    mutable last_was_digit = true;

    _ = id_buffer.Append (first_ch);

    // read digits and . between them if it is present
    def loop () {
      match (peek ()) {
        | '.' => 
          when (mode == NumberMode.Decimal) {
            mode = NumberMode.Float;
            ignore (read ());
            if (Char.IsDigit (peek ())) {
              ignore (id_buffer.Append ('.'));
              loop ()
            }
            else {
              // the dot is a separate token (e.g. member access): hand it
              // back through the pending character and stay an integer
              isPendingChar = true;
              pendingChar = '.';
              last_was_digit = false;
              mode = NumberMode.Decimal;
            }
          }

        | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
        | 'e' | 'E' | 'f' | 'F' =>
          when (mode == NumberMode.Hex) {
            ignore (id_buffer.Append (read ()));
            loop ()
          }

        | '_' =>
          // digit separator: dropped when a digit follows, otherwise
          // returned to the token stream as a pending character
          _ = read ();
          if (char.IsDigit (peek ()))
            loop ()
          else {
            isPendingChar = true;
            pendingChar = '_';
          }
        
        | x when Char.IsDigit (x) =>
          ignore (id_buffer.Append (read ()));
          loop ()
        | _ => ()
      }
    };
    loop ();
    
    // reads the optional exponent (skipped when only_realsuf is true) and
    // the optional real suffix, then builds the floating point token
    def exponent_part (only_realsuf) {
      when (!only_realsuf) {
        match (peek ()) {
          | 'E' | 'e' =>
            ignore (id_buffer.Append (read ()));
            match (peek ()) {
              | '+' | '-' =>
                ignore (id_buffer.Append (read ()));
              | _ => ()
            };
            if (Char.IsDigit (peek ()))
              do {
                ignore (id_buffer.Append (read ()));
              } while (Char.IsDigit (peek ()))
            else
              throw Error ("no digits after exponent sign in float literal")
          | _ => ()
        }
      };
      match (Char.ToLower (peek (), CultureInfo.InvariantCulture)) {
        | 'f' =>
          ignore (read ());
          Token.FloatLiteral (Single.Parse (id_buffer.ToString (),
                                           NumberFormatInfo.InvariantInfo))
        | 'd' =>
          ignore (read ());          
          Token.DoubleLiteral (Double.Parse (id_buffer.ToString (),
                                           NumberFormatInfo.InvariantInfo))
        | 'm' =>
          ignore (read ());          
          Token.DecimalLiteral (Decimal.Parse (id_buffer.ToString (),
                                              NumberFormatInfo.InvariantInfo))
        | _ =>
          // no suffix: a double
          Token.DoubleLiteral (Double.Parse (id_buffer.ToString (),
                                            NumberFormatInfo.InvariantInfo))
      }
    };

    // convert given object to appropriate integer value according to type suffix
    def check_type_suffix (val : ulong) {
      // we should have integer number here

      def special (c) {
        | 'l' | 's' | 'b' | 'u' =>
          true
        | _ =>
          false
      };

      // check suffixes to make special types conversions
      mutable ch =
        if (already_seen_type)
          'b'
        else
          Char.ToLower (peek (), CultureInfo.InvariantCulture);
      if (special (ch)) {
        unless (already_seen_type) ignore (read ());
        mutable unsigned = ch == 'u';

        // we can have two letter suffixes
        def ch' = Char.ToLower (peek (), CultureInfo.InvariantCulture);
        if (special (ch')) {
          ignore (read ());
          if (ch' == 'u') {
            when (unsigned)
              throw Error ("bad integer suffix (unsigned twice)");
            unsigned = true;
          }
          else {
            // both chars can be different than 'u' and then they must be 'bs'
            match ((ch, ch')) {
              | ('s', 'b') | ('b', 's') =>
                ch = 'b'
              | _ =>
                unless (unsigned)
                  throw Error ("bad integer suffix (type constraint twice)");
                ch = ch';
            }
          }
        }
        else
          // only 'b' suffix, so it is unsigned byte
          when (ch == 'b') unsigned = true;

        // [ch] now contains type meaning or 'u' (then it is uint for sure)
        def parms = match (ch) {
          | 'b' =>
            if (unsigned)
              (Literal.FromByte (val :> byte), <[ byte ]>)
            else
              // workaround mono #74925
              (Literal.FromSByte ((val :> int) :> sbyte), <[ sbyte ]>)
          | 's' =>
            if (unsigned)
              (Literal.FromUShort (val :> ushort), <[ ushort ]>)
            else
              // workaround mono #74925
              (Literal.FromShort ((val :> int) :> short), <[ short ]>)
          | 'l' =>
            if (unsigned)
              (Literal.FromULong (val), <[ ulong ]>)
            else
              (Literal.FromLong (val :> long), <[ long ]>)
            
          | _ => (Literal.FromUInt (val :> uint), <[ uint ]>)
        }
        Token.IntegerLiteral (parms);
      }
      else
        Token.IntegerLiteral (Literal.Integer (val, false, null), null)
    };
    
    try {
      match (mode) {
        | NumberMode.Float =>
          match (peek ()) {
            | 'E' | 'e' => exponent_part (false)
            | _ => exponent_part (true)
          }
        | NumberMode.Decimal =>
          if (last_was_digit)
            match (Char.ToLower (peek (), CultureInfo.InvariantCulture))
            {
              | 'e' => exponent_part (false)
              | 'f' | 'd' | 'm' => exponent_part (true)
              | _ =>
                check_type_suffix (ulong.Parse (id_buffer.ToString ()))
            }
          else
            // a `.' was split off after the digits, so no suffix may follow
            Token.IntegerLiteral (Literal.Integer (ulong.Parse (id_buffer.ToString ()),
                                                   false, null), null)

        | NumberMode.Hex =>
          def val = UInt64.Parse (id_buffer.ToString (), Globalization.NumberStyles.HexNumber,
                                  CultureInfo.InvariantCulture);
          check_type_suffix (val)

        | NumberMode.Binary =>
          mutable value = 0ul;
          for (mutable i = 0; i < id_buffer.Length; ++i)
            match (id_buffer [i]) {
              | '0' => value <<= 1
              | '1' => value <<= 1; value += 1ul
              | x =>
                Message.Error (this.Location, $"binary literal number must be"
                               " composed of 1 and 0s, while there is `$(x)'")
            };
          check_type_suffix (value)

        | NumberMode.Octal =>
          mutable value = 0ul;
          for (mutable i = 0; i < id_buffer.Length; ++i)
            // 48 is the character code of '0'
            match (Convert.ToUInt64 (id_buffer [i]) - 48ul) {
              | x when 0ul <= x && x <= 7ul => value <<= 3; value += x;
              | x =>
                Message.Error (this.Location, $"octal literal number must be"
                               " composed of 0 to 7 digits while it has `$(x)'")
            };
          check_type_suffix (value)
      }
    }
    catch {
      _ is System.OverflowException =>
        Message.Error (this.Location,
                       $ "number literal $id_buffer is too large for given type");
        Token.IntegerLiteral (Literal.Integer (0, false, null), null)
    }
  }

  // Scans an identifier-like token whose first character has already been
  // consumed.  A leading apostrophe that is not followed by an identifier
  // start is treated as the beginning of a character literal.  Otherwise
  // letters, `_', digits and apostrophes are collected, and the result is
  // a three-character 'x' character literal, a keyword or an identifier.
  protected get_id (first_ch : char) : Token
  {
    def quoted = first_ch == '\'';

    if (quoted && !IsIdBeginning (peek ()))
      get_char ()
    else {
      clear_id_buffer ();
      _ = id_buffer.Append (first_ch);

      def is_id_char (c) {
        IsIdBeginning (c) || Char.IsDigit (c) || c == '\''
      };
      def collect () {
        when (is_id_char (peek ())) {
          _ = id_buffer.Append (read ());
          collect ()
        }
      };
      collect ();

      def text = System.String.Intern (id_buffer.ToString ());

      if (quoted && text.Length == 3 && text [2] == '\'')
        Token.CharLiteral (text [1])
      else if (IsKeyword (text))
        Token.Keyword (text)
      else
        Token.Identifier (text)
    }
  }

  // Reads the hexadecimal digits of a character escape and returns the
  // character they denote.
  //
  // len == -1 : variable length form, one to four digits; scanning stops
  //             at the first non-hex character, which is left in the input.
  // otherwise : exactly `len' digits are required.
  //
  // Throws Error ("bad escape character") when a required digit is missing
  // and an Error about the U+0000..U+FFFF range when the value does not
  // fit into a char.
  protected get_char_from_hex(len : int) : char
  {
    def max = if (len == -1) 4 else len;
    clear_id_buffer ();
    def loop (i) {
      when (i < max) {
        // Look before consuming: the old code read the terminating
        // character and pushed it back by hand, so the line/column
        // counters were advanced twice for it.
        match (peek ()) {
          | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
          | 'e' | 'E' | 'f' | 'F' | '0' | '1' | '2' | '3'
          | '4' | '5' | '6' | '7' | '8' | '9' =>
            _ = id_buffer.Append (read ());
            loop (i + 1)
            
          | _ =>
            when (len != -1 || i == 0) {
              // the offending character is still consumed, as before
              _ = read ();
              throw Error ("bad escape character")
            }
        }
      }
    }
    loop (0);
    
    try {
      Convert.ToChar (UInt64.Parse (id_buffer.ToString (), Globalization.NumberStyles.HexNumber,
                                    CultureInfo.InvariantCulture));
    }
    catch {
      _ is System.OverflowException =>
        throw Error ($ "Character literal $id_buffer must be in the range U+0000 to U+FFFF");
    }
  }

  
  // Translates the character following a backslash into the character it
  // stands for.  The hex forms (\x, \u, \U) read their digits from the
  // input.  An unknown escape is reported through Message.Error and
  // yields a space.
  protected escape_value (ch : char) : char
  {
    match (ch) {
      // characters that simply stand for themselves
      | '"' | '\'' | '\\' => ch

      // control characters
      | '0' => '\0'
      | 'b' => '\b'
      | 'e' => '\e'
      | 'n' => '\n'
      | 'r' => '\r'
      | 't' => '\t'

      // numeric forms
      | 'x' => get_char_from_hex (-1)
      | 'u' => get_char_from_hex (4)
      | 'U' => get_char_from_hex (8)

      | _ =>
        Message.Error (this.Location, $"bad escape character `\\$(ch)'");
        ' '
    }
  }
    
  // Scans a character literal (the opening apostrophe is already consumed)
  // by reading it as an apostrophe-delimited string and requiring exactly
  // one character in it.
  protected get_char () : Token
  {
    match (get_string ('\'')) {
      | Token.StringLiteral (v) =>
        match (v.Length) {
          | 0 => throw Error ("empty character literal")
          | 1 => Token.CharLiteral (v [0])
          | _ => throw Error ("character literal too long")
        }
      | _ => Util.ice ("Value is not string in get_char")
    }
  }

  // Hook called by get_string just before it continues a literal with an
  // adjacent one; the base implementation does nothing.
  virtual protected ignore_comments () : void { }
  
  // Scans a literal terminated by `end_ch' (the opening delimiter is
  // already consumed), decoding backslash escapes.  After the closing
  // delimiter, whitespace is skipped and, if another `end_ch' follows,
  // the next literal is appended to this one.  A raw newline inside the
  // literal throws Error.
  protected get_string (end_ch : char) : Token
  {
    def text = StringBuilder ();

    def scan () {
      def ch = read ();
      if (ch == '\\') {
        _ = text.Append (escape_value (read ()));
        scan ()
      }
      else if (ch == '\n')
        throw Error ("newline before end of string")
      else if (ch != end_ch) {
        _ = text.Append (ch);
        scan ()
      }
      else
        // closing delimiter: look for a directly following literal
        when (eat_whitespace () && peek () == end_ch) {
          ignore_comments ();
          _ = read ();
          scan ()
        }
    }
    scan ();

    Token.StringLiteral (text.ToString ())
  }

  // Scans an @"..." literal (everything up to and including the opening
  // quote is already consumed).  No escapes are interpreted; a doubled
  // quote stands for a single quote character and a lone quote ends the
  // literal.
  protected get_monkey_string () : Token
  {
    def text = Text.StringBuilder (32);

    def scan () : Token {
      def ch = read ();
      if (ch != '"') {
        _ = text.Append (ch);
        scan ()
      }
      else if (peek () == '"') {
        _ = text.Append ('"');
        _ = read ();
        scan ()
      }
      else
        Token.StringLiteral (text.ToString ())
    };

    scan ()
  }

  // Called by get_op right after a `/' has been consumed.  get_op keeps
  // the `/' as part of the operator when this returns '/', and stops
  // collecting operator characters for any other result.
  abstract protected comment_beginning () : char;
  
  // Skips input that does not belong to any token.  get_string calls it
  // after a closing delimiter and only looks for an adjacent literal when
  // it returns true.
  abstract protected eat_whitespace () : bool;

  // Scans one token.  The first character is the pending character if one
  // was left behind by an earlier scan, otherwise the next input
  // character; a read failure (end of input) is turned into a space,
  // which yields Token.EndOfFile.  Throws Error for characters that
  // cannot start a token.
  protected do_get_token () : Token
  {
    def ch =
      if (!isPendingChar) {
        try { read () }
        catch { _ is LexerBase.Error => ' ' }
      }
      else {
        isPendingChar = false;
        pendingChar
      }

    match (ch) {
      | '"' => get_string ('"')

      | '\'' => get_id ('\'')

      // `.5' is a number, a lone `.' is an operator
      | '.' when Char.IsDigit (peek ()) => get_number (ch)
      | '.' => Token.Operator (".")

      | '{' => Token.BeginBrace ()
      | '}' => Token.EndBrace ()
      | '(' => Token.BeginRound ()
      | ')' => Token.EndRound ()
      | '[' => Token.BeginSquare ()
      | ']' =>
        // `]>' closes a quotation
        if (peek () != '>')
          Token.EndSquare ()
        else {
          ignore (read ());
          Token.EndQuote ()
        }
      | ',' => Token.Comma ()
      | ';' => Token.Semicolon ()

      // `<[' opens a quotation
      | '<' when peek () == '[' => ignore (read ()); Token.BeginQuote ()

      // `@' turns an operator or keyword into an identifier and
      // introduces @"..." strings
      | '@' =>
        match (read ()) {
          | next when IsOperatorChar (next) =>
            match (get_op (next)) {
              | Token.Operator (s) => Token.Identifier (s)
              | _ => Util.ice ()
            }
          | next when IsIdBeginning (next) =>
            match (get_id (next)) {
              | Token.Identifier (x)
              | Token.Keyword (x) => Token.Identifier (x)
              | _ => Util.ice ()
            }
          | '"' =>
            get_monkey_string ()
          | _ =>
            throw Error ("expecting operator, identifier or string literal after '@'")
        }

      // a sign directly followed by `.' stays a one-character operator
      | '-' | '+' =>
        if (peek () == '.') Token.Operator (ch.ToString ())
        else get_op (ch)

      | '*' | '/' | '%' | '\\'
      | '^' | '$' | '~' | '?' | '#'
      | '=' | '<' | '>' | '!'
      | '&' | '|' | ':' =>
        get_op (ch)

      // after executing eat_whitespace it is the only possibility for space
      // (try..catch above)
      | ' ' => Token.EndOfFile ()

      | _ when Char.IsDigit (ch) => get_number (ch)
      | _ when IsIdBeginning (ch) => get_id (ch)
      | _ => throw Error ("invalid character")
    }
  }

  // Returns the next token; implemented by the concrete lexer.
  abstract public GetToken () : Token;

  // Current source position; used as the location of the warnings and
  // errors reported by this class.
  abstract public Location : Location { get; }

  // Builds the two static tables: the set of base keywords and the
  // 256-entry lookup table of operator characters.
  public static this () 
  {
    def keyword_names = array [
        "_", "abstract", "and", "array", "as", "base", "catch",
        "class", "def", "delegate", "enum", "event",
        "false", "finally", "fun", "implements",
        "interface", "internal", "is", "macro", "match", "matches",
        "module", "mutable", "namespace", "new", "null", "out",
        "override", "params", "private", "protected", "public",
        "ref", "sealed", "static", "struct", "syntax", "this",
        "throw", "true", "try", "type", "typeof", "using",
        "variant", "virtual", "void", "volatile", "when", "where",
        "partial", "extern"
    ];
    mutable keywords = Set ();
    foreach (kw in keyword_names)
      keywords = keywords.Add (GlobalEnv.string_wrap (kw));
    BaseKeywords = keywords;

    def operator_chars = array [
        '=', '<', '>', '@', '^', '&', '-', '+', '|', '*',
        '/', '$', '%', '!', '?', '~', '.', ':', '#'
    ];
    // indexed by character code; entries not listed above stay false
    opchars = array (256);
    foreach (c in operator_chars)
      opchars [c :> int] = true;
  }

  // True when every character of `str' is an operator character or one of
  // `(', `)', `;', `[', `]'.  The empty string counts as an operator.
  public static IsOperator (str : string) : bool
  {
    def count = str.Length;

    def acceptable (c) {
      IsOperatorChar (c) ||
      match (c) {
        | '(' | ')' | ';' | '[' | ']' => true
        | _ => false
      }
    };
    // stops at the first character that is not acceptable
    def check (i) {
      i >= count || (acceptable (str [i]) && check (i + 1))
    };

    check (0)
  }

  // True when `str' consists only of letters and underscores.  The empty
  // string qualifies.
  public static HasKeywordChars (str : string) : bool
  {
    def count = str.Length;

    // stops at the first character that is neither a letter nor `_'
    def check (i) {
      i >= count ||
      ((Char.IsLetter (str [i]) || str [i] == '_') && check (i + 1))
    };

    check (0)
  }
} // end class LexerBase

public class LexerFile : LexerBase
{
  reader : System.IO.TextReader;
  mutable file_name : string;
  mutable file_idx : int;
  comment_store : StringBuilder;
  mutable comment_loc : Location;

 #region PREPROCESSOR VARIABLES
  /** true if only whitespace characters have been seen since the beginning of the current line */
  mutable white_beginning : bool;  

  mutable eating_stack : list[int]; // stack of nested #if

  /** -1 = now inside true preprocessing section
       0 = no preprocessing currently in action
       1 = there were no true preprocessing sections in this set
       2 = there already was true preprocessing section in this set
       3 = we are now inside #else section, but it is not analyzed   */
  mutable eating_now : int;
  mutable line_stack : int; // real line number at moment of `#line 3' occurence
  mutable line_start : int; // how to compute real line after `#line default' occurence
  file_real : string;           // real filename to revert after `#line 4 "bla"'
  
  protected defines : Hashtable [string, bool];

 #endregion PREPROCESSOR VARIABLES

  public static command_defines : Hashtable [string, bool];
  public static mutable store_comments : bool;
 
  // Inspects the last byte of `file' and warns when it is not a newline,
  // then rewinds the stream to its beginning.  Throws LexerBase.Error when
  // no byte can be read.
  private check_last_line_for_lf (file : IO.FileStream) : void
  {
    // position on the final byte
    _ = file.Seek (-1 :> Int64, IO.SeekOrigin.End);

    def last = file.ReadByte ();
    when (last < 0)
      throw LexerBase.Error ("unexpected end of file");
    when (Convert.ToChar (last) != '\n')
      Message.Warning (this.Location, "no new line at the end of the file");

    // leave the stream ready for the actual lexing
    _ = file.Seek (0 :> Int64, IO.SeekOrigin.Begin);
  }

  // Static defaults: no command-line defines, comments are not stored.
  public static this ()
  {
    command_defines = Hashtable ();
    store_comments = false;
  }

  // Disposes the underlying text reader.
  public override Dispose () : void
  {
    def disposable = reader : IDisposable;
    disposable.Dispose ();
  }
  
  // Opens `fn' for lexing as UTF-8 text.  Any exception raised while
  // opening (or while checking the final newline, which is done only when
  // warning 10002 is enabled) is reported through Message.FatalError.
  // The preprocessor starts with the command-line defines, all set to true.
  public this (fn : string)
  {
    base ();
    file_real = fn;
    file_name = fn;
    file_idx = Location.AddFile (file_name);
    line_stack = -1;
    comment_store = StringBuilder (300);

    try {
      def stream = IO.FileStream (fn, IO.FileMode.Open, IO.FileAccess.Read);
      reader = IO.StreamReader (stream, Text.Encoding.UTF8);
      when (WarningOptions.IsEnabled (10002))
        check_last_line_for_lf (stream);
    }
    catch {
      | e => Message.FatalError ($"cannot open file `$fn': $(e.Message)")
    }

    // initial preprocessor state
    eating_now = 0;
    eating_stack = [];
    white_beginning = true;
    defines = Hashtable (25);
    foreach (el in command_defines)
      defines.Add (el.Key, true);
  }

  // Reads one character from the file; throws LexerBase.Error at the end
  // of the input.
  override protected do_read () : char
  {
    def code = reader.Read ();
    when (code < 0)
      throw LexerBase.Error ("unexpected end of file");
    code :> char
  }
        
  // Called after a `/' has been consumed.  When the next character starts
  // a comment (`//' or `/*'), the whole comment is consumed and ' ' is
  // returned; otherwise nothing is consumed and '/' is returned.
  // With store_comments set, the text of `///' and `/**' comments is
  // appended to comment_store and comment_loc is updated.
  override protected comment_beginning () : char
  {
    def next = peek ();

    if (next == '/') {
      // we are for sure in one line comment
      _ = read ();
      try {
        if (store_comments && peek () == '/') {
          comment_loc = Location (file_idx, line, col - 2);
          _ = read ();
          // copy the rest of the line, including the newline
          mutable cc = read ();
          _ = comment_store.Append (cc);
          while (cc != '\n') {
            cc = read ();
            _ = comment_store.Append (cc)
          };
          comment_loc = comment_loc + Location (file_idx, line, col);
        }
        else
          while (read () != '\n') ();
      }
      // end of input simply ends the comment
      catch { _ is LexerBase.Error => () };

      white_beginning = true;
      // pass whitespace, so next read would be eof checked
      ' '
    }
    else if (next == '*') {
      // multiline comment
      _ = read ();

      // consumes characters up to and including the closing `*/';
      // after_star tells whether the previous character was `*'
      def scan (after_star, keep) {
        def cc = read ();
        when (keep)
          ignore (comment_store.Append (cc));

        unless (cc == '/' && after_star)
          scan (cc == '*', keep)
      };

      if (store_comments && peek () == '*') {
        comment_loc = Location (file_idx, line, col - 2);
        _ = read ();
        scan (true, true);
        // drop the closing delimiter from the stored text
        def stored = comment_store.Length;
        if (stored == 1)
          ignore (comment_store.Remove (0, 1))
        else
          ignore (comment_store.Remove (stored - 2, 2));
        comment_loc = comment_loc + Location (file_idx, line, col);
      }
      else
        scan (false, false);

      // pass whitespace, so next read would be eof checked
      ' '
    }
    else
      '/'
  }

  // Skips whitespace, comments, preprocessor directives and the contents
  // of disabled preprocessor sections (eating_now > 0), stopping in front
  // of the next significant character.
  //
  // Returns true when lexing can continue: either a significant character
  // is waiting (a lone `/' is handed over through pendingChar) or the end
  // of input was reached cleanly.  Returns false when the input ended
  // inside a disabled section or with unclosed preprocessor directives;
  // both cases are reported through Message.Error.
  override protected eat_whitespace () : bool
  {
    mutable eof = false;
    
    // shift == true: the character examined last has to be consumed first
    def loop (shift : bool) {
      when (shift) ignore (read ()); 
      def ch = 
        try { peek () } 
        catch { _ is LexerBase.Error => eof = true; '_' };
      if (eof) {
        if (eating_now > 0) {
          Message.Error (this.Location, "unexpected end of file"
                         " before finishing preprocessor directive");
          eating_now = 0;
          false
        }
        else
          if (!eating_stack.IsEmpty) {
            Message.Error (this.Location, "there are still open"
                           " preprocessing directives at the end of file");
            false
          }
          else true
      }
      else
        match (ch) {
          | ' ' | '\t' | '\r' => loop (true);
 
          | '\n' =>
            white_beginning = true;
            loop (true)
            
          | '/' =>
            white_beginning = false;
            _ = read ();
            match (comment_beginning ()) {
              | '/' when eating_now > 0 =>
                // a plain `/' inside a disabled section is skipped like
                // any other character there; previously it escaped as a
                // pending character and produced a stray token
                loop (false)

              | '/' => 
                pendingChar = '/';
                isPendingChar = true;
                true
                  
              | _ => loop (false)
            }
              
          | '#' =>
            if (white_beginning) {
              _ = read ();
              parse_preprocessor ();
              loop (false)
            }
            else {
              Message.Error (this.Location, "preprocessor directives must occur only in"
                             " lines beginning with whitespaces");
              // skip the rest of the line; if the input ends first, let
              // the next iteration handle the end of file instead of
              // leaking the read error to the caller
              try { while (read () != '\n') () }
              catch { _ is LexerBase.Error => () };
              loop (false);
            }
        
          | _ =>
            white_beginning = false;
            // inside a disabled section everything is skipped
            if (eating_now > 0) loop (true) else true
        }
    };
    loop (false)
  }

  // Discards any stored documentation comment text, with a warning, when
  // a comment was found between two adjacent literals.
  override protected ignore_comments () : void
  {
    unless (comment_store.Length == 0) {
      Message.Warning (this.Location,
                       "documentation comments between literals are ignored");
      comment_store.Length = 0
    }
  }
  
  /// Parses and executes a single preprocessing directive.  The caller has
  /// already consumed the `#' character; this method consumes the directive
  /// name and (for every directive) the rest of the line.
  ///
  /// Conditional compilation is tracked with `eating_now' and `eating_stack':
  ///   - a positive `eating_now' means the input is currently being skipped
  ///     (1: no branch of the current #if taken yet, 2: a branch was taken
  ///     and an #elif followed, 3: a branch was taken and #else followed),
  ///   - -1 means the current branch of an #if is being compiled,
  ///   - `eating_stack' remembers the value of `eating_now' from before each
  ///     still open #if.
  ///
  /// Throws LexerBase.Error for a missing / unknown directive, unbalanced
  /// conditionals and a malformed #line.
  parse_preprocessor () : void
  {
    // Returns the text up to (excluding) the next newline; the newline
    // itself is consumed.  End of input is treated like a newline.  The
    // result starts with a single space (the initial value of `c'), callers
    // that care use Trim ().
    def read_to_the_end_of_line () {
      mutable c = ' ';
      def line = StringBuilder (80);
      while (c != '\n') {
        ignore (line.Append (c));
        try { c = read () }
        catch { _ is LexerBase.Error => c = '\n' }
      };
      line.ToString ()
    };
    // eof isn't checked, because we are eating and expecting something
    // not white before end of line
    // Returns the first character that is not a whitespace, or '\n' when
    // the line ends first.
    def eat_spaces () : char {
      mutable c = ' ';
      while (Char.IsWhiteSpace (c) && c != '\n') 
        c = read ();
      c
    };
    // Skips blanks and reads a run of identifier characters / digits.  The
    // first non-blank character is always included in the result.
    def read_word () : string {
      def word = StringBuilder (eat_spaces ().ToString ());
      try {
        while (IsIdBeginning (peek ()) || Char.IsDigit (peek ()))
          _ = word.Append (read ())
      } catch { _ is LexerBase.Error => () };
      word.ToString ()
    };

    def directive = read_word ();
    when (directive == "")
      throw LexerBase.Error ("expected preprocessing directive after `#'");

    match (directive) {
      | "if" =>
        // remember the state of the enclosing region
        eating_stack = eating_now :: eating_stack;
        if (evaluate_preprocessing_expr (read_to_the_end_of_line ().Trim ()))
          // a true condition inside an already skipped region changes nothing
          unless (eating_now > 0)          
            eating_now = -1
        else
          eating_now = 1

      | "elif" =>
        def now = evaluate_preprocessing_expr (read_to_the_end_of_line ().Trim ());
        match (eating_stack) {
          | x :: _ =>
            // when the enclosing region is skipped, we stay skipped
            unless (x > 0)
              match (eating_now) {
                | 1 => when (now) eating_now = -1
                | -1 | 2 => eating_now = 2
                | _ => throw LexerBase.Error ("unbalanced preprocessing directives")
              }
          // this branch handles #elif, so report it as such
          | [] => throw LexerBase.Error ("unbalanced #elif");
        }

      | "else" =>
        ignore (read_to_the_end_of_line ());
        match (eating_stack) {
          | x :: _ =>
            unless (x > 0)
              match (eating_now) {
                | 1 => eating_now = -1
                | -1 | 2 => eating_now = 3
                | _ => throw LexerBase.Error ("unbalanced preprocessing directives")
              }
          | [] => throw LexerBase.Error ("unbalanced #else");
        }
        
      | "endif" =>
        ignore (read_to_the_end_of_line ());
        match (eating_stack) {
          | x :: xs =>
            // restore the state from before the matching #if
            eating_now = x; eating_stack = xs
          | [] => throw LexerBase.Error ("unbalanced #endif");
        }

      | "line" =>
        mutable c = eat_spaces ();
        // (-1, null) stands for `#line default'
        def (new_line, new_file) =
          if (c == 'd') {
            // the leading `d' was consumed by eat_spaces above
            if (read_word () == "efault") {
              ignore (read_to_the_end_of_line ());
              (-1, null)
            }
            else
              throw LexerBase.Error ("expecting line number or `default' indicator")
          }
          else {
            // without this check a malformed directive would escape as
            // FormatException from Int32.Parse instead of a lexer error
            unless (Char.IsDigit (c))
              throw LexerBase.Error ("expecting line number or `default' indicator");

            def num = StringBuilder (c.ToString ());
            try {
              while (Char.IsDigit (peek ()))
                ignore (num.Append (read ()))
            } catch { _ is LexerBase.Error => c = ' ' };

            (Int32.Parse (num.ToString ()), read_to_the_end_of_line ().Trim ())
          };
        if (new_line == -1) {
          // go back to the real line numbering and the real file name
          line = line - line_start + line_stack;
          file_name = file_real;
          file_idx = Location.GetFileIndex (file_real);
          line_stack = -1;
        }
        else {
          // if there is already something on stack, bring real line first
          when (line_stack != -1)
            line = line - line_start + line_stack;
          line_start = new_line;
          line_stack = line;
          line = new_line;
          // the file name part of #line is optional
          when (new_file != "") {
            file_name = new_file;
            file_idx = Location.GetFileIndex (new_file);
          }
        }

      | "error" => 
        Message.Error (this.Location, read_to_the_end_of_line ().Trim ());
        
      | "warning" =>
        Message.Warning (this.Location, read_to_the_end_of_line ().Trim ());

      | "region" =>
        _ = read_to_the_end_of_line ();
        
      | "endregion" =>
        _ = read_to_the_end_of_line ();

      | "define" =>
        defines.Set (read_word (), true);
        _ = read_to_the_end_of_line ()

      | "undef" =>
        defines.Set (read_word (), false);
        _ = read_to_the_end_of_line ()
        
      | x => throw LexerBase.Error ("unsupported preprocessing directive `" + x + "'")
    };
  }

  /// Evaluates the condition of an #if / #elif directive.
  ///
  /// `str' is first split into tokens: the operators `||', `&&', `==',
  /// `!=', `!', parentheses and words.  A word is a run of identifier
  /// characters and digits; the words `true' and `false' are literals, any
  /// other word is looked up in `defines' (an unknown symbol is false).
  /// The tokens are then evaluated by a recursive descent parser.
  ///
  /// Throws LexerBase.Error when the text cannot be tokenized, when the
  /// expression is malformed or when anything is left after the expression.
  evaluate_preprocessing_expr (str : string) : bool {
    // Tokenizer; symbols are replaced by "true" / "false" on the fly, `acc'
    // holds the tokens read so far in reversed order.
    def traverse (line : string, acc) {
      mutable i = 0;
      while (i < line.Length && Char.IsWhiteSpace (line [i])) ++i;
      def line = line.Substring (i);
      
      match (line) {
        | x when x.StartsWith ("||") =>
          traverse (x.Substring (2), "||" :: acc)
        | x when x.StartsWith ("&&") =>
          traverse (x.Substring (2), "&&" :: acc)
        | x when x.StartsWith ("==") =>
          traverse (x.Substring (2), "==" :: acc)
        | x when x.StartsWith ("!=") =>
          traverse (x.Substring (2), "!=" :: acc)
        | x when x.StartsWith ("(") =>
          traverse (x.Substring (1), "(" :: acc)
        | x when x.StartsWith (")") =>
          traverse (x.Substring (1), ")" :: acc)
        | x when x.StartsWith ("!") =>
          traverse (x.Substring (1), "!" :: acc)
        | "" => List.Rev (acc)
        | x =>
          mutable j = 0;
          while (j < x.Length && (IsIdBeginning (x[j]) || Char.IsDigit (x[j]))) ++j;
          when (j == 0)
            throw LexerBase.Error ("bad preprocessing condition format");
          // The whole word is taken before deciding what it is, so that
          // symbols which merely begin with `true' / `false' (true_debug,
          // falseFoo, ...) are not cut into a literal and a second symbol.
          def val = 
            match (x.Substring (0, j)) {
              | "true" => true
              | "false" => false
              | name =>
                match (defines.Get (name)) {
                  | Some (v) => v
                  | None => false
                }
            };
          if (val) traverse (x.Substring (j), "true" :: acc)
          else traverse (x.Substring (j), "false" :: acc)
      }
    };
    // tokens not consumed by the parser yet
    mutable tokens = traverse (str, []);

    // NOTE(review): `==' and `!=' bind weaker than `||' and `&&' here; the
    // C# preprocessor grammar gives them higher precedence -- confirm which
    // one is intended before changing it.

    // primary expression: parenthesised expression, negation or literal
    def lowest () {
      match (tokens) {
        | "(" :: xs =>
          tokens = xs;
          def res = highest ();
          match (tokens) {
            | ")" :: xs => tokens = xs
            | _ =>
              throw LexerBase.Error ("unbalanced parenthesis in preprocessing expression")
          };
          res
        | "!" :: xs => tokens = xs; !lowest (); 
        | "false" :: xs => tokens = xs; false
        | "true" :: xs => tokens = xs; true
        | _ => throw LexerBase.Error ("bad expression")
      }
    }
    // conjunction; the operand is parsed first, so it is always consumed
    // regardless of the value computed so far
    and low () {
      mutable res = lowest ();
      def loop () {
        match (tokens) {
          | "&&" :: xs =>
            tokens = xs; res = lowest () && res; loop ()
          | _ => ()
        }
      };
      loop ();
      res
    }
    // disjunction
    and high () {
      mutable res = low ();
      def loop () {
        match (tokens) {
          | "||" :: xs =>
            tokens = xs; res = low () || res; loop ()
          | _ => ()
        }
      };
      loop ();
      res
    }
    // (in)equality, left associative; chains like `A == B != C' are
    // evaluated completely instead of stopping after the first comparison
    and highest () {
      mutable res = high ();
      def loop () {
        match (tokens) {
          | "==" :: xs =>
            tokens = xs; res = (res == high ()); loop ()
          | "!=" :: xs =>
            tokens = xs; res = (res != high ()); loop ()
          | _ => ()
        }
      };
      loop ();
      res
    };

    def result = highest ();
    // everything has to be consumed, otherwise input like `A B' or `A)'
    // would be silently accepted as `A'
    match (tokens) {
      | [] => result
      | _ =>
        throw LexerBase.Error ("unexpected tokens at the end of preprocessing expression")
    }
  }

  /// Returns the next token of the file.
  ///
  /// Whitespace is skipped first, unless a pending character is waiting to
  /// be processed.  When comment text has been collected in `comment_store'
  /// it is returned as a Token.Comment (located at `comment_loc') and the
  /// store is emptied; otherwise a regular token is read and given the
  /// location spanning from where reading started to where it stopped.
  public override GetToken () : Token
  {
    when (!isPendingChar)
      ignore (eat_whitespace ());

    if (comment_store.Length == 0) {
      // remember where the token starts before it gets consumed
      def start_line = line;
      def start_col = col;
      def token = do_get_token ();
      token.Location = Location (file_idx, start_line, start_col, line, col);
      token
    }
    else {
      def comment = Token.Comment (comment_store.ToString ());
      comment.Location = comment_loc;
      ignore (comment_store.Remove (0, comment_store.Length));
      comment
    }
  }

  /// Current position of the lexer, built from the current file index,
  /// line and column.
  public override Location : Location
  {
    get {
      Location (file_idx, line, col)
    }
  }
} // end class LexerFile


/// Lexer reading its input from a string held in memory.
///
/// Every query for the location yields the single Location given to the
/// constructor.  Preprocessor directives are rejected.
public class LexerString : LexerBase
{
  reader : string;         // the text being lexed
  mutable pos : int;       // index of the next character to read
  location : Location;     // location reported by the Location property

  /// `fn' is the text to lex, `loc' the location reported by this lexer.
  public this (fn : string, loc : Location)
  {
    base ();
    reader = fn;
    pos = 0;
    location = loc;
  }

  /// Index of the next unread character of the text.
  public Position : int {
    get { pos }
  }

  /// Nothing to release here.
  public override Dispose () : void { }
  
  /// Returns the next character and moves past it; throws LexerBase.Error
  /// when the whole text has been consumed already.
  override protected do_read () : char
  {
    when (pos >= reader.Length)
      throw LexerBase.Error ("unexpected end of code text");

    def current = reader[pos];
    ++pos;
    current
  }

  /// Called just after a `/' has been read.  If a comment starts here it
  /// is skipped and ' ' is returned, otherwise '/' is returned and nothing
  /// more is consumed.
  override protected comment_beginning () : char
  {
    // skips to the end of the line, end of text ends the comment as well
    def skip_line () {
      try {
        while (read () != '\n') {};
      } 
      catch { _ is LexerBase.Error => () }
    };
    // skips up to the closing `*/'; `after_star' tells whether the
    // previously read character was `*'
    def skip_block (after_star) {
      match (read ()) {
        | '/' when after_star => ()
        | '*' => skip_block (true)
        | _ => skip_block (false)
      }
    };

    match (peek ()) {
      | '/' =>
        skip_line ();
        // hand back a whitespace, so the next read is checked for eof
        ' '

      | '*' =>
        ignore (read ());
        skip_block (false);
        // hand back a whitespace, so the next read is checked for eof
        ' '

      | _ => '/'
    }
  }
  
  /// Skips whitespace and comments.  Returns false when the end of the text
  /// was reached and true when something else is waiting to be read.  A `/'
  /// that does not begin a comment is stored as the pending character.
  /// A `#' results in LexerBase.Error.
  override protected eat_whitespace () : bool
  {
    def scan (consume : bool) {
      when (consume) ignore (read ());

      // peek () signals the end of the text with an exception
      mutable at_end = false;
      def next = 
        try { peek () } 
        catch { _ is LexerBase.Error => at_end = true; '_' };

      if (at_end)
        false
      else if (next == ' ' || next == '\t' || next == '\r' || next == '\n')
        scan (true)
      else if (next == '/') {
        ignore (read ());
        if (comment_beginning () == '/') {
          // not a comment, keep the slash for the tokenizer
          pendingChar = '/';
          isPendingChar = true;
          true
        }
        else
          scan (false)
      }
      else if (next == '#')
        throw LexerBase.Error ("preprocessor directives may not occur in"
                               " string programs")
      else
        true
    };

    scan (false)
  }

  /// Returns the next token; leading whitespace and comments are skipped
  /// first, unless a pending character is waiting.
  public override GetToken () : Token
  {
    when (!isPendingChar)
      ignore (eat_whitespace ());

    do_get_token ()
  }

  /// The location passed to the constructor.
  public override Location : Location
  {
    get { location }
  }
} // LexerString

/// Kind of a piece of source text, as stored in the `Token' field of
/// LexerColorizer.SyntaxToken.
public enum SyntaxType {
  // names and operators
  | Identifier 
  | Keyword 
  | Operator 
  | OperatorBrace 
  | OperatorDot 

  // literals
  | StringLiteral 
  | CharLiteral 
  | IntLiteral 
  | FloatLiteral
   
  | Comment 

  | EndOfFile
}


/// Simplified lexer for syntax colouring.  It works on a string and
/// classifies consecutive pieces of it with SyntaxType values; no token
/// values are computed.
///
/// Usage: give the text (constructor or SetString) and call GetSyntaxToken
/// repeatedly.
public class LexerColorizer
{
  /// Classified piece of the text: positions `StartPos' .. `EndPos'
  /// (inclusive) of the string, and its kind.
  [Record]
  public class SyntaxToken {
    public StartPos : int;
    public EndPos : int;
    public Token : SyntaxType;
  }

  mutable reader : string;   // text being scanned
  mutable pos : int;         // index of the next character to read
  
  /// Starts scanning `fn' (the text itself) from its first character.
  public this (fn : string)
  {
    reader = fn;
    pos = 0;
  }

  /// Replaces the scanned text and continues from index `offset' in it.
  public SetString (text : string, offset : int) : void {
    reader = text;
    pos = offset;
  }

  /// Returns the next character and moves past it; throws LexerBase.Error
  /// at the end of the text.
  read () : char
  {
    if (pos < reader.Length) {
      def ch = reader[pos];
      ++pos;
      ch
    } else throw LexerBase.Error ("unexpected end of code text")
  }

  /// Returns the next character without consuming it, or the character
  /// with code 0 at the end of the text.
  peek () : char
  {
    if (pos < reader.Length)
      reader[pos]
    else 
      (0 :> char) 
  }

  /// Consumes a run of operator characters.  The run stops before `//' and
  /// `/*', so that a comment is never swallowed by an operator.
  get_op () : SyntaxType
  {
    def loop () {
      if (LexerBase.IsOperatorChar (peek ())) {
        def c = read ();
        if (c == '/' && (peek () == '/' || peek () == '*')) {
          // leave the slash for the comment
          --pos;
          SyntaxType.Operator
        }
        else
          loop ();
      }
      else
        SyntaxType.Operator
    };
    loop ();
  }

  /// Consumes a numeric literal starting at the current position (a digit,
  /// or a `.' followed by a digit) together with its exponent and type
  /// suffix.  Returns IntLiteral or FloatLiteral; throws LexerBase.Error
  /// when an exponent has no digits.
  get_number () : SyntaxType
  {
    mutable already_seen_type = false;  // for the case 0b0 vs 0b
   
    mutable mode =
      match (read ()) {
        | '.' => LexerBase.NumberMode.Float
        | '0' =>
          match (peek ()) {
            | 'x' | 'X' => ++pos; LexerBase.NumberMode.Hex
            | 'o' | 'O' => ++pos; LexerBase.NumberMode.Octal
            | 'b' | 'B' =>
              ++pos;
              // `0b' not followed by a digit is zero with the `b' suffix
              unless (Char.IsDigit (peek ())) already_seen_type = true;
              LexerBase.NumberMode.Binary
              
            | x when Char.IsDigit (x) =>
              Message.Warning ("trailing zeros look like"
                               " octal modifiers, but they are not");
              LexerBase.NumberMode.Decimal
              
            | _ => LexerBase.NumberMode.Decimal
          }
        | _ => LexerBase.NumberMode.Decimal
      };

    // read digits and . between them if it is present
    def loop () {
      match (peek ()) {
        | '.' => 
          when (mode == LexerBase.NumberMode.Decimal) {
            mode = LexerBase.NumberMode.Float;
            ++pos;
            if (Char.IsDigit (peek ()))
              loop ()
            else {
              // the dot does not belong to the number (member access)
              --pos;
              mode = LexerBase.NumberMode.Decimal;
            }
          }

        | 'a' | 'A' | 'b' | 'B' | 'c' | 'C' | 'd' | 'D'
        | 'e' | 'E' | 'f' | 'F' =>
          when (mode == LexerBase.NumberMode.Hex) {
            ++pos;
            loop ();
          }

        | x when Char.IsDigit (x) =>
          ++pos;
          loop ()
        | _ => ()
      }
    };
    loop ();
    
    // Consumes the optional exponent (skipped when `only_realsuf' is set)
    // and the optional real type suffix.
    def exponent_part (only_realsuf) {
      when (!only_realsuf) {
        match (peek ()) {
          | 'E' | 'e' =>
            ++pos;
            match (peek ()) {
              | '+' | '-' => ++pos;
              | _ => ()
            };
            if (Char.IsDigit (peek ()))
              do {
                ++pos;
              } while (Char.IsDigit (peek ()))
            else
              throw LexerBase.Error ("no digits after exponent sign in float literal")
          | _ => ()
        }
      };
      // the suffix is a part of the literal; without consuming it here it
      // would be reported as a separate identifier
      match (Char.ToLower (peek (), CultureInfo.InvariantCulture)) {
        | 'f' | 'd' | 'm' => ++pos
        | _ => ()
      };
      SyntaxType.FloatLiteral
    };

    // Consumes the optional one or two letter integer type suffix.
    def check_type_suffix () {
      // we should have integer number here

      def special (c) {
        | 'l' | 's' | 'b' | 'u' => true
        | _ => false
      };

      // check suffixes to make special types conversions
      def ch =
        if (already_seen_type)
          'b'
        else
          Char.ToLower (peek (), CultureInfo.InvariantCulture);
      when (special (ch)) {
        // the `b' of a bare `0b' was consumed while detecting the mode
        unless (already_seen_type) ++pos;

        // we can have two letter suffixes
        def next_ch = Char.ToLower (peek (), CultureInfo.InvariantCulture);
        when (special (next_ch)) ++pos;
      };
      SyntaxType.IntLiteral
    };
    
    match (mode) {
      | LexerBase.NumberMode.Float =>
        match (peek ()) {
          | 'E' | 'e' => 
            exponent_part (false)
          | _ =>
            exponent_part (true)
        }
      | LexerBase.NumberMode.Decimal =>
        match (Char.ToLower (peek (), CultureInfo.InvariantCulture)) {
          | 'e' =>
            exponent_part (false)
          | 'f' | 'd' | 'm' =>
            exponent_part (true)
          | _ =>
            check_type_suffix ()
        }

      | LexerBase.NumberMode.Hex 
      | LexerBase.NumberMode.Binary 
      | LexerBase.NumberMode.Octal => check_type_suffix ()
    }
  }

  /// Consumes an identifier, a keyword or -- when the text starts with an
  /// apostrophe -- a character literal.
  get_id () : SyntaxType
  {
    def first_ch = read ();
    if (first_ch == '\'' && !LexerBase.IsIdBeginning (peek ()))
      get_char ()
    else {
      def id_buffer = StringBuilder ();
      _ = id_buffer.Append (first_ch);

      while (LexerBase.IsIdBeginning (peek ()) || 
             Char.IsDigit (peek ()) || 
             peek () == '\'')
        _ = id_buffer.Append (read ());
          
      def str = id_buffer.ToString ();
      
      // 'x' with x being a letter is lexed as if it were an identifier
      if (first_ch == '\'' && str.Length == 3 && str[2] == '\'')
        SyntaxType.CharLiteral
      else if (GlobalEnv.Core.IsKeyword (str))
        SyntaxType.Keyword
      else
        SyntaxType.Identifier
    }
  }

   
  /// Consumes the rest of a character literal whose opening apostrophe
  /// has been read already.  The position never moves past the end of the
  /// text.
  get_char () : SyntaxType
  {
    if (peek () == '\\') {
      // escape sequence: skip the backslash with the character after it
      // (it may be an apostrophe), then go to the closing apostrophe, but
      // not further than the end of the line
      pos += 2;
      while (pos < reader.Length && reader[pos] != '\'' && reader[pos] != '\n')
        ++pos;
      when (pos < reader.Length && reader[pos] == '\'') ++pos;
    }
    else
      // the character and the closing apostrophe
      pos += 2;

    // a literal cut by the end of the text must not yield positions
    // outside of the text
    when (pos > reader.Length) pos = reader.Length;
    SyntaxType.CharLiteral
  }

  /// Consumes a string literal up to the closing `end_ch'; the opening
  /// quote has been read already.  Returns EndOfFile when the text ends
  /// before the literal does.
  get_string (end_ch : char) : SyntaxType
  {
    def loop () {
      if (pos >= reader.Length)
        SyntaxType.EndOfFile
      else             
        match (read ()) {
          // skip the escaped character, it may be the quote
          | '\\' => ++pos; loop ()
          | ch when ch != end_ch => loop () 
          | _ => SyntaxType.StringLiteral 
        }
    };
    loop () 
  }

  /// Consumes a @"..." string; the `@"' has been read already.  A doubled
  /// quote inside of the string does not terminate it.  Throws
  /// LexerBase.Error (from read) when the text ends inside of the string.
  get_monkey_string () : SyntaxType
  {
    def loop () {
      match (read ()) {
        | '"' =>
          match (peek ()) {
            | '"' =>
              ++pos;
              loop ()
            | _ => SyntaxType.StringLiteral
          }
        | _ => loop ()
      }
    };

    loop ()
  }

  /// Called just after a `/' has been read.  Skips the comment starting
  /// here and returns ' ', or returns '/' (consuming nothing) when no
  /// comment starts here.  `state' is set to 1 when a multi-line comment
  /// is not closed before the end of the text.
  comment_beginning (state : ref int) : char
  {
    match (peek ()) {
      | '/' =>
        // we are for sure in one line comment
        while (pos < reader.Length && read () != '\n') {};
        // pass whitespace, so next read would be eof checked
        ' '

      | '*' =>
        // multiline comment
        ++pos;
        unless (eat_comment ()) state = 1;
        // pass whitespace, so next read would be eof checked            
        ' '

      | _ => '/'
    }
  }

  /// Skips the text up to and including the closing `*/'.  Returns false
  /// when the text ends before the comment is closed.
  eat_comment () : bool 
  {
    def loop1 (seen_star) {
      if (pos < reader.Length)
        match (read ()) {
          | '*' => loop1 (true)
          | '/' when seen_star => true
          | _ => loop1 (false)
        }
      else
        false
    };
    loop1 (false);
  }

  /// Classifies and consumes the token starting at the current position.
  /// Throws LexerBase.Error for characters which cannot start any token.
  get_token () : SyntaxType
  {
    if (pos >= reader.Length) SyntaxType.EndOfFile
    else
    match (peek ()) {
      | '"' => ++pos; get_string ('"')

      | '\'' => get_id ()

      | '.' =>
        ++pos;
        if (Char.IsDigit (peek ())) {
          // get_number has to see the dot to know that it is a real number
          --pos;
          get_number ()
        }
        else
          SyntaxType.Operator

      | '@' =>
        ++pos;
        def next = peek ();
        if (LexerBase.IsOperatorChar (next))
          get_op ();
        else if (LexerBase.IsIdBeginning (next)) {
          // @keyword is an ordinary identifier
          _ = get_id ();
          SyntaxType.Identifier
        }
        else if (next == '"') {
          ++pos;
          get_monkey_string ()
        }
        else
          throw LexerBase.Error ("expecting operator, identifier or string literal after '@'")

      | '{' | '}' | '[' | ']' 
      | '(' | ')'
        =>
        ++pos; SyntaxType.OperatorBrace      

      | ',' | ';' | '\\'
        =>
        ++pos; 
        get_op ();
     
      | ' ' => SyntaxType.EndOfFile
     
      | ch =>
        if (Char.IsDigit (ch))
          get_number ()
        else
          if (LexerBase.IsIdBeginning (ch))
            get_id ()
          else
            if (LexerBase.IsOperatorChar (ch)) 
              get_op ()
            else
              throw LexerBase.Error ("invalid character")
    }      
  }

  /// Returns the next classified piece of the text.
  ///
  /// `state' carries information between calls (and between texts given
  /// with SetString): 1 means that scanning starts inside of a multi-line
  /// comment; it is set back to 0 once that comment is closed.
  ///
  /// Leading whitespace is included in the returned range.  EndOfFile is
  /// returned at the end of the text and also when the text cannot be
  /// lexed (LexerBase.Error is caught here).
  public GetSyntaxToken (state : ref int) : SyntaxToken {
    def loop (c) {
      | ' ' | '\t' | '\r' | '\n' =>
        if (pos < reader.Length)
          loop (read ())
        else
          SyntaxType.EndOfFile
            
      | '/' =>
        match (comment_beginning (ref state)) {
          // Not a comment: give the slash back and lex it as an operator.
          // Continuing with the whitespace loop here would drop the slash
          // and classify the token by whatever follows it.
          | '/' => --pos; get_token ()
          | _ => SyntaxType.Comment 
        }
        
      | '#' =>
        throw LexerBase.Error ("preprocessor directives may not occur in"
                               " string programs")
        
      | _ => --pos; get_token ()
    };
    def begin_pos = pos;
    def tok =
      if (pos >= reader.Length)
        SyntaxType.EndOfFile
      else if (state == 1) {
        // continue the comment opened earlier
        when (eat_comment ()) state = 0;
        SyntaxType.Comment
      }
      else {
        try {
          loop (' ');
        }
        catch {
          | _ is LexerBase.Error => SyntaxType.EndOfFile
        }
      }
    SyntaxToken (begin_pos, pos - 1, tok);
  }  
} // end class LexerColorizer

} // end ns
