%{
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include "types.h"
#include "misc.h"
#include "parse.tab.h"


/*  static int */
/*  puttext(void) { */
/*    yylval.text.text=yytext; */
/*    yylval.text.n=yyleng; */
/*    return 0; */
/*  } */

/* Pending-BS flag.  While it is non-zero, the next token handed back through
   MRET() is reported as its BS_-prefixed variant, and the flag is cleared. */
static int bs;

/* Return token `a` from yylex(); if the bs flag is set, clear it and return
   BS_<a> instead (the name is built by token pasting). */
#define MRET(a) \
  do { \
    if (bs) { \
      bs=0; \
      return BS_ ## a ; \
    } \
    return (a); \
  } while(0)

/* Publish the current match in yylval.text: the text pointer is yytext and
   the length is yyleng minus the `a` trailing characters to be left out. */
#define puttext(a) \
  do { \
    yylval.text.n=yyleng-(a); \
    yylval.text.text=yytext; \
  } while(0)

%}

/* Scanner options: no flex-generated main(), no yywrap() call at end of
   input, case-sensitive matching, 8-bit input, and no yyunput() helper. */
%option nomain
%option noyywrap
%option case-sensitive
%option 8bit
%option nounput

/* Named character classes used by the rules. */
SPACE [ ]
NL [\n]
/* The apostrophe is a member of both UPPER and LOWER. */
UPPER [A-Z\']
LOWER [a-z\']
DIGIT [[:digit:]]
PUNCT [[:punct:]]
/* Characters accepted inside the arabic start condition.  Note that `,-.`
   is a range; in ASCII it covers exactly the comma, hyphen and full stop. */
ARABIC [\',-.ANTWY^_`bcdfghklmnpqrstvwyz|~]
/* Any character that is not a space, a newline or punctuation. */
NIBS [^ \n[:punct:]]

/* Exclusive start condition: while it is active only <arabic> rules (and the
   unqualified end-of-file rule) apply. */
%x arabic

%%

%{
  /* Code placed before the first rule runs at the top of every yylex()
     call.  The static guard limits the environment lookup to the first
     call: yy_flex_debug becomes 1 when DEBUG_LEX is present in the
     environment and 0 otherwise. */
  static int start;

  if (!start) {
    yy_flex_debug=getenv("DEBUG_LEX") ? 1 : 0;
    start=1;
  }

%}

<arabic>{

  {ARABIC}+{SPACE}{NIBS} |
  {NL}+{SPACE}{NIBS} {
    /* A token followed by exactly one space and then a NIBS character.
       The token alone is reported (length excludes the space and the
       lookahead character); the space is consumed and only the lookahead
       character is pushed back. */
    puttext(2);
    yyless(yyleng-1);
    MRET(WORD);
  }

  {ARABIC}+ |
  {SPACE}+ |
  {NL}+ {
    /* A plain run of ARABIC characters, spaces or newlines.
       These patterns must be listed before the {NIBS} lookahead patterns
       below: most ARABIC letters are also NIBS characters, so a run such
       as "ktAb" matches {ARABIC}+ and {ARABIC}+{NIBS} with the same length,
       and flex resolves such ties in favour of the earlier rule.  With the
       lookahead rule first the run would be cut before its last letter. */
    puttext(0);
    MRET(WORD);
  }

  {ARABIC}+{NIBS} |
  {SPACE}+{NIBS} |
  {NL}+{NIBS} {
    /* A token immediately followed by a NIBS character that is not part of
       the token.  The lookahead character is excluded from the text and
       pushed back.  bs is set just before MRET(), so this always returns
       BS_WORD and leaves bs cleared. */
    puttext(1);
    bs=1;
    yyless(yyleng-1);
    MRET(WORD);
  }

  "}" {
    /* The closing brace ends the arabic context and is itself returned as
       a WORD. */
    BEGIN(INITIAL);
    puttext(0);
    MRET(WORD);
  }

  .|\n {err("Unknown char %c in arabic context\n",*yytext); /* nothing above matched this character */}

}

"{"{SPACE}*"\\arabic" {
  /* An opening brace, optional spaces and the literal text \arabic switch
     the scanner into the arabic start condition; the matched text is
     returned as a WORD. */
  BEGIN(arabic);
  puttext(0);
  MRET(WORD);
}

{UPPER}{LOWER}*{SPACE}{NIBS} {
  /* One UPPER character, any number of LOWER characters, exactly one space
     and then a NIBS character.  puttext(2) records the token without the
     space and the lookahead character; yyless(yyleng-1) pushes back only
     the lookahead character, so the space is consumed without producing a
     token of its own.  puttext() must run before yyless(), which changes
     yyleng. */
  puttext(2);
  yyless(yyleng-1);
  MRET(CAP_WORD);
}

{LOWER}{UPPER}*{SPACE}{NIBS} {
  /* Same shape for one LOWER character followed by UPPER characters. */
  puttext(2);
  yyless(yyleng-1);
  MRET(CCAP_WORD);
}

{UPPER}+{SPACE}{NIBS} {
  /* Same shape for a run of UPPER characters.  A single UPPER character is
     matched equally well by the first rule above, which wins the tie. */
  puttext(2);
  yyless(yyleng-1);
  MRET(CAP_CCAP_WORD);
}

{LOWER}+{SPACE}{NIBS} |
{PUNCT}+{SPACE}{NIBS} |
{DIGIT}+{SPACE}{NIBS} |
{NL}+{SPACE}{NIBS} {
  /* Same shape for runs of LOWER characters, punctuation, digits or
     newlines; all of these are reported as WORD. */
  puttext(2);
  yyless(yyleng-1);
  MRET(WORD);
}
  
{UPPER}{LOWER}*{SPACE}(.|\n) {
  /* A capitalised token followed by one space and then any character at
     all, newline included.  When that character is a NIBS the earlier,
     equally long {SPACE}{NIBS} rule wins, so this fires for a space
     followed by punctuation, another space or a newline.  The text length
     excludes the space and the lookahead character, and both are pushed
     back so the space is scanned as a token of its own.
     The second alternative has to be a bare \n: with a trailing literal
     letter it would match three characters after the token and the space
     would end up inside the token text. */
  puttext(2);
  yyless(yyleng-2);
  MRET(CAP_WORD);
}

{LOWER}{UPPER}*{SPACE}(.|\n) {
  /* Same shape for one LOWER character followed by UPPER characters. */
  puttext(2);
  yyless(yyleng-2);
  MRET(CCAP_WORD);
}

{UPPER}+{SPACE}(.|\n) {
  /* Same shape for a run of UPPER characters. */
  puttext(2);
  yyless(yyleng-2);
  MRET(CAP_CCAP_WORD);
}

{LOWER}+{SPACE}(.|\n) |
{PUNCT}+{SPACE}(.|\n) |
{DIGIT}+{SPACE}(.|\n) |
{NL}+{SPACE}(.|\n) {
  /* Same shape for runs of LOWER characters, punctuation, digits or
     newlines; all of these are reported as WORD. */
  puttext(2);
  yyless(yyleng-2);
  MRET(WORD);
}

{UPPER}{LOWER}* {
  /* Plain token with no lookahead: one UPPER character followed by any
     number of LOWER characters.  The whole match is the token text.
     Being listed first, this rule wins equal-length ties, e.g. for a
     single UPPER character. */
  puttext(0);
  MRET(CAP_WORD);
}

{LOWER}{UPPER}* {
  /* One LOWER character followed by any number of UPPER characters. */
  puttext(0);
  MRET(CCAP_WORD);
}

{UPPER}+ {
  /* A run of UPPER characters. */
  puttext(0);
  MRET(CAP_CCAP_WORD);
}

{LOWER}+ |
{PUNCT}+ |
{DIGIT}+ |
{SPACE}+ |
{NL}+ {
  /* Runs of LOWER characters, punctuation, digits, spaces or newlines are
     all reported as WORD. */
  puttext(0);
  MRET(WORD);
}

{UPPER}{LOWER}*{NIBS} {
  /* A token immediately followed by a NIBS character.  puttext(1) leaves
     the lookahead character out of the token text and yyless() pushes it
     back.  bs is set right before MRET(), so the BS_ variant of the token
     is always returned here and bs is cleared again.  These rules come
     after the plain rules, which therefore win equal-length ties. */
  puttext(1);
  bs=1;
  yyless(yyleng-1);
  MRET(CAP_WORD);
}

{LOWER}{UPPER}*{NIBS} {
  /* Same shape for one LOWER character followed by UPPER characters. */
  puttext(1);
  bs=1;
  yyless(yyleng-1);
  MRET(CCAP_WORD);
}

{UPPER}+{NIBS} {
  /* Same shape for a run of UPPER characters. */
  puttext(1);
  bs=1;
  yyless(yyleng-1);
  MRET(CAP_CCAP_WORD);
}

{LOWER}+{NIBS} |
{PUNCT}+{NIBS} |
{DIGIT}+{NIBS} |
{SPACE}+{NIBS} |
{NL}+{NIBS} {
  /* Same shape for runs of LOWER characters, punctuation, digits, spaces or
     newlines; reported as the BS_ variant of WORD. */
  puttext(1);
  bs=1;
  yyless(yyleng-1);
  MRET(WORD);
}


<<EOF>> {
  /* End of input.  MEOF is returned directly rather than through MRET(),
     so bs stays set; a token returned through MRET() afterwards would be
     reported as its BS_ variant.  NOTE(review): presumably meant for a
     scanner that is restarted on new input -- confirm.  The rule carries no
     start-condition prefix, so it also serves the arabic condition. */
  bs=1;
  return MEOF;
}
    

.|\n {err("Unknown char %c in initial context\n",*yytext); /* any character no rule above accepts */}





%%

YYSTYPE yylval;

/*
 * Print one line holding `label` followed by the text of the token that is
 * currently stored in yylval.text.
 *
 * The text is not NUL-terminated at the token boundary, so its length is
 * passed as the precision; the same value is used as the (left-justified)
 * field width.  printf requires both `*` arguments to be of type int, hence
 * the single conversion up front.
 */
static void
print_token(const char *label) {

  int n=(int)yylval.text.n;

  printf("%s%-*.*s\n",label,n,n,yylval.text.text);

}

/*
 * Stand-alone driver for the scanner: reads stdin, calls yylex() until it
 * returns MEOF and prints every token on its own line, prefixed with a tag
 * naming the token kind (plain WORD tokens carry no tag).  An unexpected
 * token code is reported through errret().  Returns 0 once MEOF is seen.
 */
int
main(int argc,char * argv[]) {

  int j;

  /* Command-line arguments are not used. */
  (void)argc;
  (void)argv;

  yyin=stdin;
  while ((j=yylex())!=MEOF) {
    switch(j) {
    case WORD:
      print_token("");
      break;
    case CAP_WORD:
      print_token("<cap> ");
      break;
    case CCAP_WORD:
      print_token("<ccap> ");
      break;
    case CAP_CCAP_WORD:
      print_token("<cap_ccap> ");
      break;
    case BS_WORD:
      print_token("<bs> ");
      break;
    case BS_CAP_WORD:
      print_token("<bs_cap> ");
      break;
    case BS_CCAP_WORD:
      print_token("<bs_ccap> ");
      break;
    case BS_CAP_CCAP_WORD:
      print_token("<bs_cap_ccap> ");
      break;
    default:
      errret("Bad j=%u\n",j);
      break;
    }
  }

  return 0;

}
