
/* datum.c */

/* Exact text, trailing newline included, of the three title lines that
 * get_datum() recognizes by strcmp() against a whole input line: the
 * table-of-contents title, the rubric announcing the main body, and the
 * cross-reference table's title.  */
#define TITLE_TOC  "RAMIFICATION MAP AND TABLE OF CONTENTS\n"
#define RUBRIC     "THE RAMIFICATION\n"
#define TITLE_XREF "CROSS-REFERENCE (SEE ALSO)\n"

/* This file provides the function get_datum(0) to retrieve and format
 * the datum from the library data file's next line.  When the next line
 * contains no datum, zero is returned.  The datum, the parsing stage
 * and the current ramno are returned in the global variables defined in
 * "datum.h".
 *
 * This file also provides the function rewind_data() which (as its
 * name suggests) causes subsequent get_datum(0) calls to get again
 * beginning from the library data file's start.
 *
 */

#include "datum.h"
#include "conv.h"
#include "argp.h"
#include "ldf.h"
#include "gen.h"

/* The default datum structure, used by init_datum() and get_datum()
 * when they are called with a null argument.  */
struct datum  datum          = {0}  ;
/* The parsing stage: the state of get_datum()'s state machine, reset to
 * NONE1 by rewind_data().  */
enum doc_part doc_part       = NONE1;
/* The current ramno as a string: copied from the line by get_datum() at
 * each ram title and emptied again at the ram's foot.  */
char          ram_s[N_DIG+1] = ""   ;

/* a bounded pointer: `pp' is the working pointer, while `op' and `xp'
 * are the lower and upper bounds to which pp_set() and the helpers
 * built on it clamp `pp'.  All three are null whenever pp_clear() has
 * been called more recently than pp_init().  */
static char *op = 0;
static char *pp = 0;
static char *xp = 0;
/* Retire the bounded pointer: null the working pointer and both of its
 * bounds.  */
static inline void pp_clear( void          ) {
  pp = 0;
  op = 0;
  xp = 0;
}
/* Aim the bounded pointer at p: p becomes both the lower bound and the
 * working pointer, and the upper bound is set WIDTH_DATA chars beyond.
 * Returns the upper bound.  */
static inline char *pp_init( char *const p ) {
  op = p;
  pp = p;
  xp = p + WIDTH_DATA;
  return xp;
}
/* Move the working pointer to p, clamped into the range [op, xp], and
 * return the working pointer's new value.  If the bounds are unset or
 * empty (the upper bound does not exceed the lower), leave the working
 * pointer alone and return null.  */
static inline char *pp_set ( char *const p ) {
  if ( !( op < xp ) ) return 0;
  if      ( p < op  ) pp = op;
  else if ( p > xp  ) pp = xp;
  else                pp = p ;
  return pp;
}
/* Advance the working pointer by one char, subject to pp_set()'s
 * clamping.  Returns what pp_set() returns.  */
static inline char *pp_incr( void          ) {
  char *const next = pp + 1;
  return pp_set( next );
}
/* Retreat the working pointer by one char, subject to pp_set()'s
 * clamping.  Returns what pp_set() returns.  */
static inline char *pp_decr( void          ) {
  char *const prev = pp - 1;
  return pp_set( prev );
}
/* Move the working pointer by n chars (n may be negative), subject to
 * pp_set()'s clamping.  Returns what pp_set() returns.  */
static inline char *pp_addn( const int n   ) {
  char *const target = pp + n;
  return pp_set( target );
}
/* The following two inlines are not used at the time of this writing
 * because, while safe, they are inefficient and thus unsuitable for
 * inclusion in the program's main loop.  While execution efficiency is
 * in the author's view not always the principal principle of program
 * design, it is a highly important principle which necessarily assumes
 * greater importance in the main loop than elsewhere.  If care is taken
 * in coding, the two inlines are not really needed, anyway.  The source
 * is nevertheless retained here for future reference and use.  */
static inline char *pp_inct( void          ) {
  char *const pp0 = pp;
  if ( !pp_incr() || pp <= pp0 ) error(
    EFAULT, 0,
    "pointer out of bounds"
  );
  return pp;
}
static inline char *pp_dect( void          ) {
  char *const pp0 = pp;
  if ( !pp_decr() || pp >= pp0 ) error(
    EFAULT, 0,
    "pointer out of bounds"
  );
  return pp;
}

/* Find the line's end: return a pointer to the first newline in s or,
 * if s has no newline, to its terminating null.  */
static inline char *end_of_line( char *s ) {
  for ( ; *s != '\0'; ++s )
    if ( *s == '\n' ) break;
  return s;
}

/* Find the end of the current word: return a pointer to the first
 * space in s.  If a newline or the terminating null comes first, return
 * a pointer to that instead.  */
static inline char *next_space( char *s ) {
  for ( ; *s != '\0'; ++s )
    if ( *s == ' ' || *s == '\n' ) break;
  return s;
}

/* Trim trailing filler from the string that begins at s and ends just
 * before x.  Walking backward from x over spaces and middots ('\267'),
 * return a pointer to the leftmost true space met, which lies just past
 * the last nonspace.  Middots thus count as filler only when a true
 * space separates them from the last nonspace; if no true space is met,
 * x itself is returned.  */
static inline char *last_nonspace( const char *const s, char *x ) {
  char *cut = x;
  char *q   = x;
  while ( q > s ) {
    const char c = q[-1];
    if      ( c == ' '    ) cut = q - 1;
    else if ( c != '\267' ) break;
    --q;
  }
  return cut;
}

/* Does the given string begin with a ram number?  A ram number is
 * exactly N_DIG decimal digits followed by a space or a newline.  If
 * so, return a pointer just past the number's end (that is, to the
 * space or newline); otherwise return null.
 *
 * The char handed to isdigit() is first converted to unsigned char.
 * Input lines may hold Latin-1 bytes such as the middot '\267', which
 * are negative where plain char is signed, and passing a negative value
 * other than EOF to isdigit() is undefined behavior.  */
static char *begins_with_ramno( char *s ) {
  char *const x = s + N_DIG;
  /* A null or any other nondigit ends the scan at once, so the scan
   * never runs past the string's terminating null.  */
  while ( s < x ) if ( !isdigit( (unsigned char) *s++ ) ) return 0;
  if ( *s != ' ' && *s != '\n' )                          return 0;
  return x;
}

/* Does the given string begin with an ascii-art xref arrow?  The
 * required format is specific: one space, one or more dashes, a '>',
 * and one space; for example, " ---> ".  If the string so begins,
 * return a pointer just past the arrow's closing space; otherwise
 * return null.  */
static char *begins_with_arrow( char *s ) {
  char *q = s;
  /* the leading space and the first, obligatory dash */
  if ( q[0] != ' ' || q[1] != '-' ) return 0;
  q += 2;
  /* any further dashes */
  while ( *q == '-' ) ++q;
  /* the arrowhead and the trailing space */
  if ( q[0] != '>' || q[1] != ' ' ) return 0;
  return q + 2;
}

/* Clear a datum structure and make it ready for use.  Call this at
 * least once before a datum structure is first used; call it again
 * whenever the datum is to be wiped and reinitialized.  A null argument
 * selects the global struct datum `datum'.  Returns a pointer to the
 * structure initialized.  */
struct datum *init_datum( struct datum *d ) {
  struct datum *const target = d ? d : &datum;
  memset( target, 0, sizeof *target );
  /* Start the working buffer one char into its underlying storage, so
   * that the datum buffer is always bounded by nulls at beginning and
   * end both.  Such null double-bounding spares get_datum() certain
   * bounds checks; get_datum() must run efficiently because it is the
   * heart of the program's main loop and is called more than 10,000
   * times in a typical run.  */
  target->buf = target->buf0 + 1;
  return target;
}

/* Read the library data file's next line into d->buf, advance the
 * global parsing stage `doc_part' accordingly and, if the line holds a
 * datum (a toc entry, an xref-table entry, a ram title or a .deb line),
 * mark out the datum's fields in place by storing pointers into d->buf.
 *
 * d: the datum structure to fill.  It is expected that this function
 *    and init_datum() will usually be called with null arguments.  When
 *    they are, they output by default to the global struct datum
 *    `datum' (see `datum.h').
 *
 * Returns d if the line yielded a datum (d->type is nonzero), otherwise
 * zero.  If the line does not fit the expected document structure
 * (stage ILLEGAL), error() is called with EIO.
 *
 * Side effects: *d is wholly reinitialized on each call; the globals
 * `doc_part' and `ram_s' are updated; for a .deb line, certain
 * separator chars in d->buf are overwritten with nulls.  */
struct datum *get_datum( struct datum *d ) {

  /* Build once, on the first call, the ascii-art rule line against
   * which input lines are compared: WIDTH_DATA of '-' and a newline.  */
  static char rule[WIDTH_DATA+2];
  static int first_call = 1;
  if ( first_call ) {
    memset( rule, '-', WIDTH_DATA );
    rule[WIDTH_DATA+0] = '\n';
    rule[WIDTH_DATA+1] = '\0';
    first_call = 0;
  }

  if ( !d ) d = &datum;
  init_datum(d);
  /* A failed read is the document's normal end only if the stage has
   * reached XREF_O or later (by enum order); earlier, it is an error.  */
  if ( !read_ldf( d->buf ) )
    doc_part = doc_part >= XREF_O ? NONE2 : ILLEGAL;
  /* NOTE(review): undot() and unlat() presumably replace middots and
   * other Latin-1 characters in the line (see `conv.h') -- confirm.  */
  if ( opt.no_latin1 ) {
    undot( d->buf );
    unlat( d->buf );
  }

  /* the state machine:  On entry to the switch, doc_part is the stage
   * of the previous line (the case comments describe that line).  Each
   * case inspects the line just read and sets doc_part to the stage of
   * this new line.  */
  switch ( doc_part ) {

    case ILLEGAL         :
      break;

    case NONE1           : // (before the document's start)
      doc_part =
        !strcmp( d->buf, "\n" )
          ? TOC_SP0          : PREFACE         ;
      break;

    case PREFACE         : // document title and preface
      doc_part =
        !strcmp( d->buf, "\n" )
          ? TOC_SP0          : PREFACE         ;
      break;
    case TOC_SP0         : // space above the toc title
      doc_part =
        !strcmp( d->buf, rule )
          ? TOC_RULE1        :
        !strcmp( d->buf, "\n" )
          ? TOC_SP0          : PREFACE         ;
      break;
    case TOC_RULE1       : // rule above the toc title
      doc_part =
        !strcmp( d->buf, TITLE_TOC )
          ? TOC_TITLE        :
        !strcmp( d->buf, "\n" )
          ? TOC_SP0          : PREFACE         ;
      break;
    case TOC_TITLE       : // the toc (table of contents) title
      doc_part =
        !strcmp( d->buf, rule )
          ? TOC_RULE2        :
        !strcmp( d->buf, "\n" )
          ? TOC_SP0          : PREFACE         ;
      break;
    case TOC_RULE2       : // rule below the toc title
      doc_part =
        !strcmp( d->buf, "\n" )
          ? TOC_SP3          : PREFACE         ;
      break;

    case TOC_SP3         : // space below the toc title
      doc_part =
        begins_with_ramno( d->buf )
          ? TOC_O            :
        !strcmp( d->buf, "\n" )
          ? TOC_SP3          : TOC_TITLE_X     ;
      break;
    case TOC_TITLE_X     : // toc preface
      doc_part =
        !strcmp( d->buf, "\n" )
          ? TOC_SP3          : TOC_TITLE_X     ;
      break;

    case TOC_O           : // a toc (table of contents) entry
      doc_part =
        begins_with_ramno( d->buf )
          ? TOC_O            :
        !strcmp( d->buf, "\n" )
          ? RUBRIC_SP0       : TOC_X           ;
      break;

    case TOC_X           : // toc endmatter
      doc_part =
        !strcmp( d->buf, "\n" )
          ? RUBRIC_SP0       : TOC_X           ;
      break;
    case RUBRIC_SP0      : // space above the rubric's upper rule
      doc_part =
        !strcmp( d->buf, rule )
          ? RUBRIC_RULE1     :
        !strcmp( d->buf, "\n" )
          ? RUBRIC_SP0       : TOC_X           ;
      break;
    case RUBRIC_RULE1    : // the rubric's upper rule
      doc_part =
        !strcmp( d->buf, "\n" )
          ? RUBRIC_SP1       : TOC_X           ;
      break;
    case RUBRIC_SP1      : // space immediately above the rubric
      doc_part =
        !strcmp( d->buf, RUBRIC )
          ? RUBRIC_TITLE     :
        !strcmp( d->buf, rule )
          ? RUBRIC_RULE1     :
        !strcmp( d->buf, "\n" )
          ? RUBRIC_SP0       : TOC_X           ;
      break;
    case RUBRIC_TITLE    : // the rubric (announcing the main body)
      doc_part =
        !strcmp( d->buf, "\n" )
          ? RUBRIC_SP2       : TOC_X           ;
      break;
    case RUBRIC_SP2      : // space immediately below the rubric
      doc_part =
        !strcmp( d->buf, rule )
          ? TITLE_RULE1      :
        !strcmp( d->buf, "\n" )
          ? RUBRIC_SP0       : TOC_X           ;
      break;

    /* From here through the xref table's title, the format is strict:
     * an unexpected line makes the stage ILLEGAL.  */
    case TITLE_RULE1     : // rule above a ram title
      doc_part =
        begins_with_ramno( d->buf )
          ? TITLE_O          :
        !strcmp( d->buf, TITLE_XREF )
          ? XREF_TITLE       : ILLEGAL         ;
      break;
    case TITLE_O         : // a ram title
      doc_part =
        !strcmp( d->buf, rule )
          ? TITLE_RULE2      : ILLEGAL         ;
      break;
    /* The next two stages share one rule: a blank line ends the ram;
     * any other line is a .deb datum.  */
    case TITLE_RULE2     : // rule below a ram title
    case DEB_O           : // a basic .deb datum
      doc_part =
        !strcmp( d->buf, "\n" )
          ? DEB_X            : DEB_O           ;
      break;
    case DEB_X           : // space at a ram's foot
      doc_part =
        !strcmp( d->buf, rule )
          ? TITLE_RULE1      : ILLEGAL         ;
      break;

    case XREF_TITLE      : // the xref (cross-reference) table's title
      doc_part =
        !strcmp( d->buf, rule )
          ? XREF_TITLE_RULE2 : ILLEGAL         ;
      break;
    case XREF_TITLE_RULE2: // rule below the xref table's title
      doc_part =
        !strcmp( d->buf, "\n" )
          ? XREF_TITLE_SP2   : ILLEGAL         ;
      break;

    case XREF_TITLE_SP2  : // space below the xref table's title
      doc_part =
        begins_with_ramno( d->buf )
          ? XREF_O           :
        !strcmp( d->buf, "\n" )
          ? XREF_TITLE_SP2   : XREF_TITLE_X    ;
      break;
    case XREF_TITLE_X    : // xref-table preface
      doc_part =
        !strcmp( d->buf, "\n" )
          ? XREF_TITLE_SP2   : XREF_TITLE_X    ;
      break;

    case XREF_O          : // an xref-table entry
      doc_part =
        begins_with_ramno( d->buf )
          ? XREF_O           : DOC_ENDMATTER   ;
      break;
    case DOC_ENDMATTER   : // document endmatter
      break;

    case NONE2           : // (after the document's end)
      break;

    default:
      error(
        EPERM, 0,
        "impossible, datum.c get_datum()"
      );

  }

  /* If the line contains a relevant datum, parse it.  (This switch
   * expects to be executed over 10,000 times in a typical run.  For
   * this reason, in the grand old tradition of the C programming
   * language, a significant degree of program-design elegance is traded
   * away for execution efficiency.  Notice particularly the deliberate
   * circumvention of bounds-checking in several spots with "pp++" and
   * "pp--".  Notice also the overall design in which the parsed pieces
   * of an input line are never even copied out of the buffer in which
   * they are found but are cut up and processed right there in place.
   * The resulting relatively lean, resourceful switch is less elegant
   * and admittedly requires more careful coding, but remains
   * technically correct and surely runs faster.)
   *
   * Throughout, a field named `foo' points to the field's first char
   * and the matching `fooX' points just past the field's end.  */
  pp_init( d->buf );
  switch ( doc_part ) {

    case ILLEGAL:
      error(
        EIO, 0,
        "cannot parse the library data file (stage %d)",
        doc_part
      );
      break;

    /* A toc or xref entry: ramno, indentation, title and, for an xref
     * entry only, an arrow followed by the cross-referenced ramnos.  */
    case TOC_O: case XREF_O: {
      d->ram              = pp                       ;
      d->ramX             = pp_addn(N_DIG)           ;
      d->indent           = pp_incr()                ;
      while ( *pp == ' '  ) pp++                     ;
      d->indentX          = pp                       ;
      d->title            = pp                       ;
      if ( doc_part == TOC_O ) {
        d->type   = TOC;
        d->titleX = pp_set( end_of_line(pp) );
      }
      else {
        char *arrowX = 0;
        d->type   = XREF;
        /* The title runs up to the arrow or, if there is no arrow, to
         * the line's end.  */
        while (
          *pp && *pp != '\n'
          && !( arrowX = begins_with_arrow(pp) )
        ) pp++;
        d->titleX = pp;
        if ( arrowX ) {
          d->arrow  = pp            ;
          d->arrowX = pp_set(arrowX);
          d->xrefs  = pp            ;
          {
            /* Record up to NMAX_XREF ramnos, stepping over each ramno
             * and the one char that follows it.  */
            int i;
            for (
              i = 0;
              i < NMAX_XREF && begins_with_ramno(pp);
              pp_addn(N_DIG+1), ++i
            ) d->xref[i] = pp;
          }
          d->xrefsX = pp            ;
        }
      }
    } break;

    /* A ram title: ramno, uppercase title and, at the line's end, a
     * parenthesized count.  The line is scanned backward from its end
     * to find the parentheses; the null that init_datum() leaves just
     * before the buffer's start stops the backward scans should a
     * parenthesis be missing.  */
    case TITLE_O:
      d->type = TITLE;
      d->ram              = pp                       ;
      d->ramX             = pp_addn(N_DIG)           ;
      d->title_uc         = pp_incr()                ;
      pp_set( end_of_line(pp)-1 )                    ;
      while ( *pp && *pp != ')' )
                            pp--                     ;
      d->countX           = pp                       ;
      while ( *pp && *pp != '(' )
                            pp--                     ;
      d->count            = pp_incr()                ;
      /* Step back to the char before the '(', then back over the
       * spaces that separate the title from the count.  */
      pp_addn(-2)                                    ;
      while ( *pp == ' '  ) pp--                     ;
      d->title_ucX        = pp_incr()                ;
      /* Remember the ramno for the .deb lines that follow.  */
      strncpy( ram_s, d->ram, N_DIG );
      ram_s[N_DIG] = '\0';
      break;

    /* A .deb line: maintainer, priority, package name and description,
     * the first two in columns of fixed width.  */
    case DEB_O:
      d->type = DEB;
      d->maint            = pp                       ;
      d->maintX           = last_nonspace( pp, pp + WIDTH_MAINT );
      d->pri              = pp_addn(WIDTH_MAINT+3)   ;
      d->priX             = pp_addn(WIDTH_PRI)       ;
      d->deb              = pp_addn(3)               ;
      d->debX             = pp_set( next_space(pp) ) ;
      {
        /* The description begins one char past the package name's end
         * or at the end of the WIDTH_DEB-wide name column plus one,
         * whichever is further right.  */
        char *const a = pp + 1                ;
        char *const b = d->deb + WIDTH_DEB + 1;
        d->desc = pp_set( a >= b ? a : b );
      }
      d->descX            = pp_set( end_of_line(pp) );
      /* The next line is an efficiency hack.  It works because at least
       * one blank space precedes each field (except the leftmost) in
       * the library data file.  It clobbers these known spaces.  For
       * such data as eventually are output, print_deb() (see `prdeb.h'
       * and `prdeb.c') knows where the clobbered spaces were and neatly
       * outputs extra spaces to make up for them.  */
      d->descX[0] = d->desc[-1] = d->deb[-1] = d->pri[-1] = '\0';
      if ( opt.no_desc || !opt.wide ) d->debX[0]          = '\0';
      /* Tag the datum with the ramno of the ram it belongs to.  */
      strncpy( d->ram_s, ram_s, N_DIG+1 );
      break;

    /* The ram has ended; no ramno is current any longer.  */
    case DEB_X:
      *ram_s = '\0';
      break;

    default: break;

  }
  pp_clear();

  /* init_datum() zeroed d->type, so it is nonzero only if one of the
   * cases above recognized a datum.  */
  return d->type ? d : 0;

}

/* Start over from the library data file's beginning: rewind the file
 * with rewind_ldf(), clear the global datum, and reset the parsing
 * stage and the current ramno to their initial values, so that
 * subsequent get_datum(0) calls begin again with the first line.  */
void rewind_data( void ) {
  rewind_ldf();
  init_datum(0);
  ram_s[0] = '\0' ;
  doc_part = NONE1;
}

