//
// =========================================================================
//
//  21 September 2001
//
//  PDB CURATION TOOL
//
//  SGI make:
//  cc -o pdbcur pdbcur.cpp pcur_rdl.cpp cparser.c ccplib.c mmdb.a -lm -lC
//
//
//  Command line:
//
//   pdbcur  xyzin pdbfile xyzout pdbfile <<eof
//   ? keyword  parameter(s)
//   ? keyword  parameter(s)
//   ? .......  .........
//   ? eof
//
// -------------------------------------------------------------------------
//
//   Input syntax:
//
//   Keyword    Parameter(s)
//  ~~~~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
//   renchain    {selection of chain(s)}    'new chain ID'
//               Example:  renchain  /*/A 'B'
//               Quotations are optional and are useful for
//               designation 'no chain ID'.
//               Examples:
//               rename A to 'no chain ID': renchain A ''
//               rename 'no chain ID' to B: renchain /*// B
//
//   renresidue  {selection of residue(s)}  'new residue name'
//               Example:  renresidue  (ALA) 'AL1'
//
//   renatom     {selection of atom(s)}  'new 4-letter atom name'
//               Example:  renatom  CA[C] ' CC '
//
//   renelement  {selection of atom(s)}  'new element name'
//               Example:  renelement  CA[C] 'AL'
//
//   delmodel    {selection of model(s)}
//               Deletes the specified model(s).
//               Example (delete model #1):                delmodel /1
//               Example (delete all models with chain A): delmodel /*/A
//
//   delchain    {selection of chain(s)}
//               Deletes the specified chain(s).
//               Example (delete chain A in all models):   delchain A
//               Example (delete chain A in 1st model):    delchain /1/A
//
//   delresidue  {selection of residue(s)}
//               Deletes the specified residue(s).
//               Example (delete residues 33 to 120):      delresidue  33-120
//
//   delatom     {selection of atom(s)}
//               Deletes the specified atom(s).
//               Example (delete all C-gamma atoms):       delatom CG[C]
//
//   lvmodel     {selection of model(s)}
//               Leaves the specified model(s), everything else is
//               deleted.
//               Example (leave only model #1):            lvmodel /1
//               Example (leave all models with chain A):  lvmodel /*/A
//
//   lvchain     {selection of chain(s)}
//               Leaves the specified chain(s), everything else is
//               deleted.
//               Example (leave chains A in all models):    lvchain A
//               Example (leave only chain A in 1st model): lvchain /1/A
//
//   lvresidue   {selection of residue(s)}
//               Leaves the specified residue(s), everything else is
//               deleted.
//               Example (leave residues 33.A to 120.B): lvresidue 33.A-120.B
//
//   lvatom      {selection of atom(s)}
//               Leaves the specified atom(s), everything else is
//               deleted.
//               Example (leave only C-alpha atoms):       lvatom CA[C]
//
//   write       {PDB|CIF|BIN}
//               writes 'xyzout' as a PDB, mmCIF or MMDB BINary
//               file. By default, the file is written in the format
//               of input file.
//
//   genter      no parameters; this keyword generates PDB 'TER' cards.
//
//   sernum      no parameters; this keyword generates correct atom
//               serial numbers.
//
//   mvsolvent   no parameters; moves solvent chains to the end of models.
//
//   symmetry    input of the space group symmetry name, e.g. 'P 21 21 21'
//               (without quotation marks, spaces _are_ significant).
//               This parameter is mandatory if coordinate file does not
//               specify the space group symmetry.
//
//   geometry    a b c alpha beta gamma
//               input of the unit cell dimensions (space-separated
//               real numbers). This parameter is mandatory if coordinate
//               file does not specify the cell parameters.
//
//   genunit     generating a unit cell as defined by crystallographic
//               information given in coordinate file or set up with
//               keywords 'symmetry' and 'geometry'. Chains generated
//               by the identity operation retain their names, all others
//               are renamed as c_n, where c is the chain's original
//               name, and n is the number of the symmetry operation in
//               the space group used (numbering starts from 0, which is
//               the identity operation). In order to comply with PDB standards,
//               the chains are then to be renamed using renchain
//               command, e.g.  renchain A_2 H . The chains may be
//               assigned automatically generated 1-character names
//               using the command  mkchainIDs .
//
//               Example: rnase.pdb contains 2 chains A and B.
//               Generate a unit cell, space group P 21 21 21, 4
//               symmetry operations, and assign chain IDs C,D,E for
//               chain A transformed by operations #1,2,3, and IDs
//               F,G,H for chain B transformed by the same operations.
//               Chains A and B transformed by 0th operation (identity)
//               retain their IDs:
//
//               pdbcur xyzin rnase.pdb xyzout ucell.pdb <<eof
//               ? symm P 21 21 21
//               ? genu
//               ? renc A_1 C
//               ? renc A_2 D
//               ? renc A_3 E
//               ? renc B_1 F
//               ? renc B_2 G
//               ? renc B_3 H
//               ? eof
//
//   symop       X,Y,Z 'old chain ID' 'new chain ID' 'old ID' 'new ID' ...
//               declares (but does not apply) a symmetry operation.
//               The symmetry operations for each X,Y,Z fractional
//               coordinates must be written without spaces.
//                  Pairs 'old chain ID' - 'new chain ID' specify how
//               the chains should be renamed after operation. This
//               input is not mandatory. If no renaming is specified,
//               the newly generated chains will be renamed automatically
//               (see keyword symcommit).
//                  Example:  symop  Y+1/2,X-1/2,Z  A S  B R
//               (declare symmetry transformation x=Y+1/2, y=X-1/2, z=Z
//               with renaming chain A to S and B to R).
//
//   symcommit   no parameters
//               applies all symmetry operations declared since
//               last symcommit statement. The first operation (normally
//               identity) will be applied to the existing set of
//               coordinates, all others will be applied to
//               duplicates of the coordinates, and the results
//               are merged.
//                  The newly generated chains are named as C_n,
//               where C is the original chain name, and n is the
//               symmetry operation number. Symmetry operations
//               are numbered as they appear in symop statements,
//               from 0 on; however the very first one is applied
//               to the existing chains, which are not renamed in
//               this case.
//
//                  Example:
//
//               pdbcur xyzin rnase.pdb xyzout rnase1.pdb <<eof
//               ? symop  X,Y,Z
//               ? symop  Y+1/2,X-1/2,Z
//               ? symcommit
//               ? eof
//
//               just adds two chains named A_1 and B_1, obtained
//               according to the rule Y+1/2,X-1/2,Z from chains
//               A and B, to the existing file.
//              
//
//   mkchainIDs  automatically generates 1-character chain IDs after
//               applying symmetry operations. The IDs are generated
//               such that they use all available letters starting
//               from A, and a chain is not renamed if its name is
//               already a 1-character one.
//
//               The following example
//
//               pdbcur xyzin rnase.pdb xyzout ucell.pdb <<eof
//               ? symm P 21 21 21
//               ? genu
//               ? mkch
//               ? eof
//
//               produces exactly the same result as that given for
//               keyword GENUNIT, because the original chains are named
//               sequentially as A,B (not G,I, for example).
//
//   rotate      {selection of atoms} alpha beta gamma x y z
//   rotate      {selection of atoms} alpha beta gamma center
//               Euler rotation of selected atoms through angles alpha,
//               beta and gamma (degrees) as applied to the initial
//               Z-axis, new Y-axis and newest Z-axis, correspondingly.
//               The rotation center is given by either orthogonal
//               coordinates x, y and z or by keyword 'center' for
//               specifying the mass center of the selected atoms.
//                  Examples:
//                1. 90-degree rotation of chain A about Z-axis in
//                   original coordinate system:
//                   rotate   A   90 0 0   0 0 0
//                2. 60-degree rotation of chains A and B about Y-axis
//                   in the coordinate system of their mass center:
//                   rotate 'A,B'  0 60 0   center
//
//   vrotate     {selection of atoms} alpha vx vy vz   x y z
//   vrotate     {selection of atoms} alpha vx vy vz   center
//   vrotate     {selection of atoms} alpha atom1 atom2
//               Rotation of selected atoms through angle alpha (degrees)
//               about a vector given by direction (vx,vy,vz) from the
//               rotation center (given as x,y,z or by keyword 'center'
//               for the mass center of the selected atoms). The vector
//               may also be specified by two atoms atom1 and atom2
//               represented in the mmdb selection notation.
//                  Examples:
//                1. 90-degree rotation of chain A about Z-axis in
//                   original coordinate system:
//                   vrotate  A   90  0 0 1   0 0 0
//                2. 60-degree rotation of chains A and B about Y-axis
//                   in the coordinate system of their mass center:
//                   vrotate 'A,B'  60  0 1 0  center
//                3. 45-degree rotation of all atoms about vector connecting
//                   C-alpha atoms of residues 20.A of chain A and 55
//                   of chain B:
//                   vrotate /*/*/*/* 45  /1/A/20.A/CA[C] /1/B/55/CA[C]
//                   or, if there is only one model in the PDB file:
//                   vrotate *  45  A/20.A/CA[C] B/55/CA[C]
//
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
//
//   Specification of the selection sets:
//
//   /mdl/chn/s1.i1-s2.i2/at[el]:aloc
//   /mdl/chn/*(res).ic/at[el]:aloc
//
//   where no spaces are allowed. The slashes separate the
//   hierarchical levels of models, chains, residues and atoms.
//
//   Notations:
//
//    mdl   - the model's serial number or 0 or '*' for any model
//            (default).
//    chn   - the chain ID or list of chain IDs like 'A,B,C' or
//            '*' for any chain (default).
//    s1,s2 - the starting and ending residue sequence numbers
//            or '*' for any sequence number (default).
//    i1,i2 - the residues insertion codes or '*' for any
//            insertion code. If the sequence number other than  
//            '*' is specified, then insertion code defaults to ""
//            (no insertion code), otherwise the default is '*'.
//    res   - residue name or list of residue names like 'ALA,SER'
//            or '*' for any residue name (default)
//    at    - atom name or list of atom names like 'CA,N1,O' or
//            '*' for any atom name (default)
//    el    - chemical element name or list of chemical element
//            names like 'C,N,O', or '*' for any chemical element
//            name (default)
//    aloc  - the alternative location indicator or list of
//            alternative locations like 'A,B,C', or '*' for any
//            alternate location. If the atom name and chemical
//            element name is specified (both may be '*'), then
//            the alternative location indicator defaults to ""
//            (no alternate location), otherwise the default is
//             '*'.
//
//   Values for chain IDs, residue names, atom names, chemical element
//   names and alternative location indicators may be negated by
//   prefix '!'. For example, '!A,B,C' for the list of chain names
//   means 'any chain ID but A,B,C'.
//
//   Generally, any hierarchical element as well as the selection
//   code may be omitted, in which case it is replaced for
//   default (see above). This makes the following examples valid:
//
//    *                   select all atoms
//    /1                  select all atoms in model 1
//    A,B                 select all atoms in chains A and B in
//                        all models
//    /1//                select all atoms in chain without chainID
//                        in model 1
//    /*/,A,B/            select all atoms in chain without chainID,
//                        chain A and B in all models
//    33-120              select all atoms in residues 33. to 120.
//                        in all chains and models
//    A/33.A-120.B        select all atoms in residues 33.A to
//                        120.B in chain A only, in all models
//    A/33.-120.A/[C]     select all carbons in residues 33. to
//                        120.A in chain A, in all models
//    CA[C]               select all C-alphas in all
//                        models/chains/residues
//    A//[C]              select all carbons in chain A, in all models
//    (!ALA,SER)          select all atoms in any residues but
//                        ALA and SER, in all models/chains
//    /1/A/(GLU)/CA[C]    select all C-alphas in GLU residues of
//                        chain A, model 1
//    /1/A/*(GLU)./CA[C]: same as above
//    [C]:,A              select all carbons without alternative
//                        location indicator and carbons in alternate
//                        location A
//
//    NOTE: if a selection contains comma(s), the selection sentence must
// be embraced by quotation marks, which indicate to the input parser that
// the sentence is a single input parameter rather than a set of comma-
// separated arguments.
//
// =========================================================================
//

#ifndef  __STDLIB_H
#include <stdlib.h>
#endif

#ifndef  __STRING_H
#include <string.h>
#endif

#define __CPlusPlus

#ifndef  __CCPLib__
#include "ccplib.h"
#endif

#ifndef  __CParser__
#include "cparser.h"
#endif


#ifndef  __PCUR_Funcs__
#include "pcur_funcs.h"
#endif


// ======================================================================

//   constants


// -------------------  Main program  --------------------------

//  Entry point of the PDB curation tool.
//
//  Reads the coordinate file given by logical name XYZIN, applies the
//  keyword cards read from standard input one at a time, and writes the
//  resulting coordinates to the file given by logical name XYZOUT.
//
//  Parameters:
//    argc, argv - command line; passed to the CCP4 initialisation calls
//    env        - not used
//
//  Returns:
//    0    on success (normal termination is also reported via ccperror)
//    1    if the input coordinate file could not be read
//    100  if the file type to be written is not one of PDB/mmCIF/binary
//    otherwise the non-zero code left in RC by a keyword handler
//    (Err_KEYWORD for an unrecognised keyword) or the error code
//    returned by the MMDB write function.
int main ( int argc, char ** argv, char ** env )  {
CMMDBManager MMDB;
CGenSym      GenSym;
int          RC,lcount,FType;
char         S[500];
const char * fileName = NULL;  // value of the XYZIN/XYZOUT logical name
const char * fmtName  = NULL;  // printable name of the output format

// input parser parameters
int           ntok=0;
char          line[201];
PARSERTOKEN * token=NULL;
PARSERARRAY * parser;


  //  1.  General CCP4 initializations
  ccp4fyp         ( argc,argv );
  ccp4ProgramName ( argv[0]   );
  ccp4rcs         ( argv[0]   );


  //  2.  Make routine initializations, which must always be done
  //      before working with MMDB
  InitMatType();


  //  3.  Read coordinate file.
  //    3.1 Set all necessary read flags -- check with the top of
  //        file  mmdb_file.h  as needed
  MMDB.SetFlag ( MMDBF_PrintCIFWarnings );

  //    3.2 Read coordinate file by its logical name
  RC = MMDB.ReadCoorFile1 ( "XYZIN" );

  //    3.3 Check for possible errors:
  if (RC) {
    //  An error was encountered. MMDB provides an error messenger
    //  function for easy error message printing.
    printf ( " ***** ERROR #%i READ:\n\n %s\n\n",RC,GetErrorDescription(RC) );
    //  Location of the error may be identified as precisely as the
    //  line number and the line itself (PDB only. Errors in mmCIF are
    //  located by category/item name. Errors of reading BINary files
    //  are not locatable and the files are not editable). This
    //  information is now retrieved from MMDB input buffer:
    MMDB.GetInputBuffer ( S,lcount );
    if (lcount>=0)
      printf ( "       LINE #%i:\n%s\n\n",lcount,S );
    else if (lcount==-1)
      printf ( "       CIF ITEM: %s\n\n",S );
    //  now quit
    return 1;
  }

  //  MMDB allows to identify the type of file that has been just
  //  read:
  FType = MMDB.GetFileType();
  switch (FType)  {
    case MMDB_FILE_PDB    : printf ( " PDB"         );  break;
    case MMDB_FILE_CIF    : printf ( " mmCIF"       );  break;
    case MMDB_FILE_Binary : printf ( " MMDB binary" );  break;
    default : printf ( " Unknown (report as a bug!)" );
  }
  //  getenv() may return NULL, which must not reach a %s conversion
  fileName = getenv ( "XYZIN" );
  printf ( " file %s has been read in.\n",
           fileName ? fileName : "(undefined)" );


  //  4.  Interpret cards from standard input stream

  printf (
    "\n"
    " ----------------------------------------------------"
    "--------------------------\n"
    "  Input cards\n\n" );

  // Initialise a parser array used by cparser. This is used to
  // return the tokens and associated info. Set maximum number
  // of tokens per line to 20
  parser = (PARSERARRAY *) cparse_start(20);

  if (parser == NULL) ccperror ( 1,"Couldn't create parser array" );

  // Example of how to set up cparser to use non-default delimiters
  // In this example, only allow tabs as delimiters
  //
  // if (! cparse_delimiters(parser,"\t",""))
  //    ccperror(1,"Couldn't reset delimiters");

  // Set a convenient pointer to the token member of the parser array
  token = parser->token;

  // Read lines from stdin until the input is exhausted or a card
  // leaves an error code in RC
  RC   = 0;

  while (!RC) {

    // Always blank the line before calling cparser to force reading
    // from stdin
    line[0] = '\0';

    // Call cparser to read input line and break into tokens.
    // Returns the number of tokens, or zero for eof
    ntok = cparser(line,sizeof(line)-1,parser,1);

    // RC is preset to END_OF_INPUT before the handlers are offered the
    // card. If it still holds END_OF_INPUT afterwards, no handler has
    // changed it, and the card is reported as an unrecognised keyword.
    RC = END_OF_INPUT;
    if (ntok>=1)  {

      renchain   ( token,ntok,MMDB  ,RC );
      renresidue ( token,ntok,MMDB  ,RC );
      renatom    ( token,ntok,MMDB  ,RC );
      renelement ( token,ntok,MMDB  ,RC );
      delmodel   ( token,ntok,MMDB  ,RC );
      delchain   ( token,ntok,MMDB  ,RC );
      delresidue ( token,ntok,MMDB  ,RC );
      delatom    ( token,ntok,MMDB  ,RC );
      lvmodel    ( token,ntok,MMDB  ,RC );
      lvchain    ( token,ntok,MMDB  ,RC );
      lvresidue  ( token,ntok,MMDB  ,RC );
      lvatom     ( token,ntok,MMDB  ,RC );
      genter     ( token,ntok,MMDB  ,RC );
      sernum     ( token,ntok,MMDB  ,RC );
      mvsolvent  ( token,ntok,MMDB  ,RC );
      write      ( token,ntok,FType ,RC );
      symmetry   ( token,ntok,MMDB  ,line  ,RC );
      geometry   ( token,ntok,MMDB  ,RC );
      genunit    ( token,ntok,MMDB  ,RC );
      symop      ( token,ntok,GenSym,RC );
      symcommit  ( token,ntok,MMDB  ,GenSym,RC );
      mkchainids ( token,ntok,MMDB  ,RC );
      rotate     ( token,ntok,MMDB  ,RC );
      vrotate    ( token,ntok,MMDB  ,RC );

      if (RC==END_OF_INPUT)  {
        printf ( "Unrecognised keyword \"%s\"\n",token[0].fullstring );
        RC = Err_KEYWORD;
      }

    }

  }

  // The tokens are not needed past this point; release the parser
  // array on every path out of the loop, not only on success
  cparse_end ( parser );
  token = NULL;

  // Anything other than END_OF_INPUT means that a card failed;
  // in that case the output file is not written
  if (RC!=END_OF_INPUT)
    return RC;


  //  5.  Write updated coordinate file

  switch (FType)  {
    case MMDB_FILE_PDB    : RC      = MMDB.WritePDBASCII1 ( "XYZOUT" );
                            fmtName = " PDB";
                         break;
    case MMDB_FILE_CIF    : RC      = MMDB.WriteCIFASCII1 ( "XYZOUT" );
                            fmtName = " mmCIF";
                         break;
    case MMDB_FILE_Binary : RC      = MMDB.WriteMMDBF1 ( "XYZOUT" );
                            fmtName = " MMDB binary";
                         break;
    default               : printf ( " Unknown file type "
                                     "(report as a bug!)" );
                         return 100;
  }

  // Do not claim success if the write function reported a failure
  // NOTE(review): assumes the MMDB write functions return 0 on
  // success, like ReadCoorFile1 -- confirm against mmdb_file.h
  if (RC)  {
    printf ( " ***** ERROR #%i WRITE:\n\n %s\n\n",
             RC,GetErrorDescription(RC) );
    return RC;
  }

  fileName = getenv ( "XYZOUT" );
  printf ( "%s file %s has been written.\n",
           fmtName,fileName ? fileName : "(undefined)" );

  ccperror(0,"Normal termination");

  return RC;

}

