#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdarg.h>
#include <string.h>
 
#include "io_lib_header.h"
#include "util_lib_header.h"
#include "dp_lib_header.h"
#include "define_header.h"

int **index_seq_res      ( Sequence *S1, Sequence *S2, int **name_index)
{
  /*Index the residues of S1 according to S2
    index[seq1 of S1][z]->x, where x is the position of residue z of seq1/S1 in S2->seq[index[Seq1/S1]] 
  */ 
  int a;
  int **index;
  char *seq1, *seq2;
  Alignment *Profile;
  
  index=vcalloc ( S1->nseq, sizeof (int*));
  
  for (a=0; a< S1->nseq; a++)
    {

      seq1=S1->seq[a];
      
      if (name_index[a][0]==-1)
	seq2=NULL;
      else if (name_index[a][1]==-1)
	seq2=S2->seq[name_index[a][0]];
      else if ((Profile=seq2R_template_profile (S2, name_index[a][0])) !=NULL)
	{
	  seq2=Profile->seq_al[name_index[a][1]];
	}
      index[a]=get_res_index ( seq1, seq2);
    }
  return index;
}

int **index_seq_name ( Sequence *S1, Sequence *S2)
{
  /*Index the names of S1 according to S2
    index[seq1 of S1][0]->x  if seq1 is in seq2
                        ->-1 if seq1 is nowhere to be found
    index[seq1 of S1][1]->z if seq1 is the zth sequence within the xth profile of S2
  */
  int **index;
  int a, b, x, z;
  Alignment *Profile;
  index=declare_int (S1->nseq, 2);

  
  for ( a=0; a<S1->nseq; a++)
    {
      index[a][0]=index[a][1]=-1;
      x=name_is_in_list (S1->name[a],S2->name,S2->nseq,100);
      if ( x!=-1){index[a][0]=x;index[a][1]=-1;}
      for ( b=0; b<S2->nseq; b++)
	    {
	      if ((Profile=seq2R_template_profile (S2,b)))
		{
		  z=name_is_in_list (S1->name[a],Profile->name,Profile->nseq,100);
		  if ( z!=-1){index[a][0]=b;index[a][1]=z;b=S2->nseq;}
		}
	    }
    }
  return index;
}
		  
		  
	    
	  
int *get_name_index (char **l1, int n1, char **l2, int n2)
{
  int *r;
  int a;
  /*return Array[Index_L1]=Index_L2 */
  r=vcalloc ( n1, sizeof (int));
  for ( a=0; a< n1; a++)
    r[a]=name_is_in_list (l1[a],l2,n2,100);
  return r;
}

int* get_res_index (char *seq0, char *seq1)
{
  int *coor, a;

  if ( !seq0 || !seq1) return NULL;
  

  coor=vcalloc ( strlen (seq0)+1, sizeof (int));
  if (!strm (seq0, seq1))
    {
      int r0, r1 , isr0, isr1;
      int l0=0, l1=0;
      Alignment *A;
      A=align_two_sequences (seq0,seq1,"pam250mt", -10, -1, "myers_miller_pair_wise");
      for ( a=0; a< A->len_aln; a++)
	{
	  r0=A->seq_al[0][a];r1=A->seq_al[1][a];
	  isr0=!is_gap(r0);
	  isr1=!is_gap(r1);
	  l0+= isr0;
	  l1+= isr1;

	  if (isr0 && isr1)coor[l0-1]=l1-1;
	  else if (isr0)  coor[l0-1]=-1;
	}
      free_aln (A);
    }
  else
    {
      int l0;
      
      l0=strlen (seq0);
      for ( a=0;a< l0; a++)
	coor[a]=a;
    }
  
  return coor;
}

int change_residue_coordinate ( char *in_seq1, char *in_seq2, int v)
{
  /*Expresses the coordinate of a residue in seq1, in the coordinate system of seq2*/

  
  static char *seq1, *seq2;
  static int *coor;
  static int max;

  if ( seq1 !=in_seq1 || seq2 !=in_seq2)
    {
      int r0, r1 , isr0, isr1;
      int l0=0, l1=0;
      Alignment *A;
      int a;
      
      vfree (coor);
      seq1=in_seq1, seq2=in_seq2;
      A=align_two_sequences (seq1,seq2,"pam250mt", -10, -1, "myers_miller_pair_wise");
            
      coor=vcalloc ( A->len_aln, sizeof (int));
      for ( a=0; a< A->len_aln; a++)
	{
	  r0=A->seq_al[0][a];r1=A->seq_al[1][a];
	  
	  isr0=!is_gap(r0);
	  isr1=!is_gap(r1);
	  l0+= isr0;
	  l1+= isr1;

	  if (isr0 && isr1)coor[l0-1]=l1-1;
	  else if (isr0)  coor[l0-1]=-1;
	}
      free_aln (A);
    }
  return coor[v];
}
	  
   
int ** minimise_repeat_coor (int **coor, int nseq, Sequence *S)
    {
    int **new_coor;
    int a, min;
    new_coor=declare_int ( nseq, 3);
    min=return_min_int (coor, nseq, 2);
    for ( a=0; a< nseq; a++)
        {
	new_coor[a][0]=coor[a][0];
	new_coor[a][1]=coor[a][1];
	new_coor[a][2]=min;
	}
    return new_coor;
    }
int ** get_nol_seq ( Constraint_list *CL, int **coor, int nseq, Sequence *S)
    {
    int a, s, p, l, nl;
    int **buf;
    int **new_coor;
    
    new_coor=declare_int ( nseq+1, 3);
    
    
    buf=get_undefined_list ( CL);
    
    

    for ( a=0; a< nseq; a++)buf[coor[a][0]][coor[a][1]]=1;

    
    for ( a=0; a< nseq; a++)
        {
	s=coor[a][0];
	p=coor[a][1]+1;
	l=strlen(S->seq[s]);
	nl=0;
	while ( p<=l && !buf[s][p++])nl++;
	new_coor[a][0]=s;
	new_coor[a][1]=coor[a][1];
	new_coor[a][2]=nl;
	}
    free_int ( buf, -1);
    return new_coor;
    }



int compare_pos_column( int **pos1,int p1, int **pos2,int p2, int nseq)
    {
    int a,v1, v2;
    int identical=0;


    
    for ( a=0; a< nseq; a++)
	{
	
	v1=pos1[a][p1];
	v2=pos2[a][p2];
	
	if (v1>0 || v2>0) 
	    {
	    if ( v1!=v2)return 0;
	    else identical=1;
	    }
	}
    
    return identical;
    }
char *seq2alphabet (Sequence *S)
{
  return array2alphabet (S->seq, S->nseq);
}
char *aln2alphabet (Alignment *A)
{
  return array2alphabet (A->seq_al, A->nseq);
}  

char *array2alphabet (char **array, int n)
{
  int a, b, l;
  int *hasch;
  char *alphabet;

  hasch=vcalloc (256, sizeof (int));
  alphabet=vcalloc ( 257, sizeof (char));
  
  
  for ( a=0; a<n; a++)
    {
      l=strlen (array[a]);
      for ( b=0; b<l; b++)
	hasch[tolower(array[a][b])]++;
    }
  for ( a=0, b=0; a< 256; a++)
    if (hasch[a])alphabet[b++]=a;
  alphabet[b]='\0';
  vfree (hasch);
  return alphabet;
}
  
Alignment * seq2pdb   ( char *name, char *seq,Blast_param *B)
{
  Alignment *R;
  char *new_pdb;
  char *tmp_pdb;
  char *buf_seq;
  char command [LONG_STRING];
  /*1-Reads a sequence
    2-Make a blast against nlr3d
    3-pick up the best match
    4-get the pdb structure and write it in name.pdb
  */

 
  
  R=seq2blast ( name, seq,B);

  if ( R==NULL || R->nseq<2){free_aln (R);R= NULL;}
  else 
    {
      R->nseq=2;
      R->score=R->score_aln=aln2sim (R, "idmat");
      
      buf_seq=vcalloc ( R->len_aln+1, sizeof (char));
      sprintf (buf_seq, "%s", R->seq_al[1]);
      ungap (buf_seq);
	  
      tmp_pdb=normalize_pdb_file(is_pdb_struc(R->name[1]),buf_seq, vtmpnam (NULL));
	  
      if   ( !tmp_pdb){free_aln(R);R=NULL;}
      else 
	{
	  declare_name(new_pdb);
	  sprintf (new_pdb, "%s%s.pdb",get_cache_dir(), name);
	  if ( !check_file_exists(new_pdb))
	    {
	      sprintf (command, "cp %s %s", tmp_pdb, new_pdb);
	      my_system (command);
	      add2file2remove_list (new_pdb); /*make sure this pdb file disappears*/
	    }
	  vfree (new_pdb);
	}
      vfree (buf_seq);
    }
  return R;      
}

Alignment* seq2blast ( char *name, char *seq, Blast_param *B)
    {

      int **sim=NULL;
      int **cov=NULL;
      char loc_blast_server[100];
      char command [LONG_STRING];
      char *seq_file, *blast_result, *prf_result;
      char  permanent_blast_file[100];
      int n_failure=0;
      int a;

      Alignment *BProfile,*NBProfile;
      FILE *fp;

 
      
      seq_file=vtmpnam (NULL);
      blast_result=vtmpnam (NULL);
      prf_result=vtmpnam (NULL);

      sprintf ( loc_blast_server, "%s", B->blast_server);
      fp=vfopen (seq_file, "w");
      fprintf (fp, ">query\n%s",seq);
      vfclose (fp); 
      
      sprintf ( permanent_blast_file , "%sVs%s.blast_result",name, B->db);

      if (is_blast_file (permanent_blast_file))
	{
	  sprintf ( command, "cp %s %s", check_file_exists (permanent_blast_file), blast_result);
	  sprintf ( loc_blast_server, "File");
	}
      else
	{

	  
	  if ( strm2 (loc_blast_server, "sib", "SIB"))
	    {
	      check_program_is_installed ( "blastall.remote", SIB_BLAST_4_TCOFFEE, "SIB_BLAST_4_TCOFFEE","not public", 1); 
	       
	      sprintf (command, "%s -p blastp -s  sib-blast.unil.ch -d %s -i %s -m 6 >%s",SIB_BLAST_4_TCOFFEE,B->db, seq_file, blast_result);
	    }
	  
	  else if (strm2 (loc_blast_server,"ncbi","NCBI"))
	    {
	      check_program_is_installed ( "blastcl3",NCBI_BLAST_4_TCOFFEE, "NCBI_BLAST_4_TCOFFEE","ftp://ftp.ncbi.nih.gov/blast/network/netblast/", 1); 
     
	      sprintf (command, "%s -p blastp  -d %s -i %s -m 6 -o %s", NCBI_BLAST_4_TCOFFEE,B->db, seq_file, blast_result);
	    }
	  else if (strm2 (loc_blast_server,"local","LOCAL"))
	    {
	      check_program_is_installed ( "blastall",LOCAL_BLAST_4_TCOFFEE, "LOCAL_BLAST_4_TCOFFEE","ftp://ftp.ncbi.nih.gov/blast/", 1); 
	      sprintf (command, "%s -p blastp -d %s -i %s -m 6 -o %s",LOCAL_BLAST_4_TCOFFEE,B->db, seq_file, blast_result);
	    }
	  else
	    {
	      sprintf ( "ERROR: %s is not a legal blast mode [FATAL:%s]", loc_blast_server, PROGRAM);
	      exit ( EXIT_FAILURE);
	    }
	}
      
      fprintf (stderr, "\n\t\t[seq2blast: %s Vs %s",name, B->db); 
      while ( !check_file_exists ( blast_result))
	{
	  my_system (command);     
	  if ( !is_blast_file (blast_result))
	    {
	      fprintf ( stderr, "\n\t\t***Blast %s Failed on %s: Retry***",loc_blast_server,name );
	      n_failure++;
	      if ( n_failure==10)exit (EXIT_FAILURE);
	    }

	  check_program_is_installed ( "seq_reformat",SEQ_REFORMAT_4_TCOFFEE, "SEQ_REFORMAT_4_TCOFFEE",MAIL, 1); 
	  sprintf ( command, "%s -in %s -input blast_aln -action +rmgap_col '#1'  -output clustalw_aln > %s",SEQ_REFORMAT_4_TCOFFEE,blast_result, prf_result);
	  my_system (command);
	}
      
      fprintf (stderr, "(%s)]", loc_blast_server);
      
      
      if ( !is_blast_file (permanent_blast_file))
	{
	  sprintf (command, "cp %s %s%s", blast_result,get_cache_dir(),permanent_blast_file);
	  my_system ( command);
	}

     if ( !is_aln(prf_result))
       {
	 BProfile=NULL;
	 return NULL;
       }
     else 
       {
	 
	 BProfile=main_read_aln(prf_result, NULL);
       }

     if ( getenv4debug ("DEBUG_BLAST")){fprintf ( stderr, "\n[DEBUG_BLAST:seq2blast]");print_aln (BProfile);}
     
     NBProfile=declare_aln (NULL);
     NBProfile=realloc_aln2 (NBProfile,100,BProfile->len_aln);
     NBProfile->nseq=0;
     NBProfile->len_aln=BProfile->len_aln;
     
     sprintf ( NBProfile->seq_al[NBProfile->nseq], "%s", BProfile->seq_al[0]);
     sprintf ( NBProfile->name[NBProfile->nseq], "%s", BProfile->name[0]);
     NBProfile->nseq++;

     sim=get_sim_master_aln_array (BProfile,0, "idmat");
     cov=get_cov_master_aln_array (BProfile,0, "idmat");

     for ( a=1; a< BProfile->nseq && NBProfile->nseq <100; a++)
       {
	 if ( sim[0][a]>=B->min_id && sim[0][a]<=B->max_id && cov[0][a]>=B->min_cov)
	   {
	     sprintf ( NBProfile->seq_al[NBProfile->nseq], "%s", BProfile->seq_al[a]);
	     sprintf ( NBProfile->name[NBProfile->nseq], "%s", BProfile->name[a]);
	     NBProfile->nseq++;
	   }
       }
     print_aln (NBProfile);
     free_int (sim, -1);
     free_int (cov, -1);
     free_sequence (free_aln (BProfile), -1);

     return NBProfile;
    }
Sequence * seq2unique_name_seq ( Sequence *S)
	{
	int duplicated=0;
	int a, b;
	   	
	for (a=0; a< S->nseq-1; a++)
	    for (b=a+1; b<S->nseq; b++)
		{
		if ( strm (S->name[a], S->name[b]))
			{
	 		fprintf ( stderr, "\nWarning: Sequence %s is duplicated in file %s. The sequence will be renamed [WARNING:%s]", S->name[a], S->file[a], PROGRAM);
			duplicated=1;
			}
		}
	if (duplicated==0) return S;
	else
		{
		Alignment *A;
        	char *tmp1, *tmp2;
		char command[1000];

        	A=seq2aln (S, NULL, 0);
		tmp1=vtmpnam (NULL);tmp2=vtmpnam (NULL);
		output_fasta_seq (tmp1, A);
        	sprintf ( command, "t_coffee -other_pg seq_reformat -in %s -action +name2unique_name > %s", tmp1, tmp2);
        	my_system (command);
        	free_aln (A);
        	free_sequence (S, S->nseq);
        	S=get_fasta_sequence (tmp2, NULL);
		return S;
        	}
	}

Sequence * seq2blast_profile ( Sequence *S, int n, Blast_param *B)
    {
      Alignment *A;
      char *tmp;
      
      
      A=seq2blast ( S->name[n], S->seq[n],B);
      A=local_maln2global_maln ( S->seq[n], A); 
      output_clustal_aln ( tmp=vtmpnam (NULL), A);
            
      (S->T[n])->RB=fill_X_template ( S->name[n], tmp, "_RB_");
      fprintf (stderr, " [Kept %d Seqs]\n",A->nseq);
      
      if ( getenv4debug ( "DEBUG_BLAST")){fprintf ( stderr, "\n[DEBUG_BLAST:seq2blast_profile]");print_aln (A);}
      free_sequence (free_aln (A), -1);
    
      return S;
      }
	

      
int ** seq2aln_pos      (Alignment *A, int *ns, int **l_s)
    {
    int **code;
    int a, b,c, d,l, p , g;
    

    l=MAX(strlen (A->seq_al[l_s[0][0]]), strlen (A->seq_al[l_s[1][0]]));
    code=declare_int ((A->S)->nseq,l+1);
    
    for (c=0; c<2; c++)
        {
	l=strlen (A->seq_al[l_s[c][0]]);
	for (d=0; d<ns[c]; d++)
	    {
	    a=A->order[l_s[c][d]][0];
	    for (p=0, b=0; b<l; b++)
	        {
		    g=is_gap (A->seq_al[l_s[c][d]][b]);
		    if (!g){p++; code[a][p]=b+1;}
		}
	    }
	}
    return code;
    }

Alignment *local_maln2global_maln (char *seq, Alignment *A)
    {
      /*inputs a BLAST alignmnent where the master sequence may be partila
	outputs the same alignment, while amkeing sure the profile is perfectly in sink with its master sequence
      */

      int a, b, c;
      int start, end, rend;
      char qname[100], *p;
      Alignment *B=NULL;
      
      sprintf ( qname, "%s", A->name[0]);
      p=strtok (qname, "_");
      if ( !strm (p, "QUERY"))
	   {
	     fprintf ( stderr, "\nUnappropriate format for the alignment [%s:FATAL]", PROGRAM);
	     exit (EXIT_FAILURE);
	   }
	     
      start=atoi(strtok (NULL, "_"));
      end=atoi(strtok (NULL, "_"));
      rend=strlen (seq);
      
      B=copy_aln (A,NULL);
      if ( start>1 || end<rend )A=realloc_aln (A,rend+1);

      for (a=0; a<start-1; a++)
	{
	  A->seq_al[0][a]=seq[a];
	  for ( b=1; b< A->nseq; b++)A->seq_al[b][a]='-';
	}
      
      for (c=0,a=start-1; a< end; a++, c++)
	{
	  A->seq_al[0][a]=seq[a];
	  for ( b=1; b< A->nseq; b++)
	    {
	      A->seq_al[b][a]=B->seq_al[b][c];
	    }
	}
      for ( a=end; a<rend; a++)
	{
	  A->seq_al[0][a]=seq[a];
	  for ( b=1; b< A->nseq; b++)A->seq_al[b][a]='-';
	}
      for ( a=0; a< A->nseq; a++) A->seq_al[a][rend]='\0';
      free_aln (B);
      
      A->len_aln=rend;
      return A;
    }
  
int *  seq2inv_pos ( char *seq)
{
  /*returns a list where each value gives the index of the corresponding residue in seq*/
  /*Numbering: 1 to L : Analogy to the aln2pos*/
  
  int a,l1, l2;
  int *pos;
  
  l1=strlen ( seq);
  for ( l2=a=0; a< l1; a++)l2+=1-is_gap(seq[a]);
  pos=vcalloc (l2+1, sizeof (int));
  for ( l2=a=0; a< l1; a++)if (!is_gap(seq[a]))pos[++l2]=a+1;
  return pos;
}
  
  
int ** aln2pos_simple_2 (Alignment *A)
    {
    int **pos1;
    int **pos2;
    pos1=aln2pos_simple (A, A->nseq);
    pos2=duplicate_int  (pos1, A->nseq,read_size_int (pos1[0],sizeof (int)));
    pos1=aln2pos_simple (NULL, 0);
    return pos2;
    }
int ** aln2pos_simple (Alignment *A, int n_nseq, ...)
    {
    /*
    function documentation: start
    int ** aln2pos_simple (Alignment *A, int n_nseq, ...)

####with two parameter only: Alignment *A, int n_nseq

    this function turns A into pos, a matrix where each residue is replace by its inice acvcording to the complete sequence.
    the indices in pos are computed using A->order[x][1] that contains the indice of the first residue of seq x of A
    
    n_nseq MUST not be null

####with more than two param:
     int ** aln2pos_simple (Alignment *A, int n_nseq, int *ns, int **ls)
     n_nseq must be set to 0 for the param 3 and four to be read
     
     ns[x]=number seq in group 
     ls[x]=list of the sequences in group x ( size=ns[x])
     
    The computation of the indices is only carried out on the scpecified residues

####IMPORTANT
      in pos, the numbering of the residues goes from 1 to L:
        pos[0][0]=3, means that the first position of the first sequence 
	in the alignmnet contains residue #3 from sequence A->order[0][0];
	
    function documentation: end
    */

    int a, b,c, p, g,l;
    int **T;

    int max_nseq;
    int n_len=0;

    int *list=NULL;
    int *ns=NULL;
    int **ls=NULL;



    va_list ap;
    
    
    if ( A==NULL)
       {
	 return NULL;
       }
    else
       {
       if ( n_nseq>0)
          {
	  list=vcalloc(n_nseq, sizeof (int));
	  for ( a=0; a< n_nseq; a++)list[a]=a;
	  }
       else
          { 
	  va_start (ap, n_nseq);
	  ns=va_arg(ap, int * );
	  ls=va_arg(ap, int **);
	  va_end(ap);
	  list=vcalloc ( ns[0]+ns[1], sizeof (int));
	  n_nseq=0;
	  for ( a=0; a< ns[0]; a++)list[n_nseq++]=ls[0][a];
	  for ( a=0; a< ns[1]; a++)list[n_nseq++]=ls[1][a];

	  }
       max_nseq=MAX(read_size_int(A->order,sizeof (int*)),return_max_int (A->order, read_size_int(A->order,sizeof (int*)),0))+1;
       n_len=get_longest_string ( A->seq_al,A->max_n_seq, NULL, NULL)+1;  

       T=declare_int (max_nseq, n_len);
      
       for ( c=0; c< n_nseq; c++)
           {
	   a=list[c];	       
	   l=strlen ( A->seq_al[a]);
	   
	   for ( p=A->order[a][1],b=0; b<l; b++)
	       {
	       g=1-is_gap(A->seq_al[a][b]);
	       p+=g;
	       T[a][b]=(g==1)?p:-(1+p);
	       if ( A->seq_al[a][b]==UNDEFINED_RESIDUE)T[a][b]=0;
	       if ( A->seq_cache && T[a][b]>0)T[a][b]=A->seq_cache[A->order[a][0]][T[a][b]];
	       } 
	   }
       vfree (list);
       }
   
   return T;
   }
Alignment ** split_seq_in_aln_list ( Alignment **aln, Sequence *S, int n_seq, char **seq_list)
        {
	int a, b, c;
	char * long_seq=NULL;
	int    len,l;
	int  **translation;
	int  **table;


	

	if ( aln==NULL)return NULL;
	translation=declare_int ( S->nseq,2);
	
	for (len=0,a=0; a< S->nseq; a++)
	    {
	    if((b=name_is_in_list (S->name[a],seq_list, n_seq, 100))!=-1)
	       {
	       l=strlen(S->seq[a])+1;
	       long_seq=vrealloc(long_seq,(len+l+1)*sizeof(char));
	       long_seq=strcat(long_seq, S->seq[a]);
	       long_seq=strcat(long_seq, "*");   
	       
	       translation[a][0]=b;
	       translation[a][1]=len;
	       len+=l;
	       }
	    else translation[a][0]=-1;
	    }

	long_seq[len-1]='\0';
	len--;

	table=declare_int ( len+1, 2);

	for ( b=0,a=0; a< S->nseq; a++)
	    {
	    if ( translation[a][0]!=-1)
	       {
	       c=1;
	       while (long_seq[b]!='\0' && long_seq[b]!='*')
		   {
		   table[b+1][1]=c++;
		   table[b+1][0]=translation[a][0];
		   b++;
		   }
	       table[b][1]=c++;
	       table[b][0]=translation[a][0];
	       b++;
	       }
	    }

	for ( a=0; a< (aln[-1])->nseq; a++)
	    {
	    for ( b=0; b< (aln[a])->nseq; b++)
	        {
		
		(aln[a])->order[b][0]=table[(aln[a])->order[b][1]][0];
		(aln[a])->order[b][1]=table[(aln[a])->order[b][1]][1];
		sprintf ( (aln[a])->name[b],"%s_%d_%d", S->name[(aln[a])->order[b][0]],a+1,b+1); 
		}
	    }
	free_int (translation, -1);
	free_int (table,       -1);
	return aln;
	}



Sequence  *  fill_sequence_struc ( int nseq, char **sequences, char **seq_name)
	{
	int a;
	Sequence *S;
	int shortest, longuest;

	if ( nseq>1)
	  {
	    shortest=get_shortest_string( sequences, nseq, NULL, NULL);
	    longuest=get_longest_string (sequences, nseq, NULL, NULL);
	  }
	else if ( nseq==1)
	  {
	    shortest=longuest=strlen (sequences[0]);
	  }
	else
	  {
	    return NULL;
	  }
	

	S=declare_sequence (shortest, longuest,nseq);
	S->nseq=nseq;

	S->seq=copy_char ( sequences, S->seq, nseq, -1);
	S->name=copy_char ( seq_name, S->name,nseq, -1);
	
	ungap_array (S->seq,nseq);
	for ( a=0; a< S->nseq; a++)S->len[a]=strlen(S->seq[a]);
	return S;
	}

Alignment * expand_aln (Alignment *A)
  {
  /*This function expands the profiles within an alignment*/
  
  
  int a, b, d, e;
  Alignment *MAIN=NULL, *SUB=NULL;
  int n_sub_seq=0;
  int new_nseq=0;
  int *list;
  Alignment *Profile;
  
  if ( !A)return A;


  
  list=vcalloc (A->nseq, sizeof (int)); 
  for ( a=0; a< A->nseq; a++)
    {
      Profile=seq2R_template_profile (A->S, A->order[a][0]);
      if (Profile)new_nseq+=Profile->nseq;
      else 
	{
	  new_nseq++;
	  list[n_sub_seq++]=a;
	}      
    }
  
  if ( n_sub_seq==A->nseq){vfree(list);return A;}
  else if (n_sub_seq==0){MAIN=copy_aln (A, MAIN);MAIN->nseq=0;}
  else
    {
      MAIN=extract_sub_aln (A, n_sub_seq, list);
    }
  vfree(list);
  
  
  for ( a=0; a< A->nseq; a++)
    {
      Profile=seq2R_template_profile (A->S, A->order[a][0]);
      if ( Profile)
	{
	  SUB=copy_aln (Profile,SUB);
	  SUB=realloc_aln2(SUB, SUB->nseq, A->len_aln+1);
	  
	  for ( e=0,b=0; b<= A->len_aln; b++)
	    {
	      if ( is_gap(A->seq_al[a][b]))
		{for (d=0; d< SUB->nseq; d++)SUB->seq_al[d][b]='-';}
	      else 
	         {
		   for(d=0; d<SUB->nseq; d++)SUB->seq_al[d][b]=Profile->seq_al[d][e];
		   e++;
		 }
		   
	    }
	  MAIN=stack_aln(MAIN, SUB);
	}
    }
  free_aln (SUB);
  free_aln (A);
  return MAIN;
  }
Alignment * expand_number_aln (Alignment *A,Alignment *EA)
  {
  /*This function expands the profiles within an alignment*/
  
    
  int a, b, d, e;
  Alignment *MAIN=NULL, *SUB=NULL, *C=NULL;
  int n_sub_seq=0;
  int new_nseq=0;
  int *list;
  Alignment *Profile;

  if ( !EA || !A)return EA;

  if ( EA->nseq<A->nseq){fprintf (stderr, "\n[ERROR:expand_number_aln] Using as a master an expanded aln [FATAL:%s]", PROGRAM);exit (EXIT_FAILURE);}
  

  list=vcalloc (EA->nseq, sizeof (int)); 
  for ( a=0; a< EA->nseq; a++)
    {
      Profile=seq2R_template_profile (EA->S, EA->order[a][0]);
      if (Profile)new_nseq+=Profile->nseq;
      else 
	{
	  new_nseq++;
	  list[n_sub_seq++]=a;
	}      
    }
  
  if ( n_sub_seq==EA->nseq){vfree(list);return EA;}
  else if (n_sub_seq==0){MAIN=copy_aln (EA, MAIN);MAIN->nseq=0;}
  else
    {
      MAIN=extract_sub_aln (EA, n_sub_seq, list);      
    }
  
  
  list[0]=EA->nseq;
  C=extract_sub_aln (EA,1, list);  
  vfree(list);
  
  
  
  for ( a=0; a< EA->nseq; a++)
    {
      Profile=seq2R_template_profile (EA->S, EA->order[a][0]);
      if ( Profile)
	{
	  SUB=copy_aln (Profile,SUB);
	  SUB=realloc_aln2(SUB, SUB->nseq, EA->len_aln+1);
	  
	  for ( e=0,b=0; b<= EA->len_aln; b++)
	    {
	      if (is_gap(A->seq_al[a][b]))
		{
		for ( d=0; d<SUB->nseq; d++)
		  SUB->seq_al[d][b]=NO_COLOR_RESIDUE;
		}
	      else
		{
		  for ( d=0; d<SUB->nseq; d++)
		    {

		      if ( is_gap (Profile->seq_al[d][e]))
			{
			  SUB->seq_al[d][b]=NO_COLOR_RESIDUE;
			}
		      else SUB->seq_al[d][b]=EA->seq_al[a][b];
		    }
		  e++;
		}
	    }
	  for (d=0; d< SUB->nseq; d++)SUB->score_seq[d]=EA->score_seq[a];
	  
	  MAIN=stack_aln(MAIN, SUB);
	}
    }
  
  MAIN=stack_aln(MAIN, C);
  MAIN->nseq--;
  MAIN->score=MAIN->score_aln=EA->score_aln;
  
  free_aln (SUB);
  free_aln (EA);
  
  free_aln (C);
  
  return MAIN;
  }

Alignment * probabilistic_rm_aa ( Alignment *A, int pos, int len)
{
  int random_len=0;
  int a, b;
  int left, right;

  if ( len<0)
    {
      random_len=1;
      len=-len;
    }
  
  vsrand(0);

  if (pos==0)pos= (rand()%(A->len_aln-(2*len+len))) +len;
  
   
  for ( a=0; a< A->nseq; a++)
	{
	  if (random_len)left =rand()%len;
	  else left=len;
	  if (random_len)right=rand()%len;
	  else right=len;
	  if ( (pos-right)<0 || (pos+left)>A->len_aln)
	    {
	      fprintf ( stderr, "\nWarning: probabilistic_rm_aa, pos out of range [%s]\n", PROGRAM);
	    }
	  else
	    for ( b=pos-right; b<pos+left; b++)A->seq_al[a][b]=(b==pos)?'~':'*';
	}
  /*print_aln (A);*/ 
  ungap_aln (A);
  free_sequence ( A->S, A->nseq);
  A->S=aln2seq (A);
  return A;
  
}
	  
Alignment * remove_gap_column ( Alignment *A, char *mode)
  {
    int   a, b;
    char *p;
    int  *seq_list;
    int   nseq=0;
    int keep_col, cl;


    seq_list =vcalloc ( A->nseq, sizeof (int));
    while (  (p=strtok(mode, ":")))
      {
	mode=NULL;
	if (p[0]=='#')
	  {
	    seq_list[nseq++]=atoi(p+1)-1;
	  }
	else if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)
	  {
	    seq_list[nseq++]=a;
	  }
      }
    
    if ( nseq==0)
      {
	for ( a=0; a< A->nseq; a++)seq_list[a]=a;
	nseq=A->nseq;
      }

    for ( cl=0,a=0; a<=A->len_aln; a++)
      {
	for (keep_col=1, b=0; b< nseq && keep_col; b++)
	  {
	    keep_col=(is_gap(A->seq_al[seq_list[b]][a]))?0:keep_col;
	  }
	
	if ( keep_col)
	  {
	    for ( b=0; b< A->nseq; b++)
	      {
		A->seq_al[b][cl]=A->seq_al[b][a];
	      }
	    cl++;
	  }
	else
	  {
	    for ( b=0; b< A->nseq; b++)
	      {
		A->seq_al[b][cl]='-';
	      }
	    cl++;
	  }
      }
    A->len_aln=cl;
    vfree (seq_list);
    
    return A;
  }


Alignment * ungap_sub_aln (Alignment *A, int ns, int *ls)
        {

	int a, b, c,t;
	int len;

	len=strlen ( A->seq_al[ls[0]]);

	for ( c=0,a=0; a<len; a++)
		{
		for ( t=0,b=0; b<ns; b++)
			t+=is_gap(A->seq_al[ls[b]][a]);
		if (t==ns);
		else
		    {
		    for ( b=0; b<ns; b++)
		    	A->seq_al[ls[b]][c]=A->seq_al[ls[b]][a];
		    c++;
		    }
	 	}
	 for ( b=0; b<ns; b++)A->seq_al[ls[b]][c]='\0';  
	 return A;
	}
Sequence * ungap_seq ( Sequence *S)
        {
	  int a;
	  
	  if ( !S)return NULL;
	  ungap(S->seq[0]);
	  S->max_len=S->min_len=strlen (S->seq[0]);
	  for ( a=0; a< S->nseq; a++)
	    {
	      ungap(S->seq[a]);
	      S->len[a]=strlen (S->seq[a]);
	      S->max_len=MAX(S->max_len,S->len[a]);
	      S->min_len=MAX(S->min_len,S->len[a]);
	    }
	  return S;
	  
	}

Alignment *ungap_aln_n ( Alignment *A, int p)
	{
/*remove all the columns of gap-only within an alignment*/  
	int a, b, c;
	int t;
	
	if ( A->nseq==0)return A;

	for ( c=0,a=0; a< A->len_aln; a++)
		{
		for ( t=0,b=0; b<A->nseq; b++)
			t+=is_gap(A->seq_al[b][a]);
		if ((t*100)/A->nseq>=p || (t==A->nseq && p==100) || (t && p==1));
		else
		    {
		    for ( b=0; b<A->nseq; b++)
		    	A->seq_al[b][c]=A->seq_al[b][a];
		    c++;
		    }
	 	}
	 for ( b=0; b<A->nseq; b++)A->seq_al[b][c]='\0';
	 A->len_aln=c; 
	 return A;

	 }

Alignment *ungap_aln ( Alignment *A)
{
  return ungap_aln_n (A, 100);
}
/*
Alignment *ungap_aln ( Alignment *A)
	{
	int a, b, c,t;
	
	for ( c=0,a=0; a< A->len_aln; a++)
		{
		for ( t=0,b=0; b<A->nseq; b++)
			t+=is_gap(A->seq_al[b][a]);
		if (t==A->nseq);
		else
		    {
		    for ( b=0; b<A->nseq; b++)
		    	A->seq_al[b][c]=A->seq_al[b][a];
		    c++;
		    }
	 	}
	 for ( b=0; b<A->nseq; b++)A->seq_al[b][c]='\0';
	 A->len_aln=c; 
	 return A;

	 }
*/


Alignment *remove_end (Alignment *A)
        {
	int a, b, d;
	int left, right;

	for (a=0; a< A->len_aln; a++)
	    {
	    for ( b=0, d=0; b< A->nseq; b++)
		if ( !is_gap(A->seq_al[b][a]))d++;
	    if ( d>1)break;
	    }
	left=a;
	for (a=A->len_aln-1; a>0; a--)
	    {
	    for ( b=0, d=0; b< A->nseq; b++)
		if ( !is_gap(A->seq_al[b][a]))d++;
	    if ( d>1)break;
	    }
	right=a;

	return extract_aln(A, left, right+1);
	}

Alignment* condense_aln (Alignment *A)
{
  /* condense complementarz columns:
     X-       X
     -X  ....>X
     X-       X

  */
  int a, b, plen, n,m, r1, r2;
  
  plen=0;
  while ( A->len_aln !=plen)
    {
      plen=A->len_aln;
      for ( a=0; a< A->len_aln-1; a++)
	{
	  for ( n=m=b=0; b< A->nseq; b++)
	    {
	      r1=is_gap(A->seq_al[b][a]);
	      r2=is_gap(A->seq_al[b][a+1]);
	      n+=(r1 || r2);
	      m+=r1;
	    }
	
	  if ( n==A->nseq && m!=A->nseq)
	    {
	      for (b=0; b< A->nseq; b++)
		{
		  if (!is_gap(A->seq_al[b][a+1]))
		      {
			A->seq_al[b][a]=A->seq_al[b][a+1];
			A->seq_al[b][a+1]='-';
			}
		}
	      a++;
	    }
	}
    }
  A=ungap_aln(A);  
  return A;
}
		    
		      
		  

void compress_aln ( Alignment *A)
        {

	  /*remove all the columns of gap-only within an alignment*/  
	int a, b, c, d;
        
        

	for (c=0, a=0; a< A->len_aln; a++)
	  {
	    for ( b=0, d=0; b< A->nseq; b++)
		if ( A->seq_al[b][a]!='-'){d=1; break;}
	    if ( d==0);
	    else
	      {
		for (b=0; b< A->nseq; b++)
		  A->seq_al[b][c]=A->seq_al[b][a];
	      c++;
	      }
	  }
	 A->len_aln=c;
	
	for ( a=0; a< A->nseq; a++)
	  A->seq_al[a][c]='\0';
	}

Alignment *seq_coor2aln ( Sequence *S, Alignment *A, int **coor, int nseq)
        {
	int a;
	char *buf;

	A=realloc_alignment2(A, nseq, return_maxlen ( S->seq, S->nseq)+1);
	for ( a=0; a< S->nseq; a++)sprintf ( A->file[a], "%s", S->file[a]);
	for ( a=0; a< nseq; a++)
	    {
	    sprintf (A->name[a], "Repeat_%d_%d", a, coor[a][0]);
	    buf=extract_char ( S->seq[coor[a][0]], coor[a][1]-1, coor[a][2]);
	    sprintf ( A->seq_al[a],"%s", buf);
	    vfree(buf);
	    A->order[a][0]=0;
	    A->order[a][1]=coor[a][1]-1;
	    }
	A->nseq=nseq;
	return A;
	}

Alignment *strings2aln (int nseq,...)
        {
	  /*strings2aln(nseq, <name1>, <seq1>, <name2>, <seq2>....)*/
	  va_list ap;
	  char **list, **list2;
	  char **name, **name2;
	  Sequence *S;
	  Alignment *A;
	  int a, max;

	  va_start(ap, nseq);
	  list=vcalloc (nseq, sizeof (char*));
	  name=vcalloc (nseq, sizeof (char*));
	  for ( a=0; a< nseq; a++)
	    {
	      name[a]=va_arg(ap,char*);
	      list[a]=va_arg(ap,char*);
	      
	    }
	  va_end(ap);
	  
	  for ( max=0,a=0; a< nseq; a++)
	    {
	      max=(strlen (list[a])>max)?strlen(list[a]):max;
	    }
	  list2=declare_char (nseq, max+1);
	  name2=declare_char (nseq, MAXNAMES+1);
	  
	  for ( a=0; a< nseq; a++)
	    {
	      sprintf ( list2[a], "%s", list[a]);
	      sprintf ( name2[a], "%s", name[a]);
	    }

	  
	  S=fill_sequence_struc(nseq,list2,name2);
	  
	  free_char (list2, -1);
	  free_char (name2, -1);
	  vfree (list);
	  vfree(name);
	  A=seq2aln(S,NULL, 1); 
	  return A;
	}

Alignment *seq2aln ( Sequence *S, Alignment *A,int rm_gap)
	{
	int a;

	A=realloc_alignment2(A, S->nseq, S->max_len+1); 	
	for ( a=0; a< S->nseq; a++)sprintf ( A->file[a], "%s", S->file[a]);
	A->nseq=S->nseq;
	A->max_len=S->max_len;
	A->min_len=S->min_len;

	for ( a=0; a< S->nseq; a++)
		{
		A->order[a][0]=a;
		A->order[a][1]=0;
		sprintf ( A->seq_comment[a], "%s", S->seq_comment[a]);
		sprintf ( A->aln_comment[a], "%s", S->aln_comment[a]);
		
		sprintf ( A->name[a], "%s", S->name[a]);
		sprintf ( A->seq_al[a], "%s", S->seq[a]);
		
		ungap ( A->seq_al[a]);
		A->len[a]=strlen ( A->seq_al[a]); 
		
		if ( rm_gap==0)sprintf ( A->seq_al[a], "%s", S->seq[a]);
		
		}
	padd_aln (A);
	A->S=S;
	return A;
	}

Alignment *padd_aln ( Alignment *A)
{
  A->seq_al=padd_string (A->seq_al, A->nseq, '-');
  A->len_aln=strlen (A->seq_al[0]);
  return A;
}

char **padd_string ( char **string, int n,char pad)
{
  /*Pads a the strings so that they all have the same length*/
  
  int max_len, a;
  char *buf;
  
  max_len=get_longest_string  (string,n, NULL, NULL);
  for (a=0; a<n; a++)
	    {
	    buf=generate_null (max_len-strlen (string[a]));
	    strcat ( string[a], buf);	
	    vfree (buf);	    
	    }	
  return string;
}

	
Alignment * add_align_seq2aln ( Alignment *A, char *seq, char *seq_name)
        {
	  if ( !A)
	    {
	      A=declare_aln (NULL);
	      A=realloc_aln2 ( A, 1, strlen (seq)+1);
	      A->nseq=0;
	      sprintf ( A->name[A->nseq], "%s", seq_name);
	      sprintf ( A->seq_al[A->nseq], "%s", seq);
	      A->nseq++;
	      
	    }	  
	  else if ( strlen (seq)!=A->len_aln)
	    {
	      fprintf ( stderr, "\nError: Attempt to stack incompatible aln and aligned sequence[FATAL]\n");
	      exit (EXIT_FAILURE);
	      A=NULL;
	    }
	  else
	    {

	      A=realloc_aln2 ( A, A->nseq+1, A->len_aln+1);
	      sprintf ( A->name[A->nseq], "%s", seq_name);
	      sprintf ( A->seq_al[A->nseq], "%s", seq);
	      A->nseq++;
	    }
	  return A;
	}
  

Alignment *aln2number (Alignment *A)
        {
	A->seq_al=char_array2number(A->seq_al, A->nseq);
	return A;
	}
Sequence *seq2number (Sequence *A)
        {
	A->seq=char_array2number(A->seq, A->nseq);
	return A;
	}


Sequence * aln2seq (Alignment *A)
	{
	Sequence *LS;
	int a;

	LS=declare_sequence ( A->len_aln+1, A->len_aln+1, A->nseq);
	LS->nseq=A->nseq;
	for ( a=0; a< LS->nseq; a++)
		{
		sprintf (LS->file[a], A->file[a]); 
		LS->len[a]=A->len[a];
		sprintf ( LS->seq[a], A->seq_al[a]);
		if ( LS->contains_gap==0)ungap ( LS->seq[a]);
		
		LS->len[a]=strlen ( LS->seq[a]);
		sprintf ( LS->seq_comment[a], A->seq_comment[a]);
		sprintf ( LS->aln_comment[a], A->aln_comment[a]);
		sprintf ( LS->name[a], "%s", A->name[a]);
		}
	return LS;
	}

Sequence  *keep_residues_in_seq ( Sequence *S, char *list, char replacement)
{
  Alignment *A=NULL;
  int a;
  
  A=seq2aln (S, A,1);
  A=keep_residues_in_aln ( A, list, replacement);
  for ( a=0; a< A->nseq; a++)
    {
      ungap (A->seq_al[a]);
      sprintf ( S->seq[a], "%s", A->seq_al[a]);
    }
  free_aln (A);
  return S;
}


Alignment *aln2short_aln ( Alignment *A, char *list, char *new, int spacer)
{
  int a, b, r, cl, l;
  char *buf;
  
  for ( a=0; a< A->nseq; a++)
    {
      buf=vcalloc ( strlen (A->seq_al[a])+1, sizeof (char));
      
      for (l=0,cl=0, b=0; b< A->len_aln; b++)
	{
	  r=A->seq_al[a][b];
	  if ( is_gap(r));
	  else if ( is_in_set (r, list))
	    {
	      if (cl){cl=0; buf[l++]=new[0];}
	      buf[l++]=r;
	    }
	  else
	    {
	      if ( cl==spacer){buf[l++]=new[0];cl=0;}
	      cl++;
	    }
	  
	}
      
      buf[l]='\0';
      sprintf (A->seq_al[a], "%s", buf);
      vfree (buf);
    }
  return A;
}
  
Alignment *keep_residues_in_aln ( Alignment *A, char *list, char replacement)
{
  return filter_keep_residues_in_aln (A,NULL, 0, -1, list, replacement);
}
Alignment *filter_keep_residues_in_aln ( Alignment *A,Alignment *ST, int use_cons, int value, char *list, char replacement)
{
  char **sl;
  int n, a;
  
  n=strlen (list);
  sl=declare_char (n+1, 256);
  for (a=0; a< n; a++)
    sprintf ( sl[a], "%c%c", list[a], list[a]);
  sprintf ( sl[a],"#%c", replacement);
  A=filter_aln_convert (A, ST,use_cons,value, n+1, sl);
  free_char (sl, -1);
  return A;
}
  

Alignment *filter_convert_aln ( Alignment *A,Alignment *ST, int use_cons, int value, int n, ...)
{
  va_list ap;
  char **sl;
  int a;
  va_start (ap, n);
  sl=vcalloc ( n,sizeof(char*));
  for ( a=0; a< n; a++)
    {
      sl[a]=va_arg(ap, char * );
    }
  va_end(ap);
  A=filter_aln_convert (A,ST,use_cons,value, n,sl);
  vfree(sl);
  return A;
}

Alignment * filter_aln ( Alignment *A, Alignment *ST, int value)
        {
	  return filter_aln_convert (A, ST,0,value,DELETE, NULL);
	}
Alignment * filter_aln_upper_lower ( Alignment *A, Alignment *ST,int use_cons, int value)
        {
	  
	  return filter_aln_convert (A, ST,use_cons,value, LOWER, NULL);
	}
Alignment * filter_aln_lower_upper ( Alignment *A, Alignment *ST,int use_cons, int value)
        {
	  
	  return filter_aln_convert (A, ST,use_cons,value, UPPER, NULL);
	}
Alignment * filter_aln_convert ( Alignment *A, Alignment *ST,int use_cons, int value, int n_symbol,char **symbol_list)
        {
	  int a, b, c;
	  int st;
	  int cons;

	  if ( use_cons)
	    {
	      cons=name_is_in_list ("cons", ST->name,ST->nseq, 100);
	      if ( cons==-1)cons=name_is_in_list ("Cons", ST->name,ST->nseq, 100);
	      if ( cons==-1)use_cons=0;
	    }
	  
	  A->residue_case=2;
	  for ( a=0; a< A->nseq; a++)
	        {
		if(value!=10 && ST && !use_cons)for ( c=0; c< ST->nseq; c++)if ( strm(ST->name[c], A->name[a]))break;
		for ( b=0; b< A->len_aln; b++)
		    {
		      if ( value==10 || !ST)st=11;
		      else if ( ST && use_cons)
			{
			  st=(isdigit(ST->seq_al[cons][b]))?ST->seq_al[cons][b]-'0':ST->seq_al[cons][b];
			}
		      else st=(isdigit(ST->seq_al[c][b]))?ST->seq_al[c][b]-'0':ST->seq_al[c][b];
		      
		     
		      if ( st==value || value==-1 || st==NO_COLOR_RESIDUE) 
			{
			  if      ( n_symbol==UPPER  && !symbol_list)A->seq_al[a][b]=toupper (A->seq_al[a][b]);
			  else if ( n_symbol==LOWER  && !symbol_list)A->seq_al[a][b]=tolower (A->seq_al[a][b]);
			  else if ( n_symbol==DELETE && !symbol_list)A->seq_al[a][b]='-';
			  else
			    {
			      A->seq_al[a][b]=convert(A->seq_al[a][b],n_symbol,symbol_list);
			    }
			}
		     
		    }
		}
	  return A;
	} 
int  * count_in_aln ( Alignment *A, Alignment *ST, int value, int n_symbol,char **symbol_list, int *table)
        {
	  int a, b, c, d;
	  int st;
	  
	  if (!table)table=vcalloc (n_symbol, sizeof (int));

	  A->residue_case=2;
	  for ( a=0; a< A->nseq; a++)
	        {
		if(value!=10 && ST)for ( c=0; c< ST->nseq; c++)if ( strm(ST->name[c], A->name[a]))break;
		for ( b=0; b< A->len_aln; b++)
		    {
		      if ( value==10 || !ST)st=11;
		      else st=(isdigit(ST->seq_al[c][b]))?ST->seq_al[c][b]-'0':ST->seq_al[c][b];
		      if ( st==value || value==-1) 
			{
			  for ( d=0; d<n_symbol; d++)table[d]+=is_in_set ( A->seq_al[a][b], symbol_list[d]);
			}
		    }
		}
	  return table;
	} 

char *dna_aln2cons_seq ( Alignment *A)
        {
	int a, b, best;
	static int **column_count;
	static int **old_tot_count;
	static int **new_tot_count;
	static char *string1, *string2;
	int **count_buf;
	char r1, r2,*seq;
	int NA=0, NG=1, NC=2, NT=3, IGAP=4;
	static int   MAX_EST_SIZE=10000;
	static int   size_increment=1000;
	static int first;
	int overlap=0, best_overlap=0;
	

	seq=vcalloc ( A->len_aln+1, sizeof (char));

	if (!column_count )
	  {
	    column_count=vcalloc(MAX_EST_SIZE, sizeof (int*));
	    for ( a=0; a< MAX_EST_SIZE; a++)
	      column_count[a]=vcalloc (5, sizeof (int));
	  
	    old_tot_count=vcalloc(MAX_EST_SIZE, sizeof (int*));
	    new_tot_count=vcalloc(MAX_EST_SIZE, sizeof (int*));
	    A->P=declare_profile( "agct-",MAX_EST_SIZE);
	    string1=vcalloc (MAX_EST_SIZE, sizeof (char));
	    string2=vcalloc (MAX_EST_SIZE, sizeof (char));
	  }
	else if (A->len_aln>MAX_EST_SIZE)
	  {
	    if ( column_count)
	      {
		for ( a=0; a< MAX_EST_SIZE; a++)
		  vfree(column_count[a]);
		vfree(column_count);
		vfree(old_tot_count);
		vfree(new_tot_count);
		vfree(string1);
		vfree(string2);
	      }
	    
	  column_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));
	  for ( a=0; a< MAX_EST_SIZE+ size_increment; a++)
	      column_count[a]=vcalloc (5, sizeof (int));
	  
	  old_tot_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));
	  new_tot_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));
	  
	  for (a=0; a< MAX_EST_SIZE; a++)
	    {
	      old_tot_count[a]=*(column_count++);
	      for ( b=0; b<5; b++)old_tot_count[a][b]=(A->P)->count[b][a];
	    }
	  free_int ( (A->P)->count, -1);
	  
	  (A->P)->count=declare_int (5, MAX_EST_SIZE+ size_increment);
	  (A->P)->max_len=MAX_EST_SIZE+ size_increment;
	  MAX_EST_SIZE+= size_increment;
	  string1=vcalloc (MAX_EST_SIZE, sizeof (char));
	  string2=vcalloc (MAX_EST_SIZE, sizeof (char));
	  }
	
	
	sprintf ( string1, "%s",A->seq_al[0]);
	sprintf ( string2, "%s",A->seq_al[1]);
	

	string1=mark_internal_gaps(string1,'.');
	string2=mark_internal_gaps(string2,'.');

	
	
	for (b=0,a=0; a< A->len_aln; a++)
	  {
	    r1=string1[a];
	    r2=string2[a];
	    
	    if ( r1==r2)
	      {
		overlap++;
	      }
	    else
	      {
		best_overlap=MAX(overlap, best_overlap);
		overlap=0;
	      }


	    if (!is_gap(r1) && first==1)new_tot_count[a]=old_tot_count[b++]; 
	    else if (is_gap(r1) || first==0){new_tot_count[a]=*column_count;column_count++;};
	    
	    if ( first==0)
	      {
		if(r1=='a')       new_tot_count[a][NA]++;
		else if ( r1=='g')new_tot_count[a][NG]++;
		else if ( r1=='c')new_tot_count[a][NC]++;
		else if ( r1=='t')new_tot_count[a][NT]++;	
		else if (is_gap(r1));
		else
		  {
		   new_tot_count[a][NA]++;
		   new_tot_count[a][NG]++;
		   new_tot_count[a][NC]++;
		   new_tot_count[a][NT]++;
		  }
	      }
	    if ( a> 0 && a<A->len_aln-1 && r1=='.')
	      {
		new_tot_count[a][IGAP]+=((new_tot_count[a-1][NA]+new_tot_count[a-1][NG]+new_tot_count[a-1][NC]+new_tot_count[a-1][NT]));
	      }
	    

	    if(r2=='a')       new_tot_count[a][NA]++;
	    else if ( r2=='g')new_tot_count[a][NG]++;
	    else if ( r2=='c')new_tot_count[a][NC]++;
	    else if ( r2=='t')new_tot_count[a][NT]++;
	    else if ( r2=='.')new_tot_count[a][IGAP]++;
	    else if ( r2=='-');
	    else 
	      {
		new_tot_count[a][NA]++;
		new_tot_count[a][NG]++;
		new_tot_count[a][NC]++;
		new_tot_count[a][NT]++;	
	      }
	    (A->P)->count[0][a]=new_tot_count[a][NA];
	    (A->P)->count[1][a]=new_tot_count[a][NG];
	    (A->P)->count[2][a]=new_tot_count[a][NC];
	    (A->P)->count[3][a]=new_tot_count[a][NT];
	    (A->P)->count[4][a]=new_tot_count[a][IGAP];

	    best_int(4,1, &best,new_tot_count[a][NA], new_tot_count[a][NG],new_tot_count[a][NC],new_tot_count[a][NT]); 
	    if( best==0)      seq[a]='a';
	    else if ( best==1)seq[a]='g';
	    else if ( best==2)seq[a]='c';
	    else if ( best==3)seq[a]='t';
	  }

	first=1;

	seq[a]='\0';
	fprintf ( stderr, "[Best Overlap: %d Residues]", best_overlap);
	count_buf=old_tot_count;
	old_tot_count=new_tot_count;
	new_tot_count=count_buf;

	return seq;
	
	}
char *aln2cons_seq ( Alignment *A, int ns, int *ls, int n_groups, char **group_list)
        {
	char *seq;
	int a, b, c;
	int best_group=0;
	int aa_group=0;
	int *group;
	int len;
	int clean_ls=0;
	
	len=strlen  (A->seq_al[ls[0]]);
	seq=vcalloc (len+1, sizeof (char));

	if ( ns==0)
	  {
	    ns=A->nseq;
	    ls=vcalloc ( A->nseq, sizeof (int));
	    for ( a=0; a< A->nseq; a++)ls[a]=a;
	    clean_ls=1;
	  }


	if ( !group_list)
	   {
	       group_list=declare_char ( 26, 2);
	       for ( a=0; a<26; a++)group_list[a][0]=a+'a';
	       n_groups=26;
	       aa_group=1;
	   }
	
	
	for ( a=0; a<len; a++)
	    {
		group=vcalloc (n_groups+1, sizeof (int));
		for (best_group=0,b=0; b< ns; b++)
		    {
		    if ( !is_gap(A->seq_al[ls[b]][a]))
			 {
			 for (c=0; c< n_groups; c++)
			     if ( is_in_set (tolower(A->seq_al[ls[b]][a]), group_list[c]))
			                 {group[c]++;
					  best_group=(group[c]>group[best_group])?c:best_group;
					 }
			 }
		    seq[a]=group_list[best_group][0];
		    }
		vfree (group);
	    }
	seq[a]='\0';
	if ( aa_group) free_char (group_list, -1);

	if ( clean_ls)vfree(ls);
	
	return seq;
	}
		
char *aln2cons_seq_mat ( Alignment *A, char *mat_name)
{
  return sub_aln2cons_seq_mat (A, A->nseq, NULL, mat_name);
}
char *sub_aln2cons_seq_mat2 ( Alignment *A,int ns, char **ls, char *mat_name)
{
  char *cons;
  int *list;
  list=name_array2index_array(ls, ns, A->name, A->nseq);
  cons=sub_aln2cons_seq_mat  ( A,ns, list, mat_name);
  vfree (list);
  return cons;
}

char *sub_aln2cons_seq_mat  ( Alignment *A,int ns, int *ls, char *mat_name)
{
 int a, b, c, s;
 char *seq, r1, r2;
 int **mat;
 int score=0, best_score, best_r;
 int len;
 int naa;
 
 mat=read_matrice (mat_name);
 len=strlen ( A->seq_al[(ls==NULL)?0:ls[0]]);
 seq=vcalloc (len+1, sizeof (char));
 for ( a=0; a<len; a++)	    
   {
     for (b=0; b<20; b++)
       {
	 r1=AA_ALPHABET[b];
	 for ( naa=0,score=0,c=0; c<ns; c++)
	   {
	     s=(ls==NULL)?c:ls[c];
	     if ( ls && ls[c]==-1) continue;
	     else if (is_gap(A->seq_al[s][a]))continue;
	     else 
	       {
		 naa++;
		 r2=A->seq_al[s][a];
		 score+=mat[r1-'A'][r2-'A'];
	       }
	   }
	 if (naa==0)best_r='-';
	 if ( b==0 || score>best_score){best_score=score; best_r=r1;}
       }
     seq[a]=best_r;
   }
 free_int (mat, -1);
 return seq;
}
  
int  seq_list2fasta_file( Sequence *S,  char *list, char *file)
        {
	FILE *fp;
	int n, a, s;
	static char *buf;
	static int blen;
	int l;
	

	/*Buf is used because cmalloced functions cannot go through strtok*/
	if ( !S)return 0;
	else
	  {
	    fp=vfopen ( file, "w");
	    if ( !list)
	      {
		for ( a=0; a<S->nseq; a++)
		  {
		    fprintf ( fp, ">%s\n%s\n", S->name[a], S->seq[a]);
		  }
	      }
	    else
	      {
		l=strlen (list);
		if ( l>blen)
		  {
		    if (buf)free(buf);
		    buf=vcalloc ( strlen (list)+1, sizeof (char));
		    sprintf ( buf, "%s", list);
		    blen=l;
		  }
		
		n=atoi(strtok (list,SEPARATORS));
		for ( a=0; a< n; a++)
		  {
		    s=atoi(strtok (NULL, SEPARATORS));
		    fprintf ( fp, ">%s\n%s\n", S->name[s], S->seq[s]);
		  }
	      }
	    vfclose (fp);
	  }
	return 1;
	}
Structure * seq2struc ( Sequence *S, Structure *ST)
        {
	int a, b;
	
	for ( a=0; a< S->nseq; a++)
	    for ( b=0; b< S->len[a]; b++)
		ST->struc[a][b+1][ST->n_fields-1]=S->seq[a][b];
	return ST;
	}

void aln2struc (Alignment *A, Structure *ST) 
        {
	int a, b, c;

	for ( a=0; a< A->nseq; a++)
	    for (c=0, b=0; b< A->len_aln; b++)
	        {
		if ( !is_gap (A->seq_al[a][b]))
		     {
		     ST->struc[a][c][ST->n_fields-1]=A->seq_al[a][b];
		     c++;
		     }
		}
	}
Alignment *stack_aln (Alignment *A, Alignment *B)
        {
	int a,b;
	int max_len=0, max_nseq=0;
	if ( B==NULL)return A;
	if ( A==NULL)return B;
	
	max_nseq=A->nseq+B->nseq;
	for (a=0; a< A->nseq; a++)max_len=MAX(strlen(A->seq_al[a]),max_len);
	for (a=0; a< B->nseq; a++)max_len=MAX(strlen(B->seq_al[a]),max_len);
	
	A=realloc_aln2 ( A,max_nseq,max_len+1);
	
	for (a=A->nseq,b=0; b< B->nseq; b++, a++)
	    {
	    sprintf ( A->seq_comment[a] , "%s", B->seq_comment[b]);
	    sprintf ( A->aln_comment[a] , "%s", B->aln_comment[b]);
	    
	    sprintf ( A->seq_al [a] , "%s", B->seq_al [b]);
	    sprintf ( A->name   [a] , "%s", B->name[b]);
	    sprintf ( A->file   [a], "%s" , B->file[b]);
	    A->order[a][0]=B->order[b][0];
	    A->order[a][1]=B->order[b][1];
	    A->score_seq[a]=B->score_seq[b];
	    A->len[a]=B->len[b];
	    }
	
	A->len_aln=MAX(A->len_aln, B->len_aln);
	A->nseq=A->nseq+B->nseq;
	A->score_aln=A->score_aln+B->score_aln;
	
	A->finished=A->finished+B->finished;
	return A;
	}
	    
Alignment *chseqIaln(char *name, int seq_n, int start,int len,Sequence *S, int seqIaln, Alignment *A)
        {
	char *seq;

	seq=extract_char ( S->seq[seq_n], start, len);
	A=realloc_aln2 (A, (A==NULL)?(seqIaln+1):MAX(A->nseq,seqIaln+1), ((A==NULL)?(strlen (seq)):MAX(strlen (seq),A->len_aln))+1);
	
	
	sprintf ( A->seq_al[seqIaln], "%s",seq);

	
	A->order[seqIaln][0]=seq_n;
	A->order[seqIaln][1]=start;
	sprintf ( A->name[seqIaln], "%s", name);
	A->nseq=MAX(A->nseq, seqIaln+1);
	A->len_aln=return_maxlen(A->seq_al, A->nseq);
	A->S=S;
	vfree (seq);
	return A;
	}

Alignment * aln_gap2random_aa(Alignment *A)
        {
	 int a, b,l;
	 char alp[200];
	 
	 if (strm ( (A->S)->type, "PROTEIN"))
	   sprintf ( alp, "acefghiklmnpqrstuvwy");
	 else if ( strm ( (A->S)->type, "DNA"))
	   sprintf ( alp, "agct");
	 l=strlen (alp);
	 
	 
	 for (a=0; a<A->nseq; a++)
	    for ( b=0; b<A->len_aln; b++)
	      if ( is_gap (A->seq_al[a][b]))A->seq_al[a][b]=alp[(int)rand()%(l)];
	  return A;
	}

Alignment * make_random_aln(Alignment *A,int nseq, int len, char *alphabet)
        {
	int a;
	

	A=realloc_aln2(A, nseq, len+1);

	A->nseq=0;
	A->len_aln=len;
	for ( a=0; a< A->nseq; a++)sprintf ( A->file[a], "random alignment");
	for ( a=0; a< nseq; a++)
	    A=add_random_sequence2aln(A,alphabet);
	return A;
	}
Alignment * add_random_sequence2aln( Alignment *A, char *alphabet)
        {
	int a, n;

	vsrand(0);

	n=strlen(alphabet);
	A=realloc_alignment2 (A, A->nseq+1, A->len_aln+1);
	
	for ( a=0; a< A->len_aln; a++)A->seq_al[A->nseq][a]=alphabet[rand()%n];
	if (! A->name[A->nseq][0])
	  {
	    for ( a=0; a<10; a++)A->name[A->nseq][a]=alphabet[rand()%n];
	    A->name[A->nseq][a]='\0';
	  }
	
	A->nseq++;
	return A;
	}

Sequence *get_defined_residues( Alignment *A)
        {
	    char *buf;
	    Sequence *S;
	    int a, b, s, l, r;
	    if ( !A || !A->S) return NULL;

	    S=duplicate_sequence (A->S);
	    for ( a=0; a< S->nseq; a++)
		for ( b=0; b< S->len[a]; b++)S->seq[a][b]=UNDEFINED_RESIDUE;
	    buf=vcalloc(A->len_aln+1,sizeof (char));
	    for ( a=0; a< A->nseq; a++)
	        {
		    sprintf ( buf, "%s",A->seq_al[a]);
		    ungap(buf);
		    l=strlen (buf);
		    s=A->order[a][0];
		    
		    for ( b=1; b<= l; b++)
			{
			r=A->seq_cache[s][b];
			
			if ( r>=0)S->seq[s][r-1]=(A->S)->seq[s][r-1];
			}
		}
	    vfree(buf);
	    return S;
	}
Alignment *thread_defined_residues_on_aln ( Alignment *A, Sequence *S1)
	{
	int a, b;
	int gap, r,s, r2;
	for ( a=0; a< A->nseq; a++)
	    {
		s=A->order[a][0];
		r=A->order[a][1];
		for (b=0;b< A->len_aln; b++)
		    {
		    gap=is_gap(A->seq_al[a][b]);
		    
		    if (!gap)
			{
			r+=!gap;
			r2=A->seq_cache[s][r]-1;
			
			if (r2>=0 && S1->seq[s][r2]==UNDEFINED_RESIDUE)
			    A->seq_al[a][b]=UNDEFINED_RESIDUE;
			}
		    }
	    }
	return A;
	}

int ** trim_aln_borders (char **seq1, char **seq2, int nseq)
        {
	int a, b, c,l1,l2;
	char *buf1;
	char *buf2;
	int max;

	

	
	max=MAX(get_longest_string (seq1,-1, NULL, NULL),get_longest_string (seq2,-1, NULL, NULL))+1;
	buf1=vcalloc ( max, sizeof(char));
	buf2=vcalloc ( max, sizeof(char));
	
	for ( a=0; a< nseq; a++)
	    {
	    sprintf ( buf1, "%s", seq1[a]);
	    sprintf ( buf2, "%s", seq2[a]);


	   
	    ungap (buf1);
	    ungap (buf2);

	    if (str_overlap ( buf1, buf2,'*')!=0)
		{		      
		l1=strlen ( seq1[a]);
	        l2=strlen ( seq2[a]);
		for ( b=0,c=0; c< l1; c++)
		    if ( !is_gap(seq1[a][c]))seq1[a][c]=buf1[b++];
		seq1[a][c]='\0';
		for ( b=0,c=0; c< l2; c++)
		    if ( !is_gap(seq2[a][c]))seq2[a][c]=buf2[b++]; 
		seq2[a][c]='\0';
      		}
	    }
	vfree (buf1);
        vfree (buf2);
	return NULL;

	}
Sequence * merge_seq    ( Sequence *IN, Sequence *OUT)
        {
	int a;
	
	if ( OUT==NULL)return duplicate_sequence (IN);
	else
	    {
	     if ( IN && check_list_for_dup( IN->name, IN->nseq))
	          {
		      fprintf ( stderr, "\nERROR: %s is duplicated in file %s[FATAL]\n", check_list_for_dup( IN->name, IN->nseq), IN->file[0]);
		      exit (EXIT_FAILURE);
		  }
	    for ( a=0; a< IN->nseq; a++)
		if ((OUT=add_sequence ( IN, OUT, a))==NULL)return NULL;
	    return OUT;
	    }
	}

Alignment *seq_name2removed_seq_name(Sequence *S, Alignment *NA, float **diff)
{
  int a, b, rb, s;
  float min_diff;
  for (a=0; a< S->nseq; a++)
    {
      if (name_is_in_list( S->name[a], NA->name, NA->nseq, 100)!=-1) continue;
      for ( min_diff=100, s=0, b=0; b< NA->nseq; b++)
	{
	  rb=name_is_in_list ( NA->name[b], S->name, S->nseq, 100);
	  if ( diff[a][rb]<min_diff)
	    {	
	      s=b;
	      min_diff=diff[a][rb];

	    }
	}
      strcat ( NA->seq_comment[s], " ");
      strcat ( NA->seq_comment[s], S->name[a]);
    }
  return NA;
}
  
      
	      
     
int seq_name2index (char *name, Sequence *S)
{
  if ( !S) return -1;
  else return name_is_in_list ( name, S->name, S->nseq, MAXNAMES+1);
}
char * seq_name2coor ( char *s, int *start, int *end, char sep)
{
  /*name|start|end */
  char n1[100], n2[100];
  int a=0, b=0, c=0;
  
  n1[0]=n2[0]='\0';
  start[0]=end[0]=0;
  
  while ( s[a]!=sep && s[a]!='\0')a++;
  if ( s[a]=='\0')return s;
  else 
    s[a++]='\0';

 
  
  while ( s[a]!=sep && s[a]!='\0')n1[b++]=s[a++];
  
  if ( s[a]=='\0'){n1[b]='\0';if ( n1[0])start[0]=atoi(n1);return s;}
  else s[a++]=n1[b]='\0';
 
  
  while ( s[a]!=sep && s[a]!='\0')n2[c++]=s[a++];
  n2[c]='\0';

  
  if ( n1[0])start[0]=atoi(n1);
  if ( n2[0])end[0]=atoi(n2);


  return s;
}
  
Sequence *extract_one_seq(char *n,int start, int end, Alignment *S, int keep_name)
       {
	
	 int seq, a;
	 FILE*fp;
	 char *name;
	 Sequence *OUT_S;
	 

	 if ( n[0]=='#')seq=S->nseq;
	 else if ( (seq=name_is_in_list (n, S->name, S->nseq, 100)+1)!=0);
	 else if (is_number (n) && (seq=atoi(n))!=0) seq=atoi(n);
	 else
	   {
	     fprintf ( stderr, "\nCould not find Sequence %s [FATAL]", n);
	     exit (EXIT_FAILURE);
	   }
	 seq--;
	 
	 name=vtmpnam ( NULL);
	 fp=vfopen ( name, "w");
	 if ( start && end &&!keep_name)fprintf (fp, ">%s_%d_%d\n",S->name[seq],start, end);
	 else if ( start && end==0 && !keep_name)fprintf (fp, ">%s_%d_%d\n",S->name[seq],start,strlen ( S->seq_al[seq]));
	 else fprintf (fp, ">%s\n", S->name[seq]);
	 
	 if ( start==0 && end==0){fprintf (fp, "%s\n", S->seq_al[seq]);}
	 else if (end==0){fprintf (fp, "%s\n", S->seq_al[seq]+start-1);}
	 else
	   {
	     for ( a=start-1; a<end; a++){fprintf ( fp, "%c", S->seq_al[seq][a]);}
	     fprintf ( fp, "\n");
	   }
	 
	 
	 vfclose (fp);
	 OUT_S=get_fasta_sequence_num (name, NULL);
	 
	 return OUT_S;
       }
	 

	 
Sequence * extract_sub_seq( Sequence  *COOR, Sequence *S)
        {
	int a, b, c,s;
	int start, end;
	
	for ( a=0; a< S->nseq; a++)
	    {
	    if ( (s=name_is_in_list ( S->name[a], COOR->name, COOR->nseq, 100))!=-1)
		 {
		 
		 sscanf ( COOR->seq_comment[s], "%d %d", &start, &end);
		 for (c=0,b=start-1; b< end; b++, c++)S->seq[a][c]=S->seq[a][b];
		 S->seq[a][c]='\0';
		 sprintf ( S->seq_comment[a], "%s",COOR->seq_comment[s]);
		 
		 }
	    }
	S=reorder_seq ( S, COOR->name, COOR->nseq);
	return S;
	}
		 



Alignment * fix_aln_seq  ( Alignment *A, Sequence *S)
        {
	int a, b, c;
	char *buf1, *buf2;
	int g0, g1, nr0, nr1;
	int id, tot;
	Alignment *B;


	/*This function establishes the correspondance between every (1..N+1) residue of each aligned sequence
	  and its correspondance in S:
	  A->seq_cache[a][b]=x means that residue b of aligned sequence a corresponds to residue x of the sequence with tye same index in S
	  A->seq_cache[a][b]=0 means there is no correspondance.
	  a is the index of the sequence
	  Applying this function is needed for turning an alignment into a constraint list
	*/
	

	if ( S==NULL)return A;

	A->seq_cache=declare_int ( S->nseq, MAX((A->len_aln+1), S->max_len+1));
	
	for (a=0; a< S->nseq; a++)
	  for ( b=0; b< A->len_aln; b++)A->seq_cache[a][b]=-1;
	
	buf1=buf2=NULL;
	for ( a=0; a< S->nseq; a++)
	    {
	    for (b=0; b< A->nseq; b++) 
	        {
		if (strm ( S->name[a], A->name[b]))
		   {
		   A->order[b][0]=a;
		   
		   vfree (buf1);
		   buf1=vcalloc ( A->len_aln+1, sizeof (char));
		   sprintf (buf1, "%s", A->seq_al[b]);
		   ungap (buf1);
		   upper_string (buf1);
		   
		   vfree(buf2);
		   buf2=vcalloc (strlen(S->seq[a])+1, sizeof (char));
		   sprintf (buf2, "%s",S->seq[a]);
		   ungap (buf2);
		   upper_string (buf2);
		   
		   

		   if ( strm (buf1,buf2))
		       {
			   
			   for ( c=0; c<S->len[a]; c++)A->seq_cache[a][c+1]=c+1;
		       }
		   else
		       {
			  
			   B=align_two_sequences (buf2,buf1,"blosum62mt",-4,-1, "myers_miller_pair_wise");
			   if ( getenv ("DEBUG_RECONCILIATION"))
			     {
			       fprintf (stderr, "\n[DEBUG_RECONCILIATION:fix_aln_seq]\nReconciliation of %s\nA=Ref_sequence\nB=New_seq", S->name[a]);
			       print_aln (B);
			     }
			   
			   for (id=0, tot=0,nr0=0,nr1=0,c=0; c<B->len_aln; c++)
			     {
			       g0=is_gap(B->seq_al[0][c]);
			       g1=is_gap(B->seq_al[1][c]);
			       nr0+=1-g0;
			       nr1+=1-g1;
			       if ( !g0 && !g1)
				 {
				   tot++;
				   id+=(B->seq_al[0][c]==B->seq_al[1][c])?1:0;
				   A->seq_cache[a][nr1]=nr0;
				 }
			       else if (g0 && !g1)
				 {
				   A->seq_cache[a][nr1]=0;
				 }
			     }
			   if ( ((id*100)/tot)<20)
			     {
			       print_aln (B);
			       fprintf ( stderr, "\nTwo different sequences have the same name: %s", S->name[a]);		
			       fprintf ( stderr, "\nIf %s is a PDB ID, Make sure it identifies the right chain (A, B, 1, 2...)", S->name[a]);
			       fprintf ( stderr, "\nChain number or index must be added to the PDB id (i.e. 1gowA)");
			       fprintf ( stderr, "\nIf You want to use %s anyway, rename it with a non-PDB identifier such as seq_%s\n",S->name[a],S->name[a]); 
			       exit (EXIT_FAILURE);
			     }

			   free_sequence ( B->S, -1);
			   free_aln (B);
		       }
		   
		   }
		}
	    }
	    vfree(buf1);vfree(buf2);
	    return A;
	}

Sequence * add_prf2seq  ( char *file, Sequence *S)
    {
      char **new_seq;
      Sequence *NS;
      
      if ( !is_aln (file))return S;
      else
	{
	  X_template *R;
	  Alignment *A;
	  R=fill_R_template(file,file, S);
	  A=(R->VR)->A;
	  new_seq=declare_char (1,A->len_aln+1);
	  sprintf ( new_seq[0], "%s",aln2cons_seq_mat(A, "blosum62mt"));
	  
	  NS=fill_sequence_struc(1, new_seq,A->file);
	  S=add_sequence (NS, S, 0);
	  (S->T[S->nseq-1])->R=R;
	  
	  free_sequence (NS, NS->nseq);
	  free_char( new_seq, -1);
	  
	  return S;
	}
    }
int prf_in_seq ( Sequence *S)
{
  int a;
  
  if ( !S) return 0;
  else 
    {
      for ( a=0; a< S->nseq; a++)
	if (seq2R_template_profile(S, a)) return 1;
    }
  return 0;
}
Sequence * add_sequence ( Sequence *IN, Sequence *OUT, int i)
    {
    int s;
   
    char *buf;
    if (OUT==NULL)
      {
	OUT=duplicate_sequence (IN);
	return OUT;
      }

    /*Adds sequence i of IN at the end of OUT*/

    if ((s=name_is_in_list ( IN->name[i], OUT->name, OUT->nseq,STRING))==-1 )
       {
	 OUT=realloc_sequence (OUT, OUT->nseq+1, IN->len[i]);	 
	 sprintf ( OUT->name[OUT->nseq],"%s",IN->name[i]);
	 sprintf ( OUT->file[OUT->nseq],"%s",IN->file[i]);
	 sprintf ( OUT->seq_comment[OUT->nseq],"%s",IN->seq_comment[i]);
	 sprintf ( OUT->aln_comment[OUT->nseq],"%s",IN->aln_comment[i]);
	 
	 sprintf ( OUT->seq[OUT->nseq],"%s",IN->seq[i]);
	 OUT->len[OUT->nseq]=IN->len[i];
	 OUT->T[OUT->nseq][0]=IN->T[i][0];
	 OUT->nseq++;
	 return OUT;
       }
    else if ( s!=-1 && !case_insensitive_strcmp ( IN->seq[i], OUT->seq[s]))
       {
       	 
	 if ( getenv4debug("DEBUG_RECONCILIATION"))fprintf ( stderr,"[DEBUG_RECONCILIATION:add_sequence]\n%s\n%s\n", IN->seq[i], OUT->seq[s]);
	 
	 fprintf ( stderr, "WARNING: DISCREPANCY:%s in [%s] and  [%s]\n", IN->name[i], IN->file[i], OUT->file[s]);
	 
       if (((buf=build_consensus(IN->seq[i], OUT->seq[s],"cfasta_pair_wise" ))!=NULL)||((buf=build_consensus(IN->seq[i], OUT->seq[s],"myers_miller_pair_wise" ))!=NULL))
	   {
	    
	     OUT->max_len=MAX(OUT->max_len, strlen(buf));
	     OUT->min_len=MIN(OUT->min_len, strlen(buf));
	     OUT->seq    =realloc_char ( OUT->seq, -1, -1,OUT->nseq,OUT->max_len+1);
	     
	     sprintf ( OUT->seq[s],"%s",buf);
	     OUT->len[s]=strlen (buf);
	     vfree (buf);
	     return OUT;
	   }
       else
	 {
	   fprintf ( stderr, "IMPOSSIBLE TO RECONCILIATE SOME SEQUENCES[FATAL:%s]\n", PROGRAM);
	       print_aln ( align_two_sequences (IN->seq[i], OUT->seq[s], "idmat", 0, 0, "fasta_pair_wise"));
	       return NULL;
	   }
       
       }
    else
       {
       return OUT;
       }
    }
			   

Sequence  * trim_seq       ( Sequence  *A, Sequence  *B)
        {
	int a;
	Sequence *R;
	
	R=declare_sequence (MIN(A->min_len,B->min_len), MAX(A->max_len, B->max_len), MIN(A->nseq, B->nseq));
	R->nseq=0;
	
	for (a=0; a< A->nseq; a++)
	    {  
	    if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING+1)!=-1)
	        {
		  sprintf ( R->name[R->nseq], "%s", A->name[a]);
		  sprintf ( R->seq[R->nseq], "%s", A->seq[a]);
		  sprintf ( R->file[R->nseq], "%s", A->file[a]);
		  sprintf ( R->aln_comment[R->nseq], "%s", A->aln_comment[a]);
		  sprintf ( R->seq_comment[R->nseq], "%s", A->seq_comment[a]);
		  
		  R->len[R->nseq]=A->len[a];
		  R->nseq++;
		}
	    }
	return R;
	}

Sequence * trim_aln_seq ( Alignment *A, Alignment *B)
        {
	int a;
	static char **name_list;
	int n=0;
	Sequence *SA, *SB;
	int **cache_A=NULL;
	int **cache_B=NULL;
	int * p;

	/*This function inputs two alignments A and B
	  It removes sequences that are not common to both of them
	  It rearange the sequences so that they are in the same order
	  A decides on the order
	  The Sequences (A->S) and (B->S) are treated the same way
	  Sequences are also merged in order to detects discrepencies.
	  A pointer to S is returned
	*/
	if (name_list)free_char (name_list, -1);
	name_list=declare_char (MAX(A->nseq, B->nseq), STRING+1);
        
	for ( a=0; a< A->nseq; a++)
	    {  
	    if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING)!=-1)
	        {
		sprintf ( name_list[n++], "%s", A->name[a]);
		}
	    }
	
	
	
	reorder_aln ( A, name_list, n);
	if (A->seq_cache)cache_A=duplicate_int (A->seq_cache, -1, -1);
	if (B->seq_cache)cache_B=duplicate_int (B->seq_cache, -1, -1);
	reorder_aln ( B, name_list, n);
	for ( a=0; a< n; a++)
	  {
	    if ( cache_A)
	      {
		p=A->seq_cache[A->order[a][0]];
		A->seq_cache[A->order[a][0]]=cache_A[a];
		cache_A[a]=p;
	      }
	   if ( cache_B)
	      {
		p=B->seq_cache[B->order[a][0]];
		B->seq_cache[B->order[a][0]]=cache_B[a];
		cache_B[a]=p;
	      } 
	   A->order[a][0]=B->order[a][0]=a;
	  }
        free_int(A->seq_cache, -1);
	free_int(B->seq_cache, -1);

	A->seq_cache=cache_A;
	B->seq_cache=cache_B;
	  

	
	SA=aln2seq(A);
	SB=aln2seq(B);
	
	A->S=B->S=merge_seq (SA, SB);
	return A->S;
	}
Sequence * trim_aln_seq_name ( Alignment *A, Alignment *B)
        {
	int a;
	Sequence *S;
	
	/*This function inputs two alignments A and B
	  It removes sequences that are not common to both of them
	  It rearange the sequences so that they are in the same order
	  A decides on the order
	*/
	S=declare_sequence ( 1, 1, A->nseq+B->nseq);
	S->nseq=0;
	for ( a=0; a< A->nseq; a++)
	    {  
	    if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING)!=-1)
	        {
		sprintf ( S->name[S->nseq++], "%s", A->name[a]);
		}
	    }
	return S;
	}



char ** rm_name_tag (char **name, int nseq, char *tag)
{
  int a , b, ntag;
  char **tag_list;
  char *s;
  char **template_list;	
  if ( !name )return NULL;

  tag_list=declare_char (10, 4);

  if ( tag)
    {
      ntag=1; sprintf ( tag_list[0], "%s", tag);
    }
  else
    {
      ntag=0;
      sprintf ( tag_list[ntag++], "_S_");
      sprintf ( tag_list[ntag++], "_G_");
    }
  template_list=declare_char (nseq, 100);
  for ( a=0; a<nseq ; a++)
    {
      for ( b=0; b<ntag; b++)
	{
	s=strstr(name[a], tag_list[b]);
	if ( s)
	  {
	    s[0]='\0';
            s[2]='\0';
	    sprintf ( template_list[a], ">%s _%s_ %s", name[a], s+1, s+3);
	    break;
	  }
	}
    }
		
  free_char (tag_list, -1); 	
  return template_list;
}
Sequence * swap_header ( Sequence *S, Sequence *H)
{
  int a, b, n;

  for ( a=0; a< S->nseq; a++)
    {
      if ( (n=name_is_in_list (S->name[a],H->name, H->nseq, 1000))!=-1)
	   {
	     char **list;
	     int ll;
	     
	     list=string2list (H->seq_comment[n]);
	     if ( list==NULL || atoi(list[0])==1)continue;
	     S->seq_comment[a]='\0';
	     sprintf (S->name[a], "%s%s%s",H->name[n], list[1], list[2]);
	     vfree ( S->seq_comment[a]);S->seq_comment[a]=vcalloc ( strlen (H->seq_comment[n])+1, sizeof (char));
	     for (b=3; b< atoi(list[0]); b++)S->seq_comment[a]=strcat (S->seq_comment[a], list[b]);
	     free_char (list, -1);
	   }
    }
  return S;
}


Sequence * profile_seq2template_seq ( Sequence *S, char *template_file)
{
  /*This function fetches potential templates associated with sequences within a profile*/
  int i, j;
  Alignment *A;
  static int prf_index_name;

  for ( i=0; i< S->nseq; i++)
    {
      if ( A=seq2R_template_profile (S, i))
	{
	  A->S=aln2seq (A);
	  A->S=seq2template_seq (A->S, template_file);	  
	}
    }

  return S;
}
      
Sequence * seq2template_type(Sequence *Seq)
{
  int a, e;
  int s, p;
  struct X_template *S=NULL;
  struct X_template *P=NULL;
  struct X_template *R=NULL;
  struct X_template *G=NULL;
  Alignment *A;
  e=' ';
  for (a=0; a< Seq->nseq; a++)
    {
      if (!Seq->T[a])continue;
      P=seq_has_template (Seq, a, "_P_");
      S=seq_has_template (Seq, a, "_S_");
      R=seq_has_template (Seq, a, "_R_");
      G=seq_has_template (Seq, a, "_G_");
      
      s=(!P)?1:0;
      p=(P && (P->VP)->public_pdb)?1:0;
      sprintf ( (Seq->T[a])->seq_type, "%c%c%c%c%c%c", (P)?'P':e, (S)?'S':e, (S)?'s':e, (p)?'p':e, (R)?'R':e, (G)?'G':e);
      if (R && (A=seq2R_template_profile (Seq,a)))
	{
	  A->S=seq2template_type ( A->S);
	}	  
    }
  return Seq;
}

char * string_contains_template_tag (char *string)
{
  if ( strstr (string, "_P_"))return "_P_";
  if ( strstr (string, "_S_"))return "_S_";
  if ( strstr (string, "_R_"))return "_R_";
  if ( strstr (string, "_G_"))return "_G_";
  return NULL;
}

Sequence * seq2template_seq ( Sequence *S, char *template_list)
{
  /*Expected format for the template file:
    >seq_name _X_ Target_template
    X: S for Structures 
    G for genomes (Exoset)
  */

  /*Fill the sequences*/
  
  if ( (template_list && template_list[0]=='\0') || strm ( template_list, "no_template")) return S;
  else if ( string_contains_template_tag (template_list))
    {
      int a;
      char *p;
      for (a=0; a< S->nseq; a++)
	{
	  if ( p=strstr (template_list,"_self"))p=S->name[a];
	  else if ( p=strstr (template_list, "_seqfile_"));
	  else 
	    {
	      fprintf ( stderr, "\nUnkown mode for Template [FATAL:%s]\n", PROGRAM);
	      exit (EXIT_FAILURE);
	    }
	  
	  if (      strstr (template_list, "_P_" ))(S->T[a])->P  =fill_P_template  ( S->name[a], p,S);
	  else if ( strstr (template_list, "_S_")) (S->T[a])->S  =fill_S_template  ( S->name[a], p,S);	  
	  else if ( strstr (template_list, "_R_" ))(S->T[a])->R  =fill_R_template  ( S->name[a], p,S);
	  else if ( strstr (template_list, "_G_" ))(S->T[a])->G  =fill_G_template  ( S->name[a], p,S);
	  else if ( strstr (template_list, "_RB_"))(S->T[a])->RB =fill_RB_template ( S->name[a], p,S);
	}
    }
  else if ( template_list==NULL || format_is_fasta (template_list))
    {
      Sequence *T;
      int a, i;
      
      
      T=(template_list!=NULL)?get_fasta_sequence (template_list, NULL):S;
      for (a=0; a< T->nseq; a++)
	  {
	    char *p;
	    
	    if ((i=name_is_in_list(T->name[a], S->name, S->nseq, MAXNAMES))!=-1)
	      {
		
		if (       (p=strstr (T->seq_comment[a], "_P_")))(S->T[i])->P=fill_P_template (S->name[i],p,S);
		else if (  (p=strstr (T->seq_comment[a], "_S_")))(S->T[i])->R=fill_S_template (S->name[i],p,S);
		else if (  (p=strstr (T->seq_comment[a], "_R_")))(S->T[i])->R=fill_R_template (S->name[i],p,S);
		else if (  (p=strstr (T->seq_comment[a], "_G_")))(S->T[i])->G=fill_G_template (S->name[i],p,S);
		else if (  (p=strstr (T->seq_comment[a], "_RB_")))(S->T[i])->RB=fill_RB_template (S->name[i],p,S);
		if (T!=S)strcat (S->seq_comment[i], T->seq_comment[a]);
	      }
	  }
	if (T!=S)free_sequence (T, -1);
    }
  else
    {
      char *tmp1, *tmp2, command[10000];
      Alignment *A;
      tmp1=vtmpnam (NULL); tmp2=vtmpnam (NULL);
      
      A=seq2aln (S,NULL, 0);
      output_fasta_seq (tmp1, A);
      sprintf ( command, "%s -infile=%s -outfile=%s", template_list, tmp1, tmp2);
      my_system ( command);
      free_aln (A);
      if ( check_file_exists (tmp2) && format_is_fasta(tmp2))
	S=seq2template_seq (S, tmp2);
      else 
	fprintf ( stderr, "\nCould not Run %s to find templates [WARNING:%s]\n", template_list, PROGRAM);
    }
  return S;
}

char* seq2template_file (Sequence *S, char *file)
{
  Alignment *A;
  int i;
  
  if (file==NULL)file=vtmpnam (NULL);
  
  seq2template_file2 (S, file, "w");
  for (i=0; i<S->nseq; i++)
    if ( A=seq2R_template_profile (S, i))
      {
	seq2template_file2 (A->S, file, "a");
      }
  return file;
}
  
int seq2template_file2 (Sequence *S, char *file, char *mode)
{
  FILE *fp;
  int i;
  char buf1[10000];
  char buf2[10000];
  struct X_template *X;
  
  fp=vfopen ( file, mode);
  for ( i=0; i< S-> nseq; i++)
    {
      buf1[0]=0;
      if ( (X=(S->T[i])->P)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}
      if ( (X=(S->T[i])->S)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}
      if ( (X=(S->T[i])->R)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}
      if ( (X=(S->T[i])->G)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}
      if (buf1[0])fprintf ( fp, ">%s %s\n", S->name[i], buf1);
    }
  vfclose (fp);
}
	   
      
	
	
int seq2n_X_template ( Sequence *S, char *type)
{
  int a, n;
  
  for (n=0,a=0; a< S->nseq; a++)
    {
      if ( strm2 (type, "_P_","_*_") && (S->T[a])->P)n++;
      if ( strm2 (type, "_S_","_*_") && (S->T[a])->S)n++;
      if ( strm2 (type, "_R_","_*_") && (S->T[a])->R)n++;
      if ( strm2 (type, "_G_","_*_") && (S->T[a])->G)n++;
    }
  return n;
}
struct X_template *fill_X_template ( char *name, char *p, char *token)
{
  struct X_template *X;
  struct P_template *P;
  struct S_template *S;
  struct R_template *R;
  struct G_template *G;
  char *k;
  
  X=vcalloc (1, sizeof (X_template));
  sprintf ( X->seq_name, "%s", name);
  if ( k=strstr (p, token))sscanf (k+strlen(token), "%s",X->template_name);
  else sprintf (X->template_name, "%s", p);
  
  sprintf ( X->template_type, "%s", token);
  if ( strm (token, "_P_"))X->VP=vcalloc (1, sizeof (P_template));
  if ( strm (token, "_S_"))X->VS=vcalloc (1, sizeof (S_template));
  if ( strm (token, "_R_"))X->VR=vcalloc (1, sizeof (R_template));
  if ( strm (token, "_G_"))X->VG=vcalloc (1, sizeof (G_template));
  if ( strm (token, "_RB_"))X->VRB=vcalloc (1, sizeof (R_template));
  
  return X;
}

struct X_template* free_X_template ( struct X_template *X)
{
  if (X->VP)
    {
      vfree (X->VP);
    }
  else if ( X->VS)
    {
      free_sequence ((X->VS)->S, -1);
      vfree (X->VS);
    }
  else if ( X->VR)
    {
      free_aln ((X->VR)->A);
      vfree (X->VR);
    }
  else if ( X->VG)
    {
      free_sequence ((X->VG)->S, -1);
      vfree (X->VG);
    }
  else if ( X->VRB)
    {
      free_aln ((X->VRB)->A);
      vfree (X->VG);
    }
  
  vfree (X);
  return NULL;
}

FILE * display_sequence_templates (Sequence *S,int i, FILE *io)
{
  
  io=display_X_template ( (S->T[i])->P, io);
  io=display_X_template ( (S->T[i])->S, io);
  io=display_X_template ( (S->T[i])->R, io);
  io=display_X_template ( (S->T[i])->G, io);
  io=display_X_template ( (S->T[i])->RB, io);
  
  return io;
}
 
FILE * display_X_template (struct X_template *X, FILE *io)
{
  if ( !X) return io;
  if ( !strm (X->template_type, "_S_"))fprintf (io, "\n\t%s: Template=%s, File=%s",template_type2type_name (X->template_type), X->template_name,X->template_file);
  return io;
}
char *template_type2type_name (char *type)
{
  if ( strstr (type, "_P_"))       return "PDB struc";
  else if ( strstr (type, "_S_")) return "Sequence ";
  else if ( strstr (type, "_R_")) return "Profile  ";
  else if ( strstr (type, "_G_")) return "Genomic  ";
  else if ( strstr (type, "_RB_")) return "BlastProf";
  
  else return type;
}

struct X_template *fill_P_template ( char *name,char *p, Sequence *S)
{
  struct X_template *P;
  char *template_file;
  
  P=fill_X_template ( name, p, "_P_");  
  

  template_file=is_pdb_struc(P->template_name);

  if (!template_file)
    {
      fprintf ( stderr, "\nWarning: _P_ Template |%s| Could Not Be Found [WARNING:%s]\n", p,PROGRAM);
      free_X_template (P);
      return NULL;
    }
  else
    {

      sprintf ( P->template_file, "%s",template_file);
      sprintf ((P->VP)->pdb_id, "%s", get_pdb_id (template_file));
      
      (P->VP)->public_pdb=is_pdb_name ((P->VP)->pdb_id);
    }
  return P;
}

struct X_template *fill_S_template ( char *name,char *p, Sequence *Seq)
{
  struct X_template *S;
  S=fill_X_template ( name, p, "_S_"); 
  if ( strm (name, p))sprintf ( S->template_file, "%s",output_fasta_seqX (NULL,"w",Seq,NULL, seq_name2index (name, Seq)));
  (S->VS)->S=get_fasta_sequence (S->template_file, NULL);
  return S;
}

struct X_template *fill_R_template ( char *name,char *p, Sequence *S)
{
  /*Profile template*/
  struct X_template *R;
  
  R=fill_X_template ( name, p, "_R_");
  sprintf ( R->template_file, "%s", R->template_name);
  
  if (!is_aln(R->template_file) && !is_seq (R->template_file))
    {
      fprintf ( stderr, "\nWarning: _R_ Template %s Could Not Be Found [WARNING:%s]\n",R->template_name,PROGRAM);
      free_X_template (R);
      return NULL;
    }
  else
    {
      (R->VR)->A=main_read_aln (R->template_file, NULL);
      (R->VR)->A=aln2profile ((R->VR)->A);
    }
  return R;
}

struct X_template *fill_G_template ( char *name,char *p, Sequence *S)
{
  struct X_template *G;
  G=fill_X_template ( name, p, "_G_");  
  

  /*1: Get the sequence from another file if needed*/
  if ( strm (name, p))sprintf ( G->template_file, "%s",output_fasta_seqX (NULL,"w",S,NULL, seq_name2index (name, S)));
  else if ( strstr (p, "_seqfile_"))
    {
      Sequence *ST;
      int i1, i2;
      
      ST=main_read_seq (after_strstr ( p,"_seqfile_"));
      i1=seq_name2index (name, S);
      i2=seq_name2index (name, ST);
      sprintf ( G->template_file, "%s",output_fasta_seqX (NULL,"w",ST,NULL, i2));
      sprintf ( G->template_name, "%s", name);
      free_sequence (ST, -1);
    }
  else sprintf (G->template_file, "%s", G->template_name);
	
  
  /*2: Put the template in VG->S*/
  if (!is_seq (G->template_file))
    {
      fprintf ( stderr, "\nWarning: _G_ Template %s Could Not Be Found [WARNING:%s]\n",p, PROGRAM);

      free_X_template (G);
      return NULL;
    }
  else
    {
      (G->VG)->S=get_fasta_sequence (G->template_file, NULL);
    }
  return G;
}

struct X_template *fill_RB_template ( char *name,char *p, Sequence *S)
{
 
  /*Profile template*/
  return fill_R_template (name, p, S);
}

char *seq2T_value ( Sequence *S, int n, char *value, char *type)
{
  static char *rv_buf;
  X_template *X;

  if ( !rv_buf)rv_buf=vcalloc (100, sizeof(char));
  if (!(X=seq_has_template (S, n, type)))return NULL;
  else
    {
      if (strm (value, "template_file"))return X->template_file;
      else if ( strm (value, "template_name"))return X->template_name;
      else if ( strm (value, "seq_name"))return X->seq_name;
      else if (strm (type, "_P_"))
	{
	  if ( strm (value, "pdb_id"))return (X->VP)->pdb_id;
	  else if ( strm (value, "public_pdb"))
	    {
	      if ((X->VP)->public_pdb){sprintf ( rv_buf, "1");return rv_buf;}
	      else return NULL;
	    }
	}
      else if ( strm (type, "_R_"))
	{
	  if ( strm (value, "A"))
	    {
	      if ((X->VR)->A){sprintf ( rv_buf, "%d", (X->VR)->A);return rv_buf;}
	      else return NULL;
	    }
	}
      else if ( strm (type, "_RB_"))
	{
	   if ( strm (value, "A"))
	    {
	      if ((X->VRB)->A){sprintf ( rv_buf, "%d", (X->VRB)->A);return rv_buf;}
	      else return NULL;
	    }
	}
    }
  return NULL;
}

char *seq2P_template_file(Sequence *S, int n)
{
  return seq2T_value (S, n, "template_file", "_P_");
}

Alignment * seq2R_template_profile (Sequence *S, int n)
{
  char *r;
  return (Alignment *)atop(seq2T_value (S, n, "A", "_R_"));
}

struct X_template* seq_has_template ( Sequence *S, int n, char *mode)
{
  Template *T;
  
  if ( !S || !mode) return NULL;
  else if ( n<0 || n>=S->nseq)return NULL;
  else if ( !(S->T)) return NULL;
  else if ( !(S->T[n]))return NULL;

  T=S->T[n];
  if      ( strm (mode, "_P_"))return T->P;
  else if ( strm (mode, "_S_"))return T->S;
  else if ( strm (mode, "_R_"))return T->R;
  else if ( strm (mode, "_G_"))return T->G;
  else return NULL;
}  
  
Alignment * reorder_aln ( Alignment *A, char **name, int nseq)
	{
	int a,sn;
	Alignment *BUF;
	int  n=0;
	int *tpp_int;
	
	BUF=copy_aln ( A,NULL);	
	for ( a=0; a<nseq; a++)
		{
		  sn =name_is_in_list ( name[a],BUF->name, A->nseq,STRING);
		  if ( sn==-1)
			{
			    ;
			}
		else
		    {
		    
		      
		    SWAPP(A->order[n], BUF->order[sn], tpp_int);
		    sprintf ( A->name[n], "%s", BUF->name[sn]);		    
		    sprintf ( A->seq_al[n], "%s",BUF->seq_al[sn]);
		    sprintf ( A->seq_comment[n], "%s",  BUF->seq_comment[sn]);
		    
		    n++;
		      
		    }
		}
	
	for ( a=n; a< A->nseq; a++)A->name[a][0]=A->seq_al[a][0]='\0';
	A->nseq=n;
	
	if ( A->A)A->A=reorder_aln(A->A, name, nseq);
	
	free_aln (BUF);
	return A;
	} 

Sequence * reorder_seq ( Sequence *A, char **name, int nseq)
	{
	int a,sn;
	Sequence *nA;
	Alignment *iA;
	
	nA=duplicate_sequence (A);
	
	
	for ( a=0; a< nseq; a++)
	  {
	    sn=name_is_in_list (name[a] ,nA->name, nA->nseq, 100);
	    if (sn==-1)continue;
	    
	    if ( nA->file)       sprintf ( A->file[a], "%s", nA->file[sn]);
	    
	    if ( nA->seq_comment)sprintf ( A->seq_comment[a], "%s", nA->seq_comment[sn]);
	    if ( nA->aln_comment)sprintf ( A->aln_comment[a], "%s", nA->aln_comment[sn]);
	    sprintf ( A->seq[a], "%s", nA->seq[sn]);
	    A->len[a]=nA->len[sn];
	    sprintf ( A->name[a], "%s", nA->name[sn]);
	    A->T[a][0]=nA->T[sn][0];
	  }
	A->nseq=nseq;
	free_sequence (nA, nA->nseq);
	
	return A;
} 

char * concatenate_seq ( Sequence *S, char *conc, int *order)
        {
	    int a;
	    
	    vfree (conc);
	    conc=vcalloc ( S->nseq*S->max_len, sizeof (char));

	    for ( a=0; a< S->nseq; a++)
	        {
		    conc=strcat ( conc, S->seq[order[a]]);
		}
	    return conc;

	}

Alignment * rotate_aln ( Alignment *A, char *name)
{
  Alignment *B;
  int a, b;
  
  B=declare_aln2 (A->len_aln, A->nseq+1);
  for ( a=0; a< A->nseq; a++)
    for ( b=0; b< A->len_aln; b++)
      {
	B->seq_al[b][a]=A->seq_al[a][b];
      }
  for (a=0; a< A->len_aln; a++)sprintf ( B->name[a], "%s_%s%d", name, (a<9)?"0":"",a+1);
  for (a=0; a< A->len_aln; a++)B->seq_al[a][A->nseq]='\0';
  B->len_aln=A->nseq;
  B->nseq=A->len_aln;
  free_aln (A);
  return B;
}
  
Alignment * invert_aln ( Alignment *A)
{
  char *buf;
  int l, a, b, c;
  
  l=strlen ( A->seq_al[0]);
  buf=vcalloc ( l,sizeof (char) );
  for ( a=0; a< A->nseq; a++)
    {
      
      for ( c=l-1,b=0; b< l; b++, c--)
	{
	  buf[c]=A->seq_al[a][b];
	 }
      buf[l]='\0';
      sprintf ( A->seq_al[a], "%s", buf);
    }
  vfree(buf);
  return A;
}
Alignment * extract_nol_local_aln(Alignment *A, int start, int max_end)
     {
     A=extract_aln ( A, start, max_end);
     A=trunkate_local_aln (A);
     return A;
     }
Alignment * extract_aln ( Alignment *A, int start, int end)
{
  return extract_aln2 ( A, start, end, NULL);
}

Alignment * extract_aln2 ( Alignment *A, int in_start, int in_end, char *seq)
     {
       char *tmp;
       FILE *fp;
       

       tmp=vtmpnam (NULL);
       fp=vfopen (tmp, "w");
       fprintf ( fp, "%s %d %d\n", seq, in_start, in_end);
       vfclose (fp);
       return extract_aln3 (A,tmp);
     }
Alignment * extract_aln3 ( Alignment *B, char *file)
     {
     int a, b, c;
     int start, end;
     int n, i, s, nline=0;
     FILE *fp;
     Alignment *A=NULL;
     int *col;
     char name[MAXNAMES];
     char line[VERY_LONG_STRING];
     
     /*Reads in a file
       #comment
       seqname pos
       OR
       seqname start end[
       modifies the incoming alignment
     */
     
     A=copy_aln (B, A);     
     col=vcalloc ( A->len_aln, sizeof (int));
     
     fp=vfopen ( file, "r");
     while ( (c=fgetc(fp))!=EOF)
       {
	 nline++;
	 if ( c=='#')fgets ( line, VERY_LONG_STRING,fp);
	 else
	   {
	     ungetc(c, fp);
	     fgets ( line, VERY_LONG_STRING,fp);
	     
	     
	     if (sscanf (line, "%s %d %d\n", name, &start, &end)==3);
	     else if (sscanf (line, "%s %d\n", name, &start)==2)
	       {
		 end=start+1;
	       }
	     else
	       {
		 fprintf ( stderr, "\nWARNING: wrong format in coordinate file (line=%d) [NON FATAL:%s]\n", nline, PROGRAM);
		 continue;
	       }
	     s=name_is_in_list (name,A->name,A->nseq,MAXNAMES);
	 
	     if ( s==-1 && !strm (name, "cons"))
	       {
		 fprintf ( stderr, "\nWARNING: Seq %s does not belong to the alignment (line %d)  [NON FATAL:%s]\n", name,nline, PROGRAM);
		 continue;
	       }
	     else if ( start>end)
	       {
		 fprintf ( stderr, "\nWARNING: Illegal coordinates [%s %d %d] (line %d)  [NON FATAL:%s]\n", name,start, end,nline, PROGRAM);
		 continue;
	       }
	     else
	       {
		 for (n=0, a=0; a< A->len_aln; a++)
		   {
		     
		     i=(strm (name, "cons"))?1:!is_gap(A->seq_al[s][a]);
		     n+=i;
		     if (n>=start && n<end && !(i==0 && n==end-1))
		       {
			 col[a]=1;
		       }
		     else if ( n>=end)a=A->len_aln;
		   }
	       }
	   }
       }
     vfclose ( fp);
     

		 
     /*Extract [start-end[*/
     for ( b=0,a=0; a< A->len_aln; a++)
       {
	 if ( col[a])
	   {
	     for (c=0; c< A->nseq; c++)A->seq_al[c][b]=A->seq_al[c][a];
	     b++;
	   }
       }
     A->len_aln=b;
     for (c=0; c< A->nseq; c++)A->seq_al[c][A->len_aln]='\0';
     vfree (col);
     
     return A;

     }
Alignment * trunkate_local_aln ( Alignment *A)
     {
     int a, b;
     int **pos;
     int **cache;
     int seq;
   
     
     cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),1)+A->len_aln+1);    
     pos=aln2pos_simple(A,A->nseq);
     
     for ( b=0; b<A->len_aln; b++)
	 for ( a=0; a< A->nseq; a++)	 
	     {
	     seq=A->order[a][0];
	     if ( pos[a][b]<=0);
	     else if ( pos[a][b]>0)
		 {
		 
		 if (cache[seq][pos[a][b]]==0)cache[seq][pos[a][b]]++;
		 else if ( cache[seq][pos[a][b]]>=1)
		      {	     
		      cache[seq][pos[a][b]]++;
		      A->seq_al[a][b]='\0';
		      }
		 }
	     }
     
     A->len_aln=get_shortest_string ( A->seq_al, A->nseq, NULL, NULL);
     pad_string_array ( A->seq_al, A->nseq, A->len_aln, '-');
     
     free_int (pos, -1);
     free_int ( cache,-1);
     
     
     return A;
     }

int get_nol_aln_border ( Alignment *A, int start, int direction)
     {
     int a, b;
     int **pos;
     int **cache;
     int seq,end;
     
     /*This Function Returns the limit position for a non overlaping alignment*/
     
     cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int)),1)+A->len_aln+1);
     pos=aln2pos_simple(A,A->nseq);
     end=(direction==GO_RIGHT)?A->len_aln:-1;
     

     for ( b=start; b!=end;b+=direction)
	 for ( a=0; a< A->nseq; a++)	 
	     {
	     seq=A->order[a][0];
	     if ( pos[a][b]<=0);
	     else if ( pos[a][b]>0)
		 {
		 
		 if (cache[seq][pos[a][b]]==0)cache[seq][pos[a][b]]++;
		 else if ( cache[seq][pos[a][b]]>=1)
		      {	     
		      cache[seq][pos[a][b]]++;
		      free_int(cache, -1);
		      return b-direction;
		      }
		 }
	     }
     
     free_int ( cache,-1);
     free_int (pos, -1);
     return end-direction;
     }





char * extract_defined_seq ( char *in, int in_of, int in_start, int *aa_def, int dir, int *out_start, char *out)
     {
     int start, end,l;
     int b, c, d;

     

     if ( dir==GO_LEFT){start=in_start-1;}
     else if ( dir==GO_RIGHT){start=in_start+1;}        
	
     end=start;
     while (aa_def[end]!=UNDEFINED)
	 {
	 end+=dir;
	 }
     end-=dir;
     
     if (end<start)SWAP(end,start);
     
     l=strlen ( in);
     out_start[0]=-1;
     for (b=0,d=0,c=in_of;b<l; b++)
         {
	 c+=1-is_gap(in[b]);
	 if ( c>=start && c<=end)
	     {
	     if ( out_start[0]==-1)out_start[0]=c-!is_gap(in[b]);
	     out[d++]=in[b];
	     }
	 }
     out[d]='\0';
     
    
     return out;
     }
Alignment * aln_cat ( Alignment *A, Alignment *B)
     { 
     int a;
     
     if ( A->nseq!=B->nseq) 
	 {
	 fprintf ( stderr, "\nERROR IN ALN CAT: DIFFERENT NSEQ\n");
	 exit(EXIT_FAILURE);
	 }

     A=realloc_alignment2(A, A->nseq,A->len_aln+B->len_aln+1);
    
     for ( a=0;a< A->nseq; a++)
         {	
	 strcat ( A->seq_al[a], B->seq_al[a]);
	 }
     A->len_aln+=B->len_aln;
     return A;
     }
int verify_aln ( Alignment *A, Sequence *S, char *message)
     {
     int a, b, c,s,r;


     for ( a=0;a< A->nseq; a++)
         {
	 s=A->order[a][0];
	 r=A->order[a][1];
	 for ( b=0, c=0; b< A->len_aln; b++)
	     {
	     if ( !is_gap(A->seq_al[a][b]))
		  {
		  if (tolower(A->seq_al[a][b])!=tolower(S->seq[s][c+r]))
		      {
		      fprintf ( stderr, "\n%s\nResidue [%c %d, %c %d] line %d seq %d",message,A->seq_al[a][b], b,S->seq[s][c+r], c+r,a,s);  
		      output_Alignment_with_res_number(A, stderr);
		      exit(EXIT_FAILURE);
		      return 0;
		      }
		  c++;
		  }
	     }
	 }
     return 1;
     }

Alignment *adjust_est_aln ( Alignment *PW, Alignment *M, int s)
{
  /*This function reajusts M, threading M onto PW
   two seqences in PW
   s+1 seq in M
   
   seq 0 PW ----> 0->s-1 in M
   seq 1 PW ----> 1->s   in M;
   
   */
  int a, b;
  static char **array;

  
  int top_M=0;
  int bottom_M=0;
  
  
  if ( array==NULL)
    {
      array=declare_char (500, 100000);
    }

  for ( a=0; a< PW->len_aln; a++)
    {
      if ( is_gap(PW->seq_al[0][a]))
	{
	  for ( b=0; b< s; b++)
	    array[b][a]='-';
	}
      else
	{
	  for ( b=0; b< s; b++)
	    array[b][a]=M->seq_al[b][top_M];
	top_M++;
	}
      
      if ( is_gap(PW->seq_al[1][a]))
	{
	  array[s][a]='-';
	}
      else
	{
	  
	  array[s][a]=M->seq_al[s][bottom_M];
	bottom_M++;
	} 
    }
  
  M->len_aln=PW->len_aln;
  for (a=0; a<s; a++)
    {
      for (b=0; b<PW->len_aln; b++)
	M->seq_al[a][b]=array[a][b];
      M->seq_al[a][b]='\0';
    }


  M->nseq=s+1;
  
  return M;
}


Alignment * rename_seq_in_aln (Alignment *A, char ***list)
{
  int n, i;
  if ( !A)return A;
  

  
  n=0;
  while ( list[n][0][0])
    {
      if ( (i=name_is_in_list (list[n][0], A->name, A->nseq, 100))!=-1)
	{
	  sprintf ( A->name[i], "%s", list[n][1]);
	}
      n++;
    }
  
  A->S=rename_seq_in_seq (A->S, list);
  return A;
}
Sequence * rename_seq_in_seq (Sequence *A, char ***list)
{
  int n, i;
  if ( !A || !list)return A;
  
  n=0;
  while ( list[n][0][0])
    {
      if ( (i=name_is_in_list (list[n][0], A->name, A->nseq, 100))!=-1)
	{
	  sprintf ( A->name[i], "%s", list[n][1]);
	}
      n++;
    }
  return A;
}
/********************************************************************/
/*                                                                  */
/*                   FLOAT SIMILARITIES                             */
/*                                                                  */
/*                                                                  */
/*                                                                  */
/********************************************************************/
float get_seq_fsim ( char *string1, char *string2, char *ignore, char *similarity_set,int **matrix, int MODE )
	{
	int len, a, r1, r2, nr1=0, nr2=0;
	float pos=0, sim=0;
		

	len=MIN((strlen (string1)),(strlen (string2)));
	if ( len==0)return 0;
	
	for ( a=0; a< len; a++)
		{
		  
		  r1=string1[a];
		  r2=string2[a];
		  nr1+=!is_gap(r1);
		  nr2+=!is_gap(r2);
		  
		  if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
			{
			pos++;
			if ( matrix)sim+=matrix[r1-'A'][r2-'A'];
			else if (is_in_same_group_aa(r1,r2,0, NULL,similarity_set))
				{
				sim++;
				}
			}
		}
	if ( MODE==UNGAPED_POSITIONS)return ( sim*100)/pos;
	else if ( MODE==ALIGNED_POSITIONS)return (sim*100)/len;
	else if ( MODE==AVERAGE_POSITIONS)return (sim*200)/(nr1+nr2);
	else
	  {
	    return 0;
	  }
	
	}


/********************************************************************/
/*                                                                  */
/*                   ALIGNMENT ANALYSES                             */
/*                                                                  */
/*                                                                  */
/*                                                                  */
/********************************************************************/
int **dist_array2sim_array ( int **p, int max)
{
  int s1, s2, a, b;
  s1=read_array_size ((void *)p, sizeof (void *));
  s2=read_array_size ((void*)p[0],sizeof (void *));
  for ( a=0; a< s1; a++)
    for ( b=0; b< s2; b++)
      {
	p[a][b]=max-p[a][b];
      }	
  return p;
}
int **sim_array2dist_array ( int **p, int max)
{
  int s1, s2, a, b;
  s1=read_array_size ((void *)p, sizeof (void *));
  s2=read_array_size ((void*)p[0],sizeof (void *));
  for ( a=0; a< s1; a++)
    for ( b=0; b< s2; b++)
      {
	p[a][b]=max-(int)p[a][b];
      }	
  return p;
}


int aln2most_similar_sequence ( Alignment *A, char *mode)
{
  int **w;
  int a, b;
  int avg, best_avg=0, best_seq=0;
  char *buf;
  int coverage;

  
  if ( !A) return -1;
  else if ( A->nseq==1)return 0;
  else
    {
      buf=vcalloc ( A->len_aln+1, sizeof (char));
      w=get_sim_aln_array ( A, mode);
      
      for ( a=0; a< A->nseq; a++)
	{
	  sprintf ( buf, "%s", A->seq_al[a]);
	  ungap(buf);
	  coverage=(strlen(buf)*MAXID)/A->len_aln;
	  
	  for ( avg=0,b=0; b< A->nseq; b++)avg+=w[a][b]*coverage;
	  if ( avg>best_avg){best_avg=avg; best_seq=a;}
	}
      free_int (w, -1);
      vfree (buf);
      return best_seq;
    }
  
}
int aln2coverage ( Alignment *A, int ref_seq)
{
  int a,b;
  int cov_pos=0, npos=0;

  
  for ( a=0; a< A->len_aln; a++)
    {
      if ( !is_gap ( A->seq_al[ref_seq][a]))
	{
	  npos++;
	  for ( b=0; b< A->nseq; b++)
	    {
	      if ( b!=ref_seq && !is_gap ( A->seq_al[b][a])){cov_pos++;break;}
	    }
	}
    }
  return  (int) (npos==0)?0:(( MAXID*cov_pos)/npos);
}
 
int aln2sim ( Alignment *A, char *mode)
{
  int **w;
  float avg=0;
  int a, b, c;

  if ( !A || A->nseq<2) return -1;
  w=get_sim_aln_array ( A, mode);

  for (c=0, a=0; a< A->nseq-1; a++)
    for ( b=a+1; b< A->nseq; b++, c++)
      {
	avg+=(float)w[a][b];
      }
  free_int (w, -1);
  return (int)((float)avg/(float)c);
}

int aln_is_aligned ( Alignment *A)
{
  int a, b;
  
  if ( !A)return 0;
  for (a=0; a< A->nseq; a++)
    for ( b=A->len_aln-1; b>0; b--)
      {
	if (!is_gap(A->seq_al[a][b]) && is_gap(A->seq_al[a][b-1]))return 1;
      }
  return 0;
}
	
int* get_cdna_seq_winsim ( int *cache, char *string1, char *string2, char *ignore, char *mode,int *w )
	{
	int len1, len2;
	int a, x;

	
	len1=strlen (string1);
	len2=strlen (string2);
		
	if ( len1!=len2)
	  {
	    fprintf ( stderr, "\nTHE TWO cDNAs DO NOT HAVE THE SAME LENGTH [FATAL:get_cdna_seq_sim:%s", PROGRAM);
	    crash("");
	  }
	
	x=get_cdna_seq_sim(cache, string1, string2, ignore, "");
	for ( a=0; a< len1; a++)
	  w[a]=x;

	fprintf (stderr, "\nWARNING: winsim not implemented for cDNA");
	return w;
	}

int get_cdna_seq_sim ( int *cache, char *string1, char *string2, char *ignore, char *mode)
	{
	int len1;
	int len2;
	int a;
	int pos=0;
	int sim=0;
	char r1, r2;
	
	len1=strlen (string1);
	len2=strlen (string2);


	
	if ( len1!=len2)
	  {
	    fprintf ( stderr, "\nTHE TWO cDNAs DO NOT HAVE THE SAME LENGTH [FATAL:get_cdna_seq_sim:%s", PROGRAM);
	    crash("");
	  }
	    
	for ( a=0; a< len1;)
		{
		 
		  if ( cache[a]==0){a++;continue;}
		  else if ( cache[a]==1)
		    {
		      
		      r1=translate_dna_codon (string1+a, 'x');
		      r2=translate_dna_codon (string2+a, 'x');
		      a+=3;
		    }
		  
		  if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
			{
			pos++;
			if (is_in_same_group_aa(r1,r2,0, NULL,mode+4))
				{
				sim++;
				}
			}
		}



	if (pos==0)
		 return 0;
	else	
		return (int) (sim*MAXID)/pos;
	
	}	

int* get_seq_winsim ( char *string1, char *string2, char *ignore, char *mode, int*w)
	{
	int len1, len2, len;
	int left, right;
	int a,b;
	int sim=0;
	int window;
	int r1, r2;

	len1=strlen (string1);
	len2=strlen (string2);
	window=atoi(mode);
	len=2*window+1;
	
	if ( len1!=len2)return 0;
	if (window==0 || (window*2+1)>=len1)
	  {
	    sim=get_seq_sim (string1, string2, ignore, "");
	    for (a=0; a<len1; a++)w[a]=sim;
	    return w;
	  }
	

	for ( a=0; a< len1; a++)
		{
		  
		  left =MAX(0, a-window);
		  right=MIN(len1, left+len);
		  for (sim=0,b=left; b<right; b++)
		    {
		      r1=string1[b];
		      r2=string2[b];
		      if (  !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
			{
			  if (r1==r2)sim++;
			}
		    }
		  w[a]=(sim*MAXID)/len;
		}
	return w;
	}


int get_seq_sim ( char *string1, char *string2, char *ignore, char *mode)
	{
	int len1;
	int a;
	int pos1, pos2, pos0, sim;
	int p1, p2;
	int r0,r1, r2;
	
	
	
	len1=strlen (string1);


	for ( sim=pos1=pos2=pos0=0,a=0; a< len1; a++)
		{
		  
		  r1=string1[a];
		  r2=string2[a];
		  p1=1-is_in_set (r1, ignore);
		  p2=1-is_in_set (r2, ignore);
		  pos1+=p1; pos2+=p2;
		  if (p1 && p2)
			{
			  pos0++;
			  if (is_in_same_group_aa(r1,r2,0, NULL, mode))
			    {		      
			      sim++;
			    }
			}
		}
	
	r0=(pos0==0)?0:(sim*MAXID)/pos0;
	r1=(pos1==0)?0:(sim*MAXID)/pos1;
	r2=(pos2==0)?0:(sim*MAXID)/pos2;
	

	return r0;
	
	}	
int get_seq_sim_2 ( char *string1, char *string2, char *ignore, char **gr, int ng)
	{
	int len1;
	int len2;
	int a;
	int pos=0;
	int sim=0;
	char r1, r2;
	
	
	len1=strlen (string1);
	len2=strlen (string2);
	
	if ( len1!=len2)return 0;
	
	for ( a=0; a< len1; a++)
		{
		r1=string1[a];
		r2=string2[a];
		if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
			{
			pos++;
			if (is_in_same_group_aa(r1,r2,ng, gr, NULL))
				{
				sim++;
				}
			}
		}
	
	if (pos==0)
		 return 0;
	else	
		return (int) (sim*MAXID)/pos;
	
	}

int get_seq_sim_3 ( char *string1, char *string2, char *ignore, int **mat)
	{
	int len1;
	int len2;
	int a;
	
	int sim=0;
	char r1, r2;
	
	
	len1=strlen (string1);
	len2=strlen (string2);
	
	if ( len1!=len2)return 0;
	
	for ( a=0; a< len1; a++)
		{
		r1=string1[a];
		r2=string2[a];
		if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
			{
			sim+=mat[r1-'A'][r2-'A'];
			}
		}
	return sim;
	
	}	
int * get_aln_col_weight ( Alignment *A, char *mode)
	{
	int a, b;
	char *col;
	int *weight;
	
	col=vcalloc ( A->nseq, sizeof (int));
	weight=vcalloc (A->len_aln, sizeof (int));
	
	for (a=0; a< A->len_aln; a++)
		{
		for ( b=0; b< A->nseq; b++)
			col[b]=A->seq_al[b][a];
		weight[a]=(find_group_aa_distribution (col, A->nseq,0,NULL,NULL, mode )*MAXID)/A->nseq;		 
		}
	vfree (col);
	return weight;
	
	}	
			
int analyse_aln_column ( Alignment *B, int col)
    {

    char r=' ';
    int a, b, c;
    static char *mat;
    static int ng_cw_star;
    static char **cw_star;
    int *cw_star_count;
    
    static int ng_cw_col;
    static char **cw_col;
    int *cw_col_count;
    
    static int ng_cw_dot;
    static char **cw_dot;
    int *cw_dot_count;
    

    
    if ( !B->S)B= get_aln_type (B);
    
    
    if ( !B->S || !(B->S)->type)B= get_aln_type (B);
    
    if ( !mat)mat=vcalloc ( STRING, sizeof (char));

    if ( !ng_cw_star)
       {
	   cw_star=make_group_aa ( &ng_cw_star, strcpy ( mat,"idmat"));
	   cw_col=make_group_aa ( &ng_cw_col, strcpy (mat,"clustalw_col"));
	   cw_dot=make_group_aa ( &ng_cw_dot, strcpy (mat, "clustalw_dot"));
       }

    cw_star_count=vcalloc (ng_cw_star, sizeof (int));
    cw_col_count=vcalloc ( ng_cw_col, sizeof (int));
    cw_dot_count=vcalloc (ng_cw_dot, sizeof (int));
    
    for ( a=0; a< B->nseq; a++)
        {
	c=tolower (B->seq_al[a][col]);
	if (is_gap(c)){r=' ';break;}
	  
	for ( b=0; b< ng_cw_star; b++)
	    cw_star_count[b]+=is_in_set (c, cw_star[b]);	
	for ( b=0; b< ng_cw_col; b++)
	    cw_col_count[b]+=is_in_set  (c, cw_col[b]);
	for ( b=0; b< ng_cw_dot; b++)
	    cw_dot_count[b]+=is_in_set  (c, cw_dot[b]);
	}
    
   
   
   
    
    if ( !is_gap(c) && r==' ')
	for ( b=0; b< ng_cw_star; b++)if ( cw_star_count[b]==B->nseq){r='*'; break;}
    if ( !is_gap(c) && r==' ' && !(strm((B->S)->type, "DNA")))
	for ( b=0; b< ng_cw_col ; b++)if ( cw_col_count [b]==B->nseq){r=':'; break;}
    if ( !is_gap(c) && r==' ' && !(strm((B->S)->type, "DNA")))
	for ( b=0; b< ng_cw_dot ; b++)if ( cw_dot_count [b]==B->nseq){r='.'; break;}
    
    
    
    vfree(cw_star_count);
    vfree(cw_col_count);
    vfree(cw_dot_count);   
	
    return r;
    }


int ** get_cov_aln_array ( Alignment *A, char *mode)
{
  int **w;
  int a, b, c, t;
  
  w=declare_int ( A->nseq, A->nseq);
  
  
  for ( a=0; a< A->nseq-1; a++)
    {
      w[a][a]=100;
      for ( t=0,b=a+1; b< A->nseq; b++)
	{
	  for ( c=0; c< A->len_aln; c++)
	    {
	      t+=(!is_gap(A->seq_al[a][c]) &&!is_gap(A->seq_al[b][c]));
	    }
	  w[a][b]=w[b][a]=(t*100)/A->len_aln;
	}
    }
  return w;
}

int ** get_cov_master_aln_array ( Alignment *A,int n, char *mode)
{
  int **w;
  int    b, c, t;
  
  w=declare_int ( A->nseq, A->nseq);
  
  
  for (b=0; b< A->nseq; b++)
	{
	  
	  for (t=0, c=0; c< A->len_aln; c++)
	    {
	      t+=(!is_gap(A->seq_al[n][c]) &&!is_gap(A->seq_al[n][c]));
	    }
	  w[n][b]=w[b][n]=(t*100)/A->len_aln;
	}
    
  return w;
}
int ** get_sim_master_aln_array ( Alignment *A,int n, char *mode)
	{
	int **w;
	int a, b;
	
	w=declare_int ( A->nseq, A->nseq);
	
	
	for ( a=0; a< A->nseq; a++)
	  {
	    if ( strm (mode, "cdna"))
	    w[n][a]=w[a][n]=get_cdna_seq_sim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[n], "-", mode);			
	  else
	    w[n][a]=w[a][n]=get_seq_sim ( A->seq_al[n], A->seq_al[a], "-", mode);
	  }
	return w;
	}
int ** get_dist_aln_array ( Alignment *A, char *mode)
{
  int a, b;
  int **w;

  w=get_sim_aln_array ( A, mode);
  return sim_array2dist_array(w,MAXID);
}

int ** get_sim_aln_array ( Alignment *A, char *mode)
	{
	int **w;
	int a, b;
	
	
	w=declare_int ( A->nseq, A->nseq);
	
	for ( a=0; a< A->nseq-1; a++)
	  {
	    for ( b=a+1; b< A->nseq; b++)
			{
			  if ( strm (mode, "cdna"))
			    w[a][b]=w[b][a]=get_cdna_seq_sim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[b], "-", mode);			
			  else if ( strnm (mode, "ktup",4))
			    w[a][b]=w[b][a]=ktup_comparison (A->seq_al[a], A->seq_al[b],atoi(mode+4));
			  else
			    w[a][b]=w[b][a]=get_seq_sim ( A->seq_al[a], A->seq_al[b], "-", mode);
			}
	  }
	return w;
	}

int *** get_winsim_aln_array ( Alignment *A,char *mode, int ***w)
	{
	int a, b;
	for ( a=0; a< A->nseq; a++)
		for ( b=0; b< A->nseq; b++)
			{
			  if ( strm (mode, "cdna"))
			    w[a][b]=get_cdna_seq_winsim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[b], "-", mode, w[a][b]);			
			  else
			    w[a][b]=get_seq_winsim ( A->seq_al[a], A->seq_al[b], "-", mode, w[a][b]);
			}
	return w;
	}

Alignment * seq2profile (Sequence *S, int i)
{
  Alignment *A;
  
  if (A=seq2R_template_profile (S, i))return A;
  else
    {
      char *tmp;
      FILE *fp;
      
      tmp=vtmpnam (NULL);
      fp=vfopen ( tmp, "w");
      fprintf (fp, ">%s\n%s\n", S->name[i], S->seq[i]);
      vfclose (fp);
      
      (S->T[i])->R=fill_R_template (S->name[i], tmp, S);
      
      return  seq2R_template_profile (S, i);
    }
}

Alignment* aln2sub_aln_file (Alignment *A, int n, char **string)
{
  char ***list;
  int a;

  list=vcalloc (A->nseq, sizeof (char***));
  if ( n==0)return A;
  else if (n>1)
    {
      int l;
      char *buf;
      
      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);
      buf=vcalloc ( 2*n+l+1, sizeof (char));
      for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");}     
      list[0]=string2list (buf);
      vfree (buf);
    }
  else if ( file_exists (string[0]))
    {
      list=read_group (string[0]);

    }
  else
    {
      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);
      exit (EXIT_FAILURE);
    }  

  
  a=0;
  while (list[a])
    {
      int i, b;
      FILE *fp;
      n=atoi (list[a][0]);
      fp=vfopen (list[a][1], "w");
      for (b=2; b<n; b++)
	{
	  i=name_is_in_list (list[a][b], A->name, A->nseq, MAXNAMES);
	  if (n==3)ungap (A->seq_al[i]);
	  fprintf (fp, ">%s\n%s\n", A->name[i], A->seq_al[i]);	  
	}
      vfclose (fp);
      free_char (list[a], -1);
      a++;
    }
  vfree(list);
  return A;
}

Alignment* aln2sub_seq (Alignment *A, int n, char **string)
{
  char ***list;
  int a;
  Sequence *S=NULL;
    
  list=vcalloc (A->nseq, sizeof (char***));
  if ( n==0)return A;
  else if (n>1)
    {
      int l;
      char *buf;
      
      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);
      buf=vcalloc ( 2*n+l+1, sizeof (char));
      for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");}     
      list[0]=string2list (buf);
      vfree (buf);
    }
  else if ( file_exists (string[0]))
    {
      list=read_group (string[0]);

    }
  else
    {
      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);
      exit (EXIT_FAILURE);
    }  

 
  
  a=0;
  while (list[a])
    {
      int t;
      Alignment *B;
      Sequence *subS;
      
      
      B=main_read_aln (list[a][1], NULL);
      t=aln2most_similar_sequence(B, "idmat");
      subS=extract_one_seq(B->name[t],0,0,B,KEEP_NAME);
      S=add_sequence (subS,S,0);
      free_aln (B);free_sequence (subS, -1);
      vremove (list[a][1]);
      a++;
    }
  vfree(list);
  return seq2aln (S, NULL, RM_GAP);
}

Alignment * aln2collapsed_aln (Alignment * A, int n, char **string)
{
  Alignment *B;
  char ***list;
  char **list2;
  char *buf;
  FILE *fp;
  int a, b,c, ns, m, l;
  int *collapsed;
  
  list=vcalloc (A->nseq, sizeof (char***));
  ns=0;
  if ( n==0)return A;
  else if (n>1)
    {
      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);
      buf=vcalloc ( 2*n+l+1, sizeof (char));
      for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");}
     
      list[0]=string2list (buf);ns=1;
      
    }
  else if ( file_exists (string[0]))
    {
      /*Format: Fasta like, the name fo the group followed with the name of the sequences
	><Group name> <First Seq> <second seq> ....
	Groups must NOT be overlaping
      */
      l=measure_longest_line_in_file (string[0])+1; 
      buf=vcalloc (l, sizeof (char));
      ns=0;
      fp=vfopen (string[0], "r");
      while ((c=fgetc(fp))!=EOF)
	{
	  buf=fgets (buf,l-1, fp);
	  if ( c=='>')list[ns++]=string2list (buf);
	}
      vfclose (fp);
    }
  else
    {
      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);
      exit (EXIT_FAILURE);
    }
  
  vfree (buf); buf=NULL;

  /*Identify lost sequences*/
  collapsed=vcalloc (A->nseq, sizeof (int));
  for ( a=0; a< ns; a++)
      {
	m=atoi (list[a][0]); 
	for (b=2; b<m ; b++)
	  {
	    c=name_is_in_list (list[a][b], A->name, A->nseq, MAXNAMES);
	    if ( c>=0)collapsed[c]=1;
	  }
      }
  for ( a=0; a< A->nseq; a++)
    {
      if ( collapsed[a]==0)
	{
	  list[ns]=declare_char (3, MAXNAMES);
	  sprintf ( list[ns][0], "3");
	  sprintf ( list[ns][1], "%s", A->name[a]);
	  sprintf ( list[ns][2], "%s", A->name[a]);
	  ns++;
	}
    }
  vfree (collapsed);
  
  
  


  list2=declare_char (A->nseq, 100);
  /*1 Collapse the alignment*/
  for ( a=0; a< ns; a++)
    {
      sprintf ( list2[a], "%s", list[a][2]);
    }
   B=extract_sub_aln2 ( A, ns, list2);
  /*2 Rename the sequences*/
  for ( a=0; a< ns; a++)
    {
      sprintf ( B->name[a], "%s", list[a][1]);
    }
  /*replace sequence with consensus*/
  
  for ( a=0; a< ns; a++)
    {
      m=atoi (list[a][0]);
      for (c=0, b=2; b<m;c++, b++)
	{
	  sprintf ( list2[c], "%s", list[a][b]);
	}
      buf=sub_aln2cons_seq_mat2 ( A,m-2,list2, "blosum62mt");
      sprintf (B->seq_al[a], "%s", buf);     
    }
  vfree (buf);

  free_aln (A);
  B->S=aln2seq(B);
  return B;
}
Alignment * aln2profile (Alignment * A)
    {
      Alignment *B=NULL;
      char *cons;
      
      if (!A->P)
	{
	  A->P=declare_profile (AA_ALPHABET,A->len_aln+1);
	}
      B=copy_aln (A, B);
      free_int ((A->P)->count, -1);
      free_int ((A->P)->count2, -1);
      free_int ((A->P)->count3, -1);
      (A->P)->count=aln2count_mat (A);
      (A->P)->count2=aln2count_mat2 (A);
      
      cons=aln2cons_seq_mat (A, "blosum62mt");
      
      sprintf (B->seq_al[0], "%s", cons);
      B->nseq=1;
      (A->P)->count3=aln2count_mat2 (B);
      vfree (cons);
      free_aln (B);
      
      

      return A;
	  
    }

int** aln2count_mat2 ( Alignment *A)
{
  return sub_aln2count_mat2 (A, 0, NULL);
}
int** sub_aln2count_mat2 ( Alignment *A, int ns, int *l_s)
{
  int **count;
  int used[1000];
  int a, b;
  char r;
  int l_s_flag=0;
  int len;
  int us;
  
  /*count[x][0]=n symbols in column
    count[x][1]=total_size of line
    count[x][2]=Gap frequency
    count[x][n]=symbol n
    count[x][n+1]=N occurence symbol n;
    count[x][n+1]=N frequence symbol n*100;
 
    special multi-channeling
    count[x][count[x][1]]=Nseq
    count[x][count[x][1]+s]=residue col x, sequence s
  */

  for (a=0; a< 1000; a++)used[a]=0;
  if ( ns==0)
    {
      ns=A->nseq;
      l_s=vcalloc ( A->nseq, sizeof (int));
      for ( a=0; a< A->nseq; a++)l_s[a]=a;
      l_s_flag=1;
    }
  len=strlen (A->seq_al[l_s[0]]);
	      
  count=declare_int (len,100+ns+2);
  for (a=0; a<len; a++)
    {
      
      for (us=ns, b=0; b<ns; b++)
	{
	  
	  r=tolower (A->seq_al[l_s[b]][a]);
	  
	  if (is_gap(r))us--;
	  else if (used[r])
	    {
	      count[a][used[r]*3+1]++;
	    }
	  else
	    {
	      used[r]=++count[a][0];
	      count[a][used[r]*3]=r;
	      count[a][used[r]*3+1]++;
	    }
	}
      count[a][1]=count[a][0]*3+2;
      count[a][2]=(A->nseq-us)*100/A->nseq;
      
      for (b=3; b<count[a][1]; b+=3)
	{
	  count[a][b+2]=(count[a][b+1]*100)/us;
	  used[count[a][b]]=0;
	}
      /*Option for multi channeling*/
      count[a][count[a][1]]=A->nseq;
      for (b=1; b<=A->nseq; b++)
	count [a][count[a][1]+b]=(is_gap(A->seq_al[b-1][a]))?0:A->seq_al[b-1][a];
      
    }

  if ( l_s_flag)vfree(l_s);
  return count;
}
	  
int** aln2count_mat ( Alignment *A)
    { /*
	function documentation: start
	
	int output_freq_mat ( char *outfile, Aligmnent *A)

	This function counts the number of residues in each column of an alignment (Prot/NA)
	It outputs these values in the following format

	This format can be piped into:
	The routine used for computing the p-value  gmat-inf-gc-v2c
	
	function documentation: end
      */
      
    int a, b,x;
    int **freq_mat;
    int alp_size;

    alp_size=sizeof (AA_ALPHABET); 
    freq_mat=declare_int (alp_size+2, A->len_aln);
      

    for ( a=0; a<A->len_aln; a++)
      {
	for ( b=0; b< A->nseq; b++)
	  {
	    if ( is_gap ( A->seq_al[b][a]))freq_mat[alp_size][a]++;
	    else
	      {
		x=tolower(A->seq_al[b][a]);
		freq_mat[x-'a'][a]++;
		freq_mat[alp_size+1][a]++;
		
	      }
	  }
      }
    
    return freq_mat;
    }
char *aln2random_seq (Alignment *A, int pn1, int pn2, int pn3, int gn)
    {

      /* 

	 
	 Given the frequencies in A ( read as total counts of each Residue in
	 freq[A->nseq][A->len_aln], and pn1, pn2 and pn3:
	  
	                  1-Generate a new amino-acid at each position
			  2-Insert Gaps, using a HMM.

	 
	 pn3=Weight of the noise induced with sub mat.

	 pn1=% noise type 1 ( Varies with entropi)
	  n1=Ratio noise type 1
	  
	 T =Nseq 
	 t1=Noise 1 expressed in Nseq
	 al=alphabet size;
	 ncat=number of non 0 cat for a given position
	 ICi initial count for residue i

	 Ci=freq[seq][AA]
	 t1=T*n1*(1-1/ncat);
	 t2=T*n2;
	 
	 Ci= ICi*(T-(t1+t2))/T +(t1)/al+(t2)/al
	 
      */
      
      int **freq;
      int **count;
      float T, tot_t1, tot_t2,tot_t3, n1, n2, n3;
      float ncat;
      
      double gf;
      double *init_freq;
      double *blur_freq;
      double *t1, *t2,*t3;
      int a, b, c, x;
      char *seq;
      int tot;
      /*Viterbi  Parameters */
      
      int p;
      int AL=0;        /*Allowed Transition*/
      int F=-100000; /*Forbiden Transition*/

      int GAP_TRANSITION;
      int IGAP=0, IAA=1;
      
      int state,best_state, score, best_score;
      int p_state;
      int e;
      int **score_tab;
      int **state_tab;
      int nstate=2;
      int **transitions;
      
      int max;

      seq=vcalloc ( A->len_aln+1, sizeof (char));     
      count=aln2count_mat(A);
      freq=aln2count_mat(A);

      T=100;

      n1=(float)pn1/100;
      n2=(float)pn2/100;
      n3=(float)pn3/100;
      
      for ( a=0; a< A->len_aln; a++)
	{
	  for ( b=0; b<26; b++)
	    freq[b][a]=freq[b][a]*((T)/(A->nseq-freq[26][a]));
	  freq[26][a]= (freq[26][a]*T)/A->nseq;
	}

      
      init_freq=vcalloc ( 26, sizeof (double));
      blur_freq=vcalloc ( 26, sizeof (double));
      
      tot_t1=tot_t2=tot_t3=0;
      
      t1=vcalloc ( 27, sizeof (double));
      t2=vcalloc ( 27, sizeof (double));
      t3=vcalloc ( 27, sizeof (double));
      for (a=0; a< A->len_aln; a++)
	{

        /*Compute Frequencies*/
	  for (tot=0, b=0; b<26; b++)
		{
		  if ( is_aa(b+'A'))
		    {
		      init_freq[b]=freq[b][a];
		      tot+=freq[b][a];
		    }
		}
        /*Count the number of  different amino acids*/
	for ( ncat=0, b=0; b<=26; b++)
	  {
	    ncat+=(freq[b][a]!=0)?1:0;	    
	  }
	/*Blurr the distribution using */
       	blur_freq=compute_matrix_p (init_freq,tot);	
		
	
	/*compute noise 1: biased with blurred content * enthropy--> keeps prosite motifs*/
	tot_t1=T*n1*(1-1/ncat);
	for (  b=0; b< 26; b++)if ( is_aa(b+'A')){t1[b]=blur_freq[b]*(1-1/ncat)*n1;}
	
	/*Compute noise 2: completely random*/
	tot_t2=T*n2;
	for (  b=0; b< 26; b++)if ( is_aa(b+'A')){t2[b]=tot_t2/21;}
	
	/*compute noise 3: biased with the sole content(pam250mt)*/
	tot_t3=T*n3;
	for (  b=0; b<26; b++)if ( is_aa(b+'A')){t3[b]=blur_freq[b]*n3;}
	
	for ( b=0; b<26; b++)
	  {
	    if ( is_aa('A'+b))
	      freq[b][a]=freq[b][a]*(T-(tot_t1+tot_t2+(tot_t3)))/T+t1[b]+t2[b]+t3[b];
	  }
	
	/*end of the loop that mutates position a*/
	}
     
        vfree (blur_freq);
	vfree (init_freq);
	vfree ( t3);
	
      /*1-Generate the amino acids of the new sequence new*/
      
      
      vsrand (0);
      
      for ( a=0; a< A->len_aln; a++)
	{

	  for (T=0,b=0; b<26; b++)T+=freq[b][a];
	  x=rand ()%((int)T);
	  for (c=0,b=0; b<26; b++)
	    {
	     c+=freq[b][a];
	     if ( c>=x)
	       {
		 seq[a]='A'+b;
		 c=-1;
		 break;
	       }
	    }
	  if ( c!=-1)seq[a]='-';
	}
      seq[a]='\0';
      

      /*2 Generate the gaps in the new sequence*/
      
      

      if ( gn<0);
      else
	{

	  transitions=declare_int ( nstate, nstate);
	  score_tab=declare_int ( A->len_aln+2, nstate       );
	  state_tab=declare_int ( A->len_aln+2, nstate       );
	  
	  
	  
	  for (a=0; a<nstate;a++)
	    for (b=0; b<nstate;b++)
	      {transitions[a][b]=F;}
	  
	  GAP_TRANSITION=AL-gn;
	  
	  

	 



	  transitions[IGAP ][IGAP ]=AL;
	  transitions[IAA][IAA]=AL;
	  transitions[IAA ][IGAP]=GAP_TRANSITION;
	  transitions[IGAP][IAA ]=GAP_TRANSITION;
	  
	  
	  for ( p=1; p<=A->len_aln; p++){for (state=0; state< nstate; state++){score_tab[p][state]=F;state_tab[p][state]=-1;} }
	  
	  for (p=1; p<= A->len_aln; p++)
	    {
	      for (max=0,a=0; a<26; a++)max=MAX(max, freq[a][p-1]);
	      max=(max*(A->nseq-count[26][p-1]))/A->nseq;
	      
	      for (state=0; state< nstate; state++)
		{
		  
		  
		  gf=freq[26][p-1];
		  if      ( state==IGAP)  e=gf-50;
		  else if ( state==IAA )  e=max-50;
		  for (p_state=0; p_state<nstate; p_state++)
		    {
		      score=(score_tab[p-1][p_state]==F)?F:(e+transitions[p_state][state]+score_tab[p-1][p_state]);
		      if(p_state==0 || score>best_score){ best_score=score;best_state=p_state;}
		    }
		  score_tab[p][state]=best_score;
		  state_tab[p][state]=best_state;
		}
	    }
	  
	  for (state=0; state<nstate; state++)
	    {
	      if (state==0 || score_tab[p-1][state]>best_score){best_score=score_tab[p-1][state]; best_state=state;}
	    }
	  
	  for (p=A->len_aln; p>0;)
	    {
	      if ( best_state==IGAP)
		{
		  seq[p-1]='-';
		}
	      else if ( best_state==IAA)
		{
		  seq[p-1]=seq[p-1];
		}
	      best_state=state_tab[p][best_state];
	      p--;
	    }
        }
      
      free_int (freq, -1);
      return seq;
    }

/********************************************************************/
/*                                                                  */
/*			Weighting functions                         */
/*                                                                  */
/*                                                                  */
/*                                                                  */
/********************************************************************/
Alignment * master_trimseq( Alignment *A, Sequence *S,char *mode)
     {
       Alignment *NA;
       char *p;
       int a, b;
       int use_aln=0, upper_sim=0, min_nseq=0, lower_sim=0;
       float f_upper_sim, f_lower_sim;
       char weight_mode[1000];
       char method[1000];
       int statistics=0;
       int trim_direction=TOP;
       float **sim_weight;
       int *seq_list;
       int table=0;
       
       
     

     /*
       mode: 
           (trim)_<seq or aln>_%<percentage of tot weight to keep>_n<number of seq to keep>_w<weight mode>
     */
     

     
     seq_list=vcalloc ( S->nseq, sizeof (int));
     for ( a=0; a< A->nseq; a++)
       {
	 seq_list[a]=1;
       }
     

     use_aln=aln_is_aligned(A);
     
     if ( mode[0]=='\0')
       {
	
	 upper_sim=50;
	 lower_sim=0;
	 min_nseq=0;
	 sprintf (weight_mode, "pwsim");
	 sprintf ( method, "clustering2");
       }
     else 
       {
	
	 upper_sim=lower_sim=min_nseq;
	 sprintf (weight_mode, "pwsim");
	 sprintf ( method, "clustering2");
       }

     /*
      U or % (deprecated) Upper bound for pairwise similarity
      L or m (depercated) Lower  bound for pairwise similarity
      n max number of sequences
      N max number of sequences as a fraction of thet total
      S print Statistics
      T print Table of distances
     */

     

     while ( (p=strtok(mode, "_")))
	   {	     
	     mode=NULL;
	     if (strm (p, "seq"))use_aln=0;
	     else if ( strm(p,"aln"))use_aln=1;
	     else if  (p[0]=='s')statistics=1;
	     else if  (p[0]=='t')table=1;
	     else if  (p[0]=='U')upper_sim=atoi(p+1);
	     else if  (p[0]=='L')lower_sim=atoi(p+1);
	     else if  (p[0]=='n')min_nseq=atoi(p+1);
	     else if  (p[0]=='N')min_nseq=atoi(p+1)*-1;
	     else if  (p[0]=='B')trim_direction=BOTTOM;
	     else if  (p[0]=='T')trim_direction=TOP;
	     else if  (p[0]=='W')sprintf (weight_mode, "%s", p+1);
	     else if  (p[0]=='M')sprintf (method, "%s", p+1);
	     else if  (p[0]=='K')
	       {
		 
		 while (p=strtok(NULL, ":"))
		   {
		     
		     if ( p[0]=='#')
		       {
			 seq_list[atoi(p+1)-1]=2;
		       }
		     else if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)

		       {
			 seq_list[a]=2;
		       } 
		   }
	       }
	   }
     
     if ( !upper_sim && !min_nseq && !lower_sim)upper_sim=50;
     
     

     if  (!S)
       {
	 fprintf ( stderr, "\ntrimseq requires a set of sequences[FATAL:%s]\n", PROGRAM);
	 crash("");
       }
     
     else if ( min_nseq> S->nseq)
       {
	 min_nseq=S->nseq;
       }
     else if ( min_nseq<0)
       {
	 if ( min_nseq<-100)
	   {
	     fprintf ( stderr, "\nWARNING: trimseq: Nseq(N)  max_val=100%% [Automatic reset]\n");
	     min_nseq=-100;
	   }
	 
	 min_nseq=(int)((float)S->nseq*((float)min_nseq/100)*-1);
       }


     NA=seq2subseq3 (A, S,use_aln,lower_sim,upper_sim,min_nseq,trim_direction, weight_mode,&sim_weight, seq_list );
     
     if ( table)
       {
	 fprintf ( stderr, "\nSIMILARITY MATRIX\n");
	 for ( a=0; a< A->nseq-1; a++)
	   for ( b=a+1; b< A->nseq; b++)
	     {
	       fprintf ( stderr, "%15s Vs %15s : %3.2f %% id\n", A->name[a], A->name[b], 100-sim_weight[a][b]);
	     }
       }
     if ( statistics)
       {
	 f_upper_sim=(upper_sim>100)?((float)upper_sim/(float)100):upper_sim;
	 f_lower_sim=(upper_sim>100)?((float)lower_sim/(float)100):lower_sim;
	 
	 fprintf ( stderr, "\nTRIM Informations:\n");
	 fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");
	 fprintf ( stderr, "\tcluster_mode..: %s\n"  ,method);
	 fprintf ( stderr, "\tsim_mode......: %s\n"  ,weight_mode);
	 fprintf ( stderr, "\tlower_id_bound: %.2f%%\n"  ,(f_lower_sim==0)?-1:f_lower_sim);
	 fprintf ( stderr, "\tupper_id_bound: %.2f%%\n",(f_upper_sim==0)?-1:f_upper_sim);
	 fprintf ( stderr, "\tnseq_kept.....: %d (out of %d)\n"  ,NA->nseq, S->nseq);
	 fprintf ( stderr, "\treduction.....: %d%% of original set\n"  ,(NA->nseq*100)/S->nseq);
	 fprintf ( stderr, "\tTrim_direction: From %s \n"  ,(trim_direction==BOTTOM)?"Bottom":"Top");
       }

     return NA;
   }

Alignment * trimseq( Alignment *A, Sequence *S,char *mode)
   {
     Alignment *NA;
     char *p;
     int a, b;
     int use_aln=0, upper_sim=0, min_nseq=0, lower_sim=0;
     char weight_mode[1000];
     char method[1000];
     int statistics=0;
     int trim_direction=TOP;
     float **sim_weight;
     int *seq_list;
     int table=0;
     int print_name=0;
     float f_lower_sim, f_upper_sim;
     
         

     /*
       mode: 
           (trim)_<seq or aln>_%<percentage of tot weight to keep>_n<number of seq to keep>_w<weight mode>
     */
     

     
     seq_list=vcalloc ( S->nseq, sizeof (int));
     for ( a=0; a< A->nseq; a++)
       {
	 seq_list[a]=1;
       }
     

     use_aln=aln_is_aligned(A);
     
     
     if ( mode[0]=='\0')
       {
	
	 upper_sim=50;
	 lower_sim=0;
	 min_nseq=0;
	 sprintf (weight_mode, "pwsim_fragment");
	 sprintf ( method, "clustering2");
       }
     else 
       {
	
	 upper_sim=lower_sim=min_nseq;
	 sprintf (weight_mode, "pwsim_fragment");
	 sprintf ( method, "clustering2");
       }

     /*
      U or % (deprecated) Upper bound for pairwise similarity
      L or m (depercated) Lower  bound for pairwise similarity
      n max number of sequences
      N max number of sequences as a fraction of thet total
      S print Statistics
      T print Table of distances
     */

     

     while ( (p=strtok(mode, "_")))
	   {	     
	     mode=NULL;
	     if (strm (p, "seq"))use_aln=0;
	     else if ( strm(p,"aln"))use_aln=1;
	     else if  (p[0]=='s')statistics=1;
	     else if  (p[0]=='t')table=1;
	     else if  (p[0]=='p')print_name=1;
	     else if  (p[0]=='U')upper_sim=atoi(p+1);
	     else if  (p[0]=='L')lower_sim=atoi(p+1);
	     else if  (p[0]=='n')min_nseq=atoi(p+1);
	     else if  (p[0]=='N')min_nseq=atoi(p+1)*-1;
	     else if  (p[0]=='B')trim_direction=BOTTOM;
	     else if  (p[0]=='T')trim_direction=TOP;
	     else if  (p[0]=='W')sprintf (weight_mode, "%s", p+1);
	     else if  (p[0]=='M')sprintf (method, "%s", p+1);
	     else if  (p[0]=='K')
	       {
		 
		 while (p=strtok(NULL, ":"))
		   {
		     
		     if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)
		       {
			 seq_list[a]=2;
		       } 
		   }
	       }
	   }
     
     if ( !upper_sim && !min_nseq && !lower_sim)upper_sim=50;
     
     

     if  (!S)
       {
	 fprintf ( stderr, "\ntrimseq requires a set of sequences[FATAL:%s]\n", PROGRAM);
	 crash("");
       }
     
     else if ( min_nseq> S->nseq)
       {
	 min_nseq=S->nseq;
       }
     else if ( min_nseq<0)
       {
	 if ( min_nseq<-100)
	   {
	     fprintf ( stderr, "\nWARNING: trimseq: Nseq(N)  max_val=100%% [Automatic reset]\n");
	     min_nseq=-100;
	   }
	 
	 min_nseq=(int)((float)S->nseq*((float)min_nseq/100)*-1);
       }


     NA=seq2subseq2 (A, S,use_aln,lower_sim,upper_sim,min_nseq,trim_direction, weight_mode,&sim_weight, seq_list );
     
     if ( table)
       {
	 fprintf ( stderr, "\nSIMILARITY MATRIX\n");
	 for ( a=0; a< A->nseq-1; a++)
	   for ( b=a+1; b< A->nseq; b++)
	     {
	       fprintf ( stderr, "%15s Vs %15s : %3.2f %% id\n", A->name[a], A->name[b], 100-sim_weight[a][b]);
	     }
       }
     
     NA=seq_name2removed_seq_name(S, NA,sim_weight);

     if ( print_name)
       {
	 fprintf ( stderr, "\nList of sequences with their closest removed neighbors\n");
	 for ( a=0; a< NA->nseq; a++)fprintf ( stderr, "\n%s: %s\n", NA->name[a], NA->seq_comment[a]);
       }
     
     if ( statistics)
       {
	 f_lower_sim=(lower_sim>100)?(float)lower_sim/100:lower_sim;
	 f_upper_sim=(upper_sim>100)?(float)upper_sim/100:upper_sim;

	 fprintf ( stderr, "\nTRIM seq Informations:\n");
	 fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");
	 fprintf ( stderr, "\tcluster_mode..: %s\n"  ,method);
	 fprintf ( stderr, "\tsim_mode......: %s\n"  ,weight_mode);
	 fprintf ( stderr, "\tlower_id_bound: %.2f%%\n"  ,(f_lower_sim==0)?-1:f_lower_sim);
	 fprintf ( stderr, "\tupper_id_bound: %.2f%%\n",(f_upper_sim==0)?-1:f_upper_sim);
	 fprintf ( stderr, "\tnseq_kept.....: %d (out of %d)\n"  ,NA->nseq, S->nseq);
	 fprintf ( stderr, "\treduction.....: %d%% of original set\n"  ,(NA->nseq*100)/S->nseq);
	 fprintf ( stderr, "\tTrim_direction: From %s \n"  ,(trim_direction==BOTTOM)?"Bottom":"Top");
       }

     return NA;
   }

Alignment * tc_trimseq( Alignment *A, Sequence *S,char *mode)
   {
     Alignment *NA;
     Sequence  *TS;
     char *trimfile, *alnfile;
     int *seq_list;
     int a, nseq=0, sim=0;
     char *p;
     char command[100000];
     char keep_list[10000];
     
     int top, bottom, middle, pmiddle;
     
     keep_list[0]='\0';
    
     seq_list=vcalloc ( S->nseq, sizeof (int));
     for ( a=0; a< A->nseq; a++)
       {
	 seq_list[a]=1;
       }
     
     trimfile=vtmpnam (NULL);
     alnfile=vtmpnam (NULL);
     if ( !aln_is_aligned (A))
       {
	 fprintf ( stderr, "\ntrimTC: computation of an Approximate MSA  [");
	 A=compute_tcoffee_aln_quick ( A, NULL);
	 fprintf ( stderr, "DONE]\n");
       }
     output_clustal_aln (alnfile, A);
     
    
     while ( (p=strtok(mode, "#")))
	   {	     
	     mode=NULL;

	     	     
	     if (p[0]=='%' || p[0]=='S')sim=(p[1]=='%')?atoi(p+2):atoi(p+1);
	     else if  (p[0]=='n' || p[0]=='N')nseq=atoi(p+1);
	     else if  (p[0]=='K')
	       {
		 if ( (a=name_is_in_list (p+1, A->name, A->nseq, 100))!=-1)
		   {
		     seq_list[a]=2;
		   }  
		 
	       }
	   }
     if ( nseq ==0 && sim ==0)
       {
	 fprintf ( stderr, "\nERROR: trimTC\nIndicate the maximum number of sequences Nnseq\nOR the maximum average similarity of the chosen sequencesSx\nEX: +trimTC S20 OR +trimTC N5"); 
	 fprintf ( stderr, "\n[FATAL:%s]", PROGRAM);
	 exit (EXIT_FAILURE);
       }
     
     for ( a=0; a<A->nseq; a++)if (seq_list[a]==2){strcat ( keep_list, A->name[a]);strcat ( keep_list," ");}
     
     if ( sim)
       {
	 sprintf ( command , "t_coffee -infile %s -trim  -trimfile=%s  -split_score_thres %d -convert -iterate 0 ", alnfile, trimfile,sim);
	 if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}
	 my_system ( command);
	 TS=read_sequences (trimfile);
       }
     else if ( nseq && A->nseq>nseq)
       {
	 
	 top=100;bottom=0;
	 pmiddle=0;middle=50;
	 
	 sprintf ( command , "t_coffee -infile %s -trim  -trimfile=%s  -split_score_thres %d -convert -iterate 0", alnfile, trimfile,middle);
	 if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}
	 my_system ( command);
	 
	 TS=read_sequences (trimfile);
	 fprintf ( stderr, "\n\tTrimTC: Sim %d Nseq %d\t",middle, TS->nseq);
	 
	 if ( TS->nseq>nseq)top=middle;
	 else if ( TS->nseq<nseq)bottom=middle;
	 pmiddle=middle;
	 middle=(top-bottom)/2+bottom;
	 
	 while (TS->nseq!=nseq && pmiddle!=middle)
	   {
	     
	     sprintf ( command , "t_coffee -infile %s -trim  -trimfile=%s  -split_score_thres %d -convert -iterate 0 ", alnfile, trimfile,middle);
	     if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}
	     my_system ( command);
	     free_sequence (TS, -1);
	     TS=read_sequences (trimfile);
	     fprintf ( stderr, "\n\tTrimTC: Sim %d Nseq %d\t", middle, TS->nseq);
	     
	     if ( TS->nseq>nseq)top=middle;
	     else if ( TS->nseq<nseq)bottom=middle;
	     pmiddle=middle;
	     middle=(top-bottom)/2+bottom;
	   }
       }
     else
       {
	 TS=aln2seq (A);
       }
     NA=seq2aln (TS, NULL, 1);
     vremove ( alnfile);
     fprintf ( stderr, "\n");
     
     return NA;
   }   

Alignment* seq2subseq3( Alignment *A, Sequence *S,int use_aln, int int_lower_sim,int int_upper_sim, int min_nseq, int trim_direction, char *weight_mode, float ***sim_weight, int *seq_list)
{
  int a, b;
  int new_nseq;

  /*OUTPUT*/
  char **seq, **name;
  Sequence *NS;
  Alignment *NA;
  float sim, lower_sim, upper_sim;
  
  lower_sim=(int_lower_sim>100)?(float)int_lower_sim/100:int_lower_sim;
  upper_sim=(int_upper_sim>100)?(float)int_upper_sim/100:int_upper_sim;

  sim_weight[0]=get_weight   ((use_aln)?A:NULL, S, weight_mode);
  
  name=declare_char (S->nseq, (MAXNAMES+1));
  seq= declare_char (S->nseq, S->max_len+1);
  
  /*
    Remove every sequence that is more than upper_sim and less than lower_sim similar to the master sequences
    the master sequence(s) are those for which seq_list[x]==2
  */


  

  new_nseq=A->nseq;
  

  for (a=0; a< A->nseq; a++)
    {
      if ( seq_list[a]==2)
	{
	
	  for ( b=0; b< A->nseq;b++)
	    {
	      sim=100-sim_weight[0][a][b];
	      if (seq_list[b]==1 && (sim>upper_sim || sim<lower_sim))
		{
		 seq_list[b]=0;
		 new_nseq--;
		}
	    }
	  
	}
    }
  
  /*Prepare the new sequence List*/

  for (b=0, a=0; a<S->nseq; a++) 
	  {
	    if ( seq_list[a])
	      {
		sprintf ( name[b], "%s", S->name[a]);
		sprintf ( seq[b] , "%s",(use_aln)?A->seq_al[a]: S->seq[a] );
		b++;
	      }
	  }
  
  
  NS=fill_sequence_struc (new_nseq,seq,name);
  NA=seq2aln(NS,NULL,1);	 
  
  if ( use_aln && A)
    {
      NA=realloc_aln2  ( NA,A->max_n_seq,A->len_aln+1);
      
      for (b=0, a=0; a<S->nseq; a++) 
	{
	  if ( seq_list[a])
	    {
	      sprintf ( NA->seq_al[b] , "%s",A->seq_al[a]);
	      b++;
	      }
	}

      NA->len_aln=A->len_aln;
      ungap_aln(NA);
    }
  

  return NA;
}
Alignment* seq2subseq2( Alignment *A, Sequence *S,int use_aln, int int_lower_sim,int int_upper_sim, int min_nseq, int trim_direction, char *weight_mode, float ***sim_weight, int *seq_list)
{
  int a, b;
  int new_nseq;
  int seq_index=0;
  /*OUTPUT*/
  char **seq, **name;
  Sequence *NS;
  Alignment *NA;
  float lower_sim, upper_sim;
  
  lower_sim=(int_lower_sim>100)?(float)int_lower_sim/100:int_lower_sim;
  upper_sim=(int_upper_sim>100)?(float)int_upper_sim/100:int_upper_sim;

 
  sim_weight[0]=get_weight   ((use_aln)?A:NULL, S, weight_mode);
  
  name=declare_char (S->nseq, (MAXNAMES+1));
  seq= declare_char (S->nseq, S->max_len+1);
  
  /*
    1 REMOVE OUTLAYERS
    2 REMOVE CLOSELY RELATED SEQUENCES
    3 IF STILL TOO MANY SEQUENCES:
      REMOVE THE MOST CLOSELY RELATED ONES
  */


  /*1 Remove outlayers*/
  
  new_nseq=A->nseq;
 
 
  /*1 Remove outlayers*/
  while ( lower_sim && (extreme_seq(BOTTOM,A,sim_weight[0],seq_list, &seq_index) <lower_sim) && ((new_nseq)>min_nseq) && seq_index!=-1)
    {
  
      if ( seq_list[seq_index]==1)
	{
	  seq_list[seq_index]=0;
	  new_nseq--;
	}
    }
   /*2 Remove close relative*/
  
 
  while ( upper_sim && (extreme_seq(TOP, A,sim_weight[0],seq_list, &seq_index)>upper_sim) && ((new_nseq)>min_nseq)&& seq_index!=-1)
    { 
  
      if ( seq_list[seq_index]==1)
	{
	  seq_list[seq_index]=0;
	  new_nseq--;
	}
    }


  /*Remove extra sequences*/
  
  while ( min_nseq>0 && new_nseq>min_nseq && seq_index!=-1)
    {
     
      extreme_seq(trim_direction, A,sim_weight[0],seq_list, &seq_index);
  
      if ( seq_index==-1)break;
      if ( seq_list[seq_index]==1)
	{
	  seq_list[seq_index]=0;
	  new_nseq--;
	}
    }


  /*Prepare the new sequence List*/

  for (b=0, a=0; a<S->nseq; a++) 
	  {
	    if ( seq_list[a])
	      {
		sprintf ( name[b], "%s", S->name[a]);
		sprintf ( seq[b] , "%s",(use_aln)?A->seq_al[a]: S->seq[a] );
		b++;
	      }
	  }
  
  
  NS=fill_sequence_struc (new_nseq,seq,name);
  NA=seq2aln(NS,NULL,1);	 
  
  if ( use_aln && A)
    {
      NA=realloc_aln2  ( NA,A->max_n_seq,A->len_aln+1);
      
      for (b=0, a=0; a<S->nseq; a++) 
	{
	  if ( seq_list[a])
	    {
	      sprintf ( NA->seq_al[b] , "%s",A->seq_al[a]);
	      b++;
	      }
	}

      NA->len_aln=A->len_aln;
      ungap_aln(NA);
    }
  

  return NA;
}

float extreme_seq (int direction, Alignment *A,float **sim_weight,int *seq_list, int *seq_index)
{
  
  /*find the closest relative of each sequence
    Return:
          Direction= BOTTOM: the sequence whose closest relative is the most distant
	  Direction= TOP:    the sequence whose closest relative is the closest
  weight: different sequences=100
          similar sequences  =0
  */
  int a, b;
 
  float top_sim,bottom_sim, best_sim, sim;
  int   top_seq, bottom_seq;
  
  bottom_seq=top_seq=seq_index[0]=-1;
  top_sim=-1;
  bottom_sim=101;
  
  for (a=0; a< A->nseq; a++)
    {
      if (seq_list[a]!=1)continue;

      for ( best_sim=0, b=0; b< A->nseq; b++)
	{
	  if ( a==b || !seq_list[b])continue;
	  
	  sim=100-sim_weight[a][b];
	  if (sim>best_sim)
	    {
	      best_sim=sim;
	    }
	}

      if ( best_sim>top_sim)
	{
	  top_seq=a;
	  top_sim=best_sim;
	}
     
      if ( best_sim<bottom_sim)
	{
	  bottom_seq=a;
	  bottom_sim=best_sim;
	}
      
    }
  if ( direction==BOTTOM  ){seq_index[0]= bottom_seq; return bottom_sim;}
  else if ( direction==TOP){seq_index[0]= top_seq; return top_sim;}
  else
    {
      seq_index[0]=-1;
      return -1;
    }
}
  
  
      
      
Alignment* seq2subseq1( Alignment *A, Sequence *S,int use_aln, int percent,int max_nseq, int ms,char *weight_mode)
   {
     float **pw_weight,**sim_weight, **seq_weight;
     int a,b,c,d;
     float sum, chosen,last_chosen, last_nchosen,nchosen;
     int condition1, condition2;
     Sequence *NS;
     Alignment *NA;
     char **name, **seq;
     float score, best_score;
     int best_seq;
     int *seq_list, *used_seq_list;
     
     /*
       mode: 
           (trim)_<seq or aln>_%<percentage of tot weight to keep>_n<number of seq to keep>_w<weight mode>
     */
     
     sim_weight=get_weight   ((use_aln)?A:NULL, S, weight_mode);
     pw_weight=declare_float (S->nseq, S->nseq);
     seq_weight=declare_float ( S->nseq, 2);

     
     for (best_score=0,a=0; a<S->nseq; a++)
       {
	 for ( b=0; b<S->nseq; b++)
	   {
	     if ( a==b)continue;
	     seq_weight[a][0]+=sim_weight[a][b];
	   }
	 seq_weight[a][0]=seq_weight[a][0]/(S->nseq-1);
	 score=seq_weight[a][0]=100-seq_weight[a][0];
	 
	 if ( score>best_score)
	   {
	     best_seq=a;
	     best_score=score;
	   }

       }
      for (a=0; a<S->nseq; a++)
       {
	 for ( b=0; b<S->nseq; b++)
	   {
	     if ( a==b)continue;
	     pw_weight[a][b]=sim_weight[a][b]*seq_weight[a][0]*seq_weight[b][0]/(100*100);
	     
	   }
       }
      
 
     seq_list=vcalloc ( S->nseq, sizeof (int));
     used_seq_list=vcalloc ( S->nseq, sizeof (int));

    

     name=declare_char (S->nseq, (MAXNAMES+1));
     seq= declare_char (S->nseq, S->max_len+1);
     
     /*compute the normalization factor*/
     for (sum=0,d=0; d< S->nseq; d++)
	   {
	     for (score=0,c=0; c<S->nseq; c++)
	       {
		 if ( c!=d)
		   score=MAX(score, 100-sim_weight[c][d]);
	       }
	     sum+=score;
	   }
     sum=sum/S->nseq;
     /*chose the first sequence */
     for ( best_score=0,a=0; a< S->nseq; a++)
       {
	 for (score=0, b=0; b< S->nseq; b++)
	   {
	     score+=100-sim_weight[a][b];
	   }
	 if ( score>best_score)
	   {
	     best_seq=a;
	     best_score=score;
	   }
	 
       }


     last_chosen=chosen=((best_score/S->nseq)*100)/sum;
     nchosen=last_nchosen=1;
     seq_list[0]=best_seq;
     used_seq_list[best_seq]=1;

     sprintf ( name[0],"%s", S->name[seq_list[0]]);
     sprintf ( seq[0],"%s", S->seq[seq_list[0]]);
     nchosen=last_nchosen=1;
     

     fprintf ( stderr, "\nTRIM:\n");
     fprintf ( stderr, "\n1-Chosen Sequences\n");
     /*Assemble the list of sequences*/
     for (a=1; a< S->nseq; a++)
       {
	 for (best_score=0,b=0; b< S->nseq; b++)
	   {
	     if (used_seq_list[b]);
	     else
	       {
		 score=pw_weight[seq_list[0]][b]+1;
		 for (c=0; c<a; c++)
		   score=MIN(score,pw_weight[seq_list[c]][b]);
		   
		 if ( score>=best_score)
		   {
		   best_seq=b;
		   best_score=score;
		   }
	       
	       }
	   }
	 seq_list[a]=best_seq;
	 used_seq_list[best_seq]=1;
	 
	 

	 for ( chosen=0,d=0; d< S->nseq; d++)
	   {
	     for (score=0, c=0; c<=a; c++)
	       {
		 if ( seq_list[c]!=d)
		   score=MAX(score, 100-sim_weight[seq_list[c]][d]);
	       }
	     chosen+=score;
	     
	   }
	 
	 chosen=((chosen/S->nseq)*100)/sum;
	 nchosen=a+1;
	 	 
	 condition1= (int)chosen<=(int)percent || !percent;
	 condition2=(nchosen)<=max_nseq     || !max_nseq;
	 
	 if (condition1 && condition2)
	   {
	     fprintf ( stderr, "\tADD %s (set score: %.2f %%)\n", S->name[seq_list[a]], chosen);
	     sprintf ( name[a],"%s", S->name[seq_list[a]]);
	     sprintf ( seq[a],"%s", S->seq[seq_list[a]]);
	     
	   }
	 else
	   {
	     break;	 
	   }
	 last_chosen=chosen;
	 last_nchosen=nchosen;
       }
       
     NS=fill_sequence_struc (last_nchosen,seq,name);
     NA=seq2aln(NS,NULL,1);	 
     fprintf ( stderr, "\n2-Informations:\n");
     fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");
     fprintf ( stderr, "\tweight_mode...: %s\n"  ,weight_mode);
     fprintf ( stderr, "\tpercent_weight: %.2f%% (max=%d%%)\n",last_chosen,percent);
     fprintf ( stderr, "\tn_seq.........: %d\n"  ,NS->nseq);
     fprintf ( stderr, "\treduction.....: %d%% of original set\n"  ,(NS->nseq*100)/S->nseq);
     
     return NA;
   }   
float ** get_weight ( Alignment *A, Sequence *S, char *mode)
{
 char *aln_name;
 char *weight_name;
 char *seq_name;
 char command[LONG_STRING];
 char program[LONG_STRING];
 float **weight;
 FILE *fp;
 int c;
  
 if ( !mode || !mode[0] || strm (mode, "msa"))
      {
	if ( getenv ( "SEQ2MSA_WEIGHT")==NULL)sprintf (program, "%s",SEQ2MSA_WEIGHT);
	else sprintf ( program, "%s", (getenv ( "SEQ2MSA_WEIGHT")));   
      }
 else if ( strm(mode, "pwsim") ||strm(mode, "pwsim_fragment") )
      {
	return seq2pwsim (A, S, mode);
      }
 else
     {
       if (getenv (mode))sprintf ( program, "%s", (getenv (mode)));
       else fprintf ( stderr, "\nERROR: %s is not a valid mode for weight computation [FATAL:%s]", mode, PROGRAM);
     }

 /*MSA weights*/
 seq_name=vtmpnam(NULL);
 aln_name=vtmpnam(NULL);
 weight_name=vtmpnam(NULL);
 weight=declare_float (S->nseq+1, 2);
 

  
  if (A)
    {
      output_clustal_aln (seq_name,A);
      output_fasta_seq   (aln_name,A);
      sprintf ( command, "%s %s -i %s -w %s", program, seq_name, aln_name, weight_name);
    }
  else
    {
      A=seq2aln(S,A,1);
      output_fasta_seq   (seq_name,A);
      sprintf ( command, "%s %s -w %s", program, seq_name, weight_name);
    }
  
 
  my_system ( command);
  
  fp=vfopen( weight_name, "r");
  while ( (c=fgetc(fp))!='$');
  c=fgetc(fp);
  c=0;
  while ( (fscanf (fp, "%*s %f\n",&(weight[c][1])))==1)
    {weight[c][0]=c;c++;}
  vfclose (fp);
  
  
  return weight;
}

float **seq2pwsim (	   Alignment *A, Sequence *S, char *mode)
{
  int a, b, c;
  float d,t;
  float  **W;
  Alignment *B;
  W=declare_float (S->nseq, S->nseq);



  for (a=0; a< S->nseq; a++)
	for ( b=a; b<S->nseq; b++)
	  {
	    if ( a==b){d=1;}
	    else if (!A)
	      {

		B=align_two_sequences ((S)->seq[a], (S)->seq[b],"pam250mt", -10, -1, "fasta_pair_wise");
		for (t=0,d=0,c=0; c<B->len_aln; c++)
		  {
		    d+=(B->seq_al[0][c]==B->seq_al[1][c] && !is_gap(B->seq_al[0][c]));
		    t+=(!is_gap(B->seq_al[0][c]) && !is_gap(B->seq_al[1][c]));
		  }
		t=(strm ( mode, "pwsim_fragment"))?B->len_aln:t;
		
		d=d/((t==0)?1:t);
		free_aln(B);
	      }
	    else
	      {
		for (t=0,d=0,c=0; c<A->len_aln; c++)
		  {
		    d+=(A->seq_al[a][c]==A->seq_al[b][c] && !is_gap(A->seq_al[a][c]));
		    t+=(!is_gap(A->seq_al[a][c]) && !is_gap(A->seq_al[b][c]));
		  }
		d=d/((t==0)?1:t);
	      }
	  

	    W[a][b]=W[b][a]=(1-d)*100;
	  }
 
  
  return W;

}
 
float **seq2pwsim_fragment (	   Alignment *A, Sequence *S, char *mode)
{

  
  int a, b, c;
  float d,t;
  float  **W;
  Alignment *B;
  W=declare_float (S->nseq, S->nseq);




  for (a=0; a< S->nseq; a++)
	for ( b=a; b<S->nseq; b++)
	  {
	    if ( a==b){d=1;}
	    else if (!A)
	      {

		B=align_two_sequences ((S)->seq[a], (S)->seq[b],"pam250mt", -10, -1, "fasta_pair_wise");
		for (t=0,d=0,c=0; c<B->len_aln; c++)
		  {
		    d+=(B->seq_al[0][c]==B->seq_al[1][c] && !is_gap(B->seq_al[0][c]));
		    t+=(!is_gap(B->seq_al[0][c]) && !is_gap(B->seq_al[1][c]));
		  }
	
		d=d/((t==0)?1:t);
		free_aln(B);
	      }
	    else
	      {
		for (t=0,d=0,c=0; c<A->len_aln; c++)
		  {
		    d+=(A->seq_al[a][c]==A->seq_al[b][c] && !is_gap(A->seq_al[a][c]));
		    t+=(!is_gap(A->seq_al[a][c]) && !is_gap(A->seq_al[b][c]));
		  }
		d=d/((t==0)?1:t);
	      }
	  

	    W[a][b]=W[b][a]=(1-d)*100;
	  }
 
  
  return W;

} 

/********************************************************************/
/*                                                                  */
/*			AMINO ACID FUNCTIONS                        */
/*                                                                  */
/*                                                                  */
/*                                                                  */
/********************************************************************/

char** make_group_aa (int *ngroup, char *mode)
	{
/*mode:         indicates which matrix will be used for the grouping*/
/*n_group:      pointer to the number of groups                     */
/*return value: an array of strings containing the AA of each group */


	int **matrix;
	int a, b,c,is_in;
	char buf[28];
	char **group_list;
	char *matrix_name;
	matrix_name=vcalloc ( 100, sizeof (char));

	
	ngroup[0]=0;
	group_list=declare_char ( 100, 27);
		
	if ( mode[0]=='_'){mode++;sprintf ( matrix_name, "%s", mode);}

	if (mode==NULL || mode[0]=='\0')sprintf ( matrix_name, "idmat");
	else if ( strm (mode, "simple"))
	     {
		 ngroup[0]=6;
		 sprintf ( group_list[0], "avilmAVILM");
		 sprintf ( group_list[1], "dekrDEKR");
		 sprintf ( group_list[2], "stcnqhSTCNQH");
		 sprintf ( group_list[3], "wfyWFY");
		 sprintf ( group_list[4], "gG");
		 sprintf ( group_list[5], "pP");
		 vfree (matrix_name);
		 return group_list;
	     }
	else if ( strm (mode, "clustalw"))
	     {
		 ngroup[0]=11;
		 sprintf ( group_list[0] ,"astaASTA");
		 sprintf ( group_list[1] ,"bneqkBNEQK");
		 sprintf ( group_list[2] ,"cnhqkCNHQK");
		 sprintf ( group_list[3] ,"dndeqDNDEQ");
		 sprintf ( group_list[4] ,"eqhrkEQHRK");
		 sprintf ( group_list[5] ,"fmilvFMILV");
		 sprintf ( group_list[6] ,"gmilfGMILF");
		 sprintf ( group_list[7] ,"hhyHHY");
		 sprintf ( group_list[8] ,"ifywIFYW");
		 sprintf ( group_list[9] ,"jcJC");
		 sprintf ( group_list[10],"kpKP");
		 vfree (matrix_name);
		 return group_list;
	     }
	else if ( strm (mode, "polarity"))
	     {
		 ngroup[0]=6;
		 sprintf ( group_list[0], "eqrsdnkhtEQRSDNKHT");
		 sprintf ( group_list[1], "pP");
		 sprintf ( group_list[2], "gG");
		 sprintf ( group_list[3], "cC");
		 sprintf ( group_list[4], "fywFYW");
		 sprintf ( group_list[5], "iavlmIAVLM");
		 vfree (matrix_name);
		 return group_list;
	     }
	else if ( strm (mode, "vasiliky"))
	     {
		 ngroup[0]=0;
		 sprintf ( group_list[ngroup[0]++], "rkRK");
		 sprintf ( group_list[ngroup[0]++], "deDE");
		 sprintf ( group_list[ngroup[0]++], "qhQH");
		 sprintf ( group_list[ngroup[0]++], "vilmVILM");
		 sprintf ( group_list[ngroup[0]++], "fyFY");
		 sprintf ( group_list[ngroup[0]++], "sS");
		 sprintf ( group_list[ngroup[0]++], "wW");
		 sprintf ( group_list[ngroup[0]++], "aA");
		 sprintf ( group_list[ngroup[0]++], "cC");
		 sprintf ( group_list[ngroup[0]++], "gG");
		 sprintf ( group_list[ngroup[0]++], "nN");
		 sprintf ( group_list[ngroup[0]++], "pP");
		 sprintf ( group_list[ngroup[0]++], "tT");
		 vfree (matrix_name);
		 return group_list;
	     }
	else if ( strm (mode, "clustalw_col"))
	     {
		 sprintf ( group_list[ngroup[0]++], "staSTA");
		 sprintf ( group_list[ngroup[0]++], "neqkNEQK");
		 sprintf ( group_list[ngroup[0]++], "nhqkNHQK");
		 sprintf ( group_list[ngroup[0]++], "ndeqNDEQ");
		 sprintf ( group_list[ngroup[0]++], "qhrkQHRK");
		 sprintf ( group_list[ngroup[0]++], "milvMILV");
		 sprintf ( group_list[ngroup[0]++], "milfMILF");
		 sprintf ( group_list[ngroup[0]++], "hyHY");
		 sprintf ( group_list[ngroup[0]++], "fywFYW");
		 sprintf ( group_list[ngroup[0]++], "gG");
		 sprintf ( group_list[ngroup[0]++], "pP");
		 sprintf ( group_list[ngroup[0]++], "cC");
		 vfree (matrix_name);
		 
		 return group_list;
	     }
	else if ( strm (mode, "clustalw_dot"))
	     {
		 sprintf ( group_list[ngroup[0]++], "csaCSA");
		 sprintf ( group_list[ngroup[0]++], "atvATV");
		 sprintf ( group_list[ngroup[0]++], "sagSAG");
		 sprintf ( group_list[ngroup[0]++], "stnkSTNK");
		 sprintf ( group_list[ngroup[0]++], "stpaSTPA");
		 sprintf ( group_list[ngroup[0]++], "sgndSGND");
		 sprintf ( group_list[ngroup[0]++], "sndeqkSNDEQK");
		 sprintf ( group_list[ngroup[0]++], "ndeqhkNDEQHK");
		 sprintf ( group_list[ngroup[0]++], "neqhrkNEQHRK");
		 sprintf ( group_list[ngroup[0]++], "fvlimFVLIM");
		 sprintf ( group_list[ngroup[0]++], "hfyHFY");
		 vfree (matrix_name);
		 return group_list;
	     }
	else if ( strm (mode, "make_all"))
	     {
		 ngroup[0]=1;
		 sprintf ( group_list[0], "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
		 vfree (matrix_name);
		 return group_list;
	     }
	else sprintf ( matrix_name, "%s", mode);
	
	matrix=read_matrice ( matrix_name); 
	
	for ( a=0;a< 26; a++)
		{
		if ( matrix[a][a]>0)
			{
			for ( c=0,b=0;b< 26; b++)
				{
				
				if ( matrix[a][b]>0 && matrix[b][b]>0)
					{
					buf[c++]=b+'A';
					buf[c++]=b+'a';
					}
				}
			buf[c]='\0';
			for ( is_in=0,b=0; b< ngroup[0]; b++)if ( strcmp (buf, group_list[b])==0)is_in=1;
			if (is_in==0)sprintf ( group_list[ngroup[0]++], "%s", buf);
					
			}
		}
	free_int (matrix, -1); 
	vfree (matrix_name);
		 
	return group_list;
	}

int find_group_aa_distribution (char *col, int nseq,int n_group, char **gl,  int *distrib, char *mode )
	{
	static int *distribution;
	static char **lgl;
	static int ln_group;
	int a, b, c;
	int *d;
	char **gl2;
	int n_group2;
	
	
	
	if ( lgl==NULL)
		lgl=make_group_aa ( &ln_group, mode);
		
		if ( gl==NULL)
		{
		gl2=lgl;
		n_group2=ln_group;
		}
	else
		{
		gl2=gl;
		n_group2=n_group;
		}
	
	if ( distribution==NULL || ln_group<n_group)distribution=vcalloc ( n_group2, sizeof (int));
	if ( distrib==NULL)d=distribution;
	else d=distrib;
	
	
	for ( a=0; a< n_group2; a++)d[a]=0;
	
	for ( a=0; a< nseq; a++)
		{
		for ( b=0; b< n_group2; b++)
			d[b]+=is_in_set (col[a], gl2[b]);
		}
	c=d[0];
	for ( a=0; a< n_group2; a++)
		c=(d[a]>c)?d[a]:c;
	return c;
	}



int is_in_same_group_aa ( char r1, char r2, int n_group, char **gl, char *mode)
	{
	int a;
	static char **lgl;
	static int ln_group;
	
	char **gl2;
	int n_group2;
	
	/*use mode=idmat for similarity based on id*/

	r1=toupper(r1);
	r2=toupper(r2);
	if (mode==NULL)return (r1==r2)?1:0;
	
	if ( strm (mode, "clean"))
	     {
	     free_char (lgl, -1);
	     lgl=NULL;
	     ln_group=0;
	     return 0;
	     }
	
	if ( lgl==NULL)
		{
		  lgl=make_group_aa ( &ln_group, mode);
		}
		
	if ( gl==NULL)
		{
		gl2=lgl;
		n_group2=ln_group;
		}
	else
		{
		gl2=gl;
		n_group2=n_group;
		}
	
	for ( a=0; a< n_group2; a++)
		if ( is_in_set ( r1, gl2[a]) && is_in_set ( r2, gl2[a]))return 1;
		return 0;
	}
			

Alignment * gene2prot (Alignment *A){return A; }
char * test_gene2prot (Constraint_list *CL, int s1)
       {
	   int a, b,q, nal;
	   int F=-10000000; /*FORBIDEN STATE*/
	   int AL=0;       /*ALLOWED STATE*/
	   int SPLICE_PENALTY=1000;
	   int FRAME_PENALTY=1000;
	   

	   int START,  ORF1,  ORF2, ORF3, s5NC; 
	   int s3NC,ORF3_G1, ORF3_T2, ORF3_NC, ORF3_A3, ORF3_T4;
	   int U1_G1,   U1_T2,   U1_NC,   U1_A3,   U1_T4;
	   int U2_G1,   U2_T2,   U2_NC,   U2_A3,   U2_T4;
	   int U1,     U2, U3,  U4, U5, END;
	   
	   int nstate=0;
	   int **transitions;
	   int **v_tab;
	   int **v_tab_p;
	   int **last_coding;
	   int **last_t4;
	   int *potential;
	   int v;

	   int orf1, orf2, orf3, ncp, p, state, pstate, e, best_state_p, best_state_v, best_pstate_p, best_pstate_v;
	   char *seq, *seq2, *seq3;
	   int l;
	   int  *is_coding;
	   int *is_t4;
	   char *codon;

	   static int *entry;
	   int tot=0;
	   
	   seq=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));
	   seq2=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));
	   seq3=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));
	   sprintf ( seq, "%s", (CL->S)->seq[s1]);
	   ungap (seq);

	   l=strlen (seq);
	   for ( a=0; a< l; a++) seq[a]=tolower ( seq[a]);
	   for ( a=0; a< l; a++) seq[a]=(seq[a]=='t')?'u': seq[a];
	   

	   potential=vcalloc (l+1, sizeof (int));
	   CL=index_constraint_list ( CL);
	   for (nal=0, a=0; a<(CL->S)->nseq; a++)
	       for ( b=CL->start_index[s1][a]; b< CL->end_index[s1][a];b++)
	           {
		       entry=extract_entry(entry, b, CL);
		       if ( entry[SEQ1]==s1)potential[entry[R1]-1]+=entry[WE];
		       else if (  entry[SEQ2]==s1)potential[entry[R2]-1]+=entry[WE];
		       tot+=entry[WE];
		       nal++;
		   }


	   SPLICE_PENALTY=10000;
	   FRAME_PENALTY=1000;

	   
	   nstate=0;
	   START=nstate++;  ORF1=nstate++;  ORF2=nstate++; ORF3=nstate++; s5NC=nstate++; 
	   s3NC=nstate++;
	   ORF3_G1=nstate++;U1_G1=nstate++;U2_G1=nstate++; 
	   ORF3_T2=nstate++;U1_T2=nstate++;U2_T2=nstate++;
	   ORF3_NC=nstate++;U1_NC=nstate++;U2_NC=nstate++; 
	   ORF3_A3=nstate++;U1_A3=nstate++;U2_A3=nstate++; 
	   ORF3_T4=nstate++;U1_T4=nstate++;U2_T4=nstate++;
	   
	   
	   U1=nstate++;     U2=nstate++; U3=nstate++;  U4=nstate++; U5=nstate++;  
	   END=nstate++;
	   
	   is_coding=vcalloc ( nstate, sizeof (int));
	   is_coding[ORF1]=is_coding[ORF2]=is_coding[ORF3]=is_coding[U1]=is_coding[U2]=1;
	   is_coding[U3]=is_coding[U4]=is_coding[U5]=1;
	   
	   is_t4=vcalloc ( nstate, sizeof (int));
	   is_t4[ORF3_T4]=is_t4[U1_T4]=is_t4[U2_T4]=1;
	   transitions=declare_int ( nstate, nstate);
	   for (a=0; a< nstate; a++)
		   for ( b=0; b< nstate; b++)transitions[a][b]=F;
	   
	   transitions[START][ORF1]=AL;
	   transitions[START][s5NC]=AL-FRAME_PENALTY;
	   transitions[s5NC][s5NC]=AL;

	   transitions[s5NC][ORF1]=AL-FRAME_PENALTY;

	   transitions[ORF1][ORF2]=AL;
	   transitions[ORF2][ORF3]=AL;
	   transitions[ORF3][U1]=AL;
	   transitions[ORF3][ORF1]=AL;
	   transitions[ORF3][ORF3_G1]=AL-SPLICE_PENALTY;
	   
	   
	   transitions[ORF3_G1][ORF3_T2]=AL;
	   transitions[ORF3_T2][ORF3_NC]=AL;
	   transitions[ORF3_NC][ORF3_NC]=AL;
	   transitions[ORF3_NC][ORF3_A3]=AL;
	   transitions[ORF3_A3][ORF3_T4]=AL;
	   transitions[ORF3_T4][ORF1]=AL-SPLICE_PENALTY;

	   transitions[U1][U2]=AL;
	   transitions[U1][U1_G1]=AL-SPLICE_PENALTY;
	   transitions[U1_G1][U1_T2]=AL;
	   transitions[U1_T2][U1_NC]=AL;
	   transitions[U1_NC][U1_NC]=AL;
	   transitions[U1_NC][U1_A3]=AL;
	   transitions[U1_A3][U1_T4]=AL;
	   transitions[U1_T4][U3]=AL-SPLICE_PENALTY;
	   transitions[U3][U4]=AL;
	   transitions[U4][ORF1]=AL;
	   
	   transitions[U2][U2_G1]=AL-SPLICE_PENALTY;
	   transitions[U2_G1][U2_T2]=AL;
	   transitions[U2_T2][U2_NC]=AL;
	   transitions[U2_NC][U2_NC]=AL;
	   transitions[U2_NC][U2_A3]=AL;
	   transitions[U2_A3][U2_T4]=AL;
	   transitions[U2_T4][U5]=AL-SPLICE_PENALTY;
	   transitions[U5][ORF1]=AL;
	   
	   transitions[ORF3][s3NC]=AL-FRAME_PENALTY;
	   transitions[ORF3][END]=AL;
	   transitions[s3NC][END]=AL;

	   	   
	   v_tab=declare_int ( l+1,nstate);
	   v_tab_p=declare_int ( l+1,nstate);
	   last_coding=declare_int ( l+1,nstate);
	   last_t4=declare_int ( l+1,nstate);

	   for (a=0; a< l; a++) potential[a]-=200;

	   codon=vcalloc ( 4, sizeof (char));
	   best_pstate_p=START;
	   best_pstate_v=0;
	   nal=0;
	   for ( p=1; p<=l; p++)
	       {
	       if  (translate_dna_codon (seq+(p-1), 'x')=='x' || p>(l-2))orf1=F;
	       else orf1=potential[p-1];

	       if  (p<2 || translate_dna_codon (seq+(p-2), 'x')=='x' || p>(l-1))orf2=F;
	       else orf2=potential[p-1];

	       
	       if  (p<3 || translate_dna_codon (seq+(p-3), 'x')=='x' || p>l)orf3=F;
	       else orf3=potential[p-1];
	       
	       if ( best_int (3, 1, &a, orf1, orf2, orf3)!=F)ncp=-best_int (3, 1, &a, orf1, orf2, orf3);
	       else ncp=1000;
	       
	       for ( state=0; state< nstate; state++)
	           {
		       
		       if      ( state==ORF1)e=orf1;
		       else if ( state==ORF2)e=orf2;
		       else if ( state==ORF3)e=orf3;
		       else if ( state>=U1 && state<=U3)
			   {
			   e=0;
			   }
		       else if ( state==U4)
		          {
			      codon[2]=seq[p-1];
			      codon[1]=seq[last_coding[p-1][U3]-1];
			      codon[0]=seq[last_coding[p-2][U1_T4]-1];
			      if ( translate_dna_codon (codon, 'x')=='x')e=F;
			      else e=0;
			  }
		       else if ( state==U5)
		          {
			      codon[2]=seq[p-1];
			      codon[1]=seq[last_coding[p-1][U2_T4]-1];
			      q=seq[last_coding[p-1][U2_T4]];
			      codon[0]=seq[last_coding[q-1][U1]-1];
			      if ( translate_dna_codon (codon, 'x')=='x')e=F;
			      else e=0;
			  }

		       else if (state>=ORF3_G1 && state<=U2_G1)e=(p<l-1 && seq[p-1]=='g' && seq[p]=='u')?ncp:F;
		       else if ( state>=ORF3_T2 && state<=U2_T2)
			   {
			   e=(p>1 && seq[p-2]=='g' && seq[p-1]=='u')?ncp:F;
			   }
		       else if ( state>=ORF3_A3 && state<=U2_A3)e=(seq[p-1]=='a')?ncp:F;
		       else if ( state>=ORF3_T4 && state<=U2_T4)e=(seq[p-1]=='u')?ncp:F;
		       else e=ncp;
		       
		       for ( pstate=0; pstate<nstate; pstate++)
		           {
			       if (e==F ||  transitions[pstate][state]==F || v_tab[p-1][pstate]==F)v=F;
			       else v=e+transitions[pstate][state]+v_tab[p-1][pstate];
				    
			       if ( pstate==0 || v>best_pstate_v)
			          {best_pstate_v=v;best_pstate_p=pstate;}
			   }
		      v_tab[p][state]=best_pstate_v;
		      v_tab_p[p][state]=best_pstate_p; 
		      
		      if (!is_coding[state])last_coding[p][state]=last_coding[p-1][best_pstate_p];
		      else if (is_coding[state])last_coding[p][state]=p;
		      
		      if (!is_t4[state])
		         {
			     if (is_coding[state] && last_t4[p-1][best_pstate_p]==0)last_t4[p][state]=p;
			     else last_t4[p][state]=last_t4[p-1][best_pstate_p];
			 }
		      else if (is_t4[state])last_t4[p][state]=p;
		      
		      if (state==0 ||best_pstate_v>best_state_v ){best_state_p=state; best_state_v=best_pstate_v;}
		   }
	       }
	   tot=0;
	   for ( p=l; p>0; p--)
	           {
		       if ( best_state_p>=ORF1 &&  best_state_p<=ORF3){seq2[tot++]=tolower (seq[p-1]);}
		       else if ( best_state_p>=U1 && best_state_p<=U5){seq2[tot++]=tolower (seq[p-1]);}
		       if (best_state_p==ORF1)seq[p-1]=toupper (seq[p-1]);
		       else if (best_state_p==ORF2 || best_state_p==ORF3)seq[p-1]=tolower (seq[p-1]);
		       else if ( best_state_p==ORF3_NC || best_state_p==U1_NC ||  best_state_p==U2_NC) seq[p-1]='.';
		       else if ( best_state_p==U1 || best_state_p==U2 || best_state_p==U3 || best_state_p==U4 || best_state_p==U5) seq[p-1]=best_state_p-U1+'1';
		       else seq[p-1]=toupper (seq[p-1]);
		       best_state_p=v_tab_p[p][best_state_p];
		   }

	   for ( a=0, b=tot-1; b>=0; b--, a++)
	       seq3[a]=seq2[b];
	   
	   fprintf ( stderr, "\n%s\n", seq);
	   fprintf ( stderr, "\nN coding=%d\n", tot);
	   for ( a=0; a< tot; a+=3)
	        {
		b=translate_dna_codon (seq3+a, 'x');
		fprintf ( stderr, "%c",b);
		if ( b=='x'){fprintf ( stderr, "\n");exit (EXIT_SUCCESS);}
		}
	    
	    fprintf ( stderr, "\n");	  
	    exit (EXIT_SUCCESS);
	    return 0;
	    
	       
	    
       }
Alignment * dna_aln2_3frame_cdna_aln(Alignment *A,int *ns,int **l_s)
{
  Alignment *B;
  int a;
  B=realloc_aln2 (NULL,6,strlen(A->seq_al[l_s[0][0]])+strlen(A->seq_al[l_s[1][0]]));
  for ( a=0; a< 3; a++)  
    {	 
      B->seq_al[a]=translate_dna_seq (A->seq_al[l_s[0][0]]+a, 0, 'o',B->seq_al[a]);
      B->seq_al[a+3]=translate_dna_seq (A->seq_al[l_s[1][0]]+a, 0, 'o',B->seq_al[a+3]);
    }
  for ( a=1; a<3; a++)
    {
      if ( strlen(B->seq_al[a])<strlen(B->seq_al[0])) B->seq_al[a]=strcat ( B->seq_al[a], "x");
      if ( strlen(B->seq_al[a+3])<strlen(B->seq_al[3])) B->seq_al[a+3]=strcat ( B->seq_al[a+3], "x");
    }
  
  B->nseq=6;
  B->len_aln=strlen (B->seq_al[0]);
  return B;
}

/************************************************************************************/
/*                ALIGNMENT ANALYZE     : SAR                                            */
/************************************************************************************/
int **analyze_sar_compound1 ( char *name, char *seq, Alignment *A, char *mode);
int **analyze_sar_compound2 ( char *name, char *seq, Alignment *A, char *mode);

double evaluate_sar_score ( int len, int n11, int n1a, int n1b);

Alignment *sar_analyze  (Alignment *A, Alignment *SAR, char *mode)
{
  int a;
  int **list;
  
  for ( a=0; a< SAR->nseq; a++)
    {
      if (strm (mode, "method1") || strm (mode, "method2") ||strm (mode, "method3")||strm (mode, "method4") )
	list=analyze_sar_compound1 (SAR->name[a], SAR->seq_al[a], A, mode);
      else if ( strm (mode, "method5"))
	list=analyze_sar_compound2 (SAR->name[a], SAR->seq_al[a], A, mode);
      free_int (list, -1);
    }
  exit (EXIT_SUCCESS);
  return A;
}

int **analyze_sar_compound2 ( char *name, char *seq, Alignment *A, char *mode)
{
  int **list;
  int a, b;
  int true;
  int C1, C2, r1, r2;
  int sim, diff;
  int npairs=0;
  int N1,NGAP,N00, N11,N10, N01, MSA1,MSA0, SAR1, SAR0, LEN, SAR_SCORE, COL;
  int Nval=0;
  int score;
  int AA0[26], AA1[26];
  static int **pos, p;
  
  if ( !pos)pos=aln2pos_simple(A, A->nseq);

  COL=Nval++;NGAP=Nval++;N00=Nval++;N11=Nval++;N10=Nval++;N01=Nval++; MSA0=Nval++;MSA1=Nval++;SAR0=Nval++;SAR1=Nval++;LEN=Nval++;SAR_SCORE=Nval++;
      
  list=declare_int (A->len_aln, Nval+1);
  
  for ( N1=0,a=0; a< A->len_aln; a++)
    list[a][COL]=a;  
  for ( a=0; a< strlen (seq); a++) N1+=(seq[a]=='I')?1:0;
  for ( a=0; a< 26; a++)AA0[a]=AA1[a]=0;
  
  for ( a=0; a<A->len_aln; a++)
    {
      for ( b=0; b<A->nseq; b++)
	{
	  C1=seq[b];
	  
	  
	  r1=tolower(A->seq_al[b][a]);
	  if (is_gap(r1))r1='z';
	  r1-='a';
	  if (C1=='O')AA0[r1]++;
	  if (C1=='I')AA1[r1]++;
	}
      
      for (b=0; b< 26; b++)
	{
	  if (AA1[b])list[a][MSA1]+=AA0[b]+AA1[b];
	  AA0[b]=AA1[b]=0;
	}
      
      list[a][N11]=MIN(N1,list[a][MSA1]);
    }
 

  for ( a=0; a< A->len_aln; a++)
    {
      if (list[a][NGAP]==0 )
	{
	  list[a][SAR_SCORE]=100*evaluate_sar_score(A->nseq,list[a][N11], list[a][MSA1],N1);
	}
    }

  sort_int_inv (list,Nval, SAR_SCORE, 0, A->len_aln-1);
  
  for ( a=0; a<1;a++)
    {
      if ( list[a][SAR_SCORE]<0)continue;
      
      fprintf ( stdout, "\nPROF %11s %s ", name, seq);
      fprintf ( stdout, " COL: %-3d SCORE %3d N11 %4d N00 %4d N10 %4d N01 %4d LEN %4d SEQ ", list[a][COL]+1, list[a][SAR_SCORE], list[a][N11], list[a][N00], list[a][N10],  list[a][N01], list[a][LEN]);
      
      for ( b=0; b< A->nseq; b++)fprintf ( stdout, "%c", A->seq_al[b][list[a][COL]]);
      for ( b=0; b< A->nseq; b++)
	{
	  if (seq[b]=='I')
	    {
	      p=list[a][COL];
	      fprintf ( stdout, "\n%-11s %10s %c POS %4d COL %4d SCORE %d", name,A->name[b], A->seq_al[b][p],pos[b][p],list[a][COL]+1, list[a][SAR_SCORE]);
	    }
	}
      
    }
  return list;
}
int **analyze_sar_compound1 ( char *name, char *seq, Alignment *A, char *mode)
{
  int **list;
  int a, b, c;
  int true;
  int C1, C2, r1, r2;
  int sim, diff;
  int npairs=0;
  int N1,NGAP,N00, N11,N10, N01, MSA1,MSA0, SAR1, SAR0, LEN, SAR_SCORE, COL;
  int Nval=0;
  int score;
  int n, SKIP=-1, p;
  static int **pos;
  static int header;
  
  if ( !pos)pos=aln2pos_simple (A, A->nseq);
  
  COL=Nval++;NGAP=Nval++;N00=Nval++;N11=Nval++;N10=Nval++;N01=Nval++; MSA0=Nval++;MSA1=Nval++;SAR0=Nval++;SAR1=Nval++;LEN=Nval++;SAR_SCORE=Nval++;
      
  list=declare_int (A->len_aln, Nval+1);
  
  for ( a=0; a< A->len_aln; a++)
    list[a][COL]=a;
  
  for ( N1=0,a=0; a< strlen (seq); a++) N1+=(seq[a]=='I')?1:0;
  
  
  
  for ( npairs=0, a=0; a< A->nseq-1; a++)    
    for ( b=a+1; b< A->nseq; b++)
      {
	C1=seq[a];
	C2=seq[b];
	sim=get_seq_sim ( A->seq_al[a],A->seq_al[b],"-.","idmat");
	if (C1=='O' && C2=='O')continue;
	score=(C1==C2)?100-sim:sim;
	npairs++;
	for (c=0; c< A->len_aln; c++)
	  {
	    r1=A->seq_al[a][c];
	    r2=A->seq_al[b][c];
	    
	    if ( (is_gap(r1) || is_gap(r2)))
	      list[c][NGAP]+=score;
	    
	    else
	      {
	    
		list[c][LEN]+=score;
		if ( (C1==C2) && (r1==r2))list[c][N11]+=score;
		if ( (C1!=C2) && (r1!=r2))list[c][N00]+=score;
		if ( (C1!=C2) && (r1==r2))list[c][N01]+=score;
		if ( (C1==C2) && (r1!=r2))list[c][N10]+=score;
		
		if ( (C1==C2))list[c][SAR1]+=score;
		else list[c][SAR0]+=score;
		
		if ( (r1==r2))list[c][MSA1]+=score;
		else list[c][MSA0]+=score;
	      }
	  }
      }
  
  
  for ( c=0; c< A->len_aln; c++)
    {
      list[c][SAR_SCORE]=SKIP;
      if ( list[c][NGAP]>100);
      else 
	{
	  list[c][SAR_SCORE]=SKIP;
	  if ( strm (mode, "method3"))
	       {
		 if ( list[c][LEN])list[c][SAR_SCORE]+=(1000*list[c][N00])/list[c][LEN];
	       }
	    
	  else if (strm (mode, "method1"))
	    {
	      /*method 1: combine (same reac, same seq)+(diff reac, diff seq)*/
	      if ( list[c][LEN])list[c][SAR_SCORE]+=100*evaluate_sar_score(list[c][LEN], list[c][N11], list[c][MSA1], list[c][SAR1]);
	    }
	  else if  ( strm (mode, "method2"))
	    {
	      /*method 2: combine (same reac, same seq)*/
	      list[c][SAR_SCORE]=(100*(list[c][N11]+list[c][N00])/(list[c][LEN]));
	    }
	  else if ( strm (mode, "method4"))
	    {
	      if ( list[c][N01]==0)
		list[c][SAR_SCORE]=list[c][SAR1];
	      else
		list[c][SAR_SCORE]=SKIP;
	    }
	}
    }
  if ( !header)
    {
      header=1;
      fprintf ( stdout, "#SAR_ANALYSIS sar_analysis_format_01 Method %s",mode);
    }
  
  sort_int_inv (list,Nval, SAR_SCORE, 0, A->len_aln-1);
  
  
  for (n=0, a=0;n<1 && a<A->len_aln;a++)
    {
      if ( list[a][SAR_SCORE]==SKIP)continue;
      
      fprintf ( stdout, "\nPROF %11s %s ", name, seq);
      fprintf ( stdout, " COL: %-3d SCORE %3d N11 %4d N00 %4d N10 %4d N01 %4d LEN %4d SEQ ", list[a][COL]+1, list[a][SAR_SCORE], list[a][N11], list[a][N00], list[a][N10],  list[a][N01], list[a][LEN]);
      
      for ( b=0; b< A->nseq; b++)fprintf ( stdout, "%c", A->seq_al[b][list[a][COL]]);
      for ( b=0; b< A->nseq; b++)
	{
	  if (seq[b]=='I')
	    {
	      p=list[a][COL];
	      fprintf ( stdout, "\n%-11s %10s %c POS %4d COL %4d SCORE %d", name,A->name[b], A->seq_al[b][p],pos[b][p],list[a][COL]+1, list[a][SAR_SCORE]);
	    }
	}
      n++;
    }
  return list;
}
double evaluate_sar_score ( int N, int n11, int n1msa, int n1sar)
{
  double p;
  
  if ( n11==0)return 0;
  p  = M_chooses_Nlog (n1msa, N) + M_chooses_Nlog (n1sar-n11, N-n1msa) + M_chooses_Nlog (n11, n1msa);
  p-=(M_chooses_Nlog (n1msa, N)+M_chooses_Nlog (n1sar, N));
  return -p;
}
int aln2sar_column_list ( Alignment *A, char *filter)
{
  int a, b, c;
  int one, zero;
  int keep, strict;
  int r;

 
  for ( a=0; a< A->len_aln; a++)
    {
      for ( b=0; b< A->nseq; b++)
	{
	  if (  filter[b]=='1')one=A->seq_al[b][a];
	  if (  filter[b]=='0')zero=A->seq_al[b][a];
	}

      for (keep=1, strict=1, b=0; b< A->nseq; b++)
	{
	  r= A->seq_al[b][a];
	  if ( (filter[b]=='1' && r!=one) ||(filter[b]=='0' && r==one) )
	    {
	      keep=0;
	      continue;
	    }
	  if ( filter[b]=='0' && (r!=zero))strict=0;
	  
	}
      if ( keep==1)
	{
	  fprintf ( stdout, "\nColumn %4d ", a+1);
	  for ( b=0; b< A->nseq; b++)
	    fprintf ( stdout, "%c", A->seq_al[b][a]);
	  fprintf ( stdout, " %s", (strict)?"STRICT":"UNSTRICT");
	}
    }
  return 1;
}

/*********************************COPYRIGHT NOTICE**********************************/
/* Centre National de la Recherche Scientifique (CNRS) */
/*and */
/*Cedric Notredame */
/*Tue May 10 12:08:44     2005. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with Foobar; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*	*/
/*********************************COPYRIGHT NOTICE**********************************/
