#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h> 
#include "io_lib_header.h"
#include "util_lib_header.h"
#include "dp_lib_header.h"
#include "define_header.h"
/************************************************************************************/
/*                NEW      ANALYZE 2    : SAR                                        */
/************************************************************************************/
/* Forward declarations for the SAR "ANALYZE 2" section. */
/* NOTE(review): no definition named display_prediction_old is visible in this part of the file - confirm it exists elsewhere. */
float display_prediction_old (int **prediction, int n, Alignment *A, Alignment *S, int field);

float display_prediction (int ***count, Alignment *S, int c, int n);
/* Alignment filters: all six share one signature and return an Alignment*. */
Alignment * filter_aln4sar0 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar1 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar2 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar3 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar4 ( Alignment *A, Alignment *S, int c, int leave, char *mode);
Alignment * filter_aln4sar5 ( Alignment *A, Alignment *S, int c, int leave, char *mode);

/* Profile construction / scoring helpers. */
int **sar2profile ( Alignment *A, Alignment *S, int c, int leave);
int **sar2profile_sim ( Alignment *A, Alignment *S, int **sim, int comp, int leave);
int sar_profile2score ( char *seq, int **profile);
double sar_vs_seq1 ( char *sar, char *seq, float gl, int **sim, char *best_aa);
double sar_vs_seq2 ( char *sar, char *seq, float ng, int **mat, char *a);
double sar_vs_seq3 ( char *sar, char *seq, float ng, int **mat, char *a);
double sar_vs_seq4 ( char *sar, char *seq, float ng, int **mat, char *a);
int make_sim_pred ( Alignment *A,Alignment *S, int comp, int seq);

/*
 * sar2profile_sim: build a len_aln x 26 penalty profile for compound `comp`.
 *
 * Step 1 counts, per column and per SAR class ('I' -> 1, anything else -> 0),
 * how often each residue occurs (sequence `leave` and gaps are ignored).
 * Step 2 visits every ordered pair (active sequence, non-active sequence): at
 * each column where both carry different non-gap residues, the residue of the
 * non-active sequence is penalised by the pair similarity sim[a][b], provided
 * that residue was never seen in an active sequence and the similarity is at
 * least 50.
 *
 * Returns the profile allocated with declare_int (all entries <= 0); the
 * caller owns it.
 */
int **sar2profile_sim ( Alignment *A, Alignment *S, int **sim, int comp, int leave)
{
  int col, seq, other, pos;
  int ***seen;      /* seen[class][column][residue] */
  int **profile;

  profile=declare_int (A->len_aln, 26);
  seen=declare_arrayN (3,sizeof (int),2,A->len_aln, 26);

  /* Step 1: residue usage per class and column. */
  for (col=0; col<A->len_aln; col++) {
    for (seq=0; seq<A->nseq; seq++) {
      int res=tolower(A->seq_al[seq][col]);
      int cls=(S->seq_al[comp][seq]=='I')?1:0;

      if (seq==leave || is_gap(res)) continue;
      seen[cls][col][res-'a']++;
    }
  }

  /* Step 2: penalise residues that distinguish a non-active neighbour. */
  for (seq=0; seq<A->nseq; seq++) {
    if (seq==leave) continue;

    for (other=0; other<A->nseq; other++) {
      int seq_active=(S->seq_al[comp][seq]=='I')?1:0;
      int other_active=(S->seq_al[comp][other]=='I')?1:0;
      int weight;

      if (other==leave || other==seq) continue;
      /* only (active, non-active) pairs contribute */
      if (!seq_active || other_active) continue;

      weight=sim[seq][other];

      for (pos=0; pos<A->len_aln; pos++) {
        int r1=tolower(A->seq_al[seq][pos]);
        int r2=tolower(A->seq_al[other][pos]);

        if (is_gap(r1) || is_gap(r2) || r1==r2) continue;
        r2-='a';
        if (seen[1][pos][r2] || weight<50) continue;
        profile[pos][r2]-=weight;
      }
    }
  }

  free_arrayN((void***)seen,3);
  return profile;
}
/*
 * sar2profile: build a len_aln x 26 penalty profile for compound `comp`.
 *
 * Residues are counted per column and per SAR class ('I' -> 1, otherwise 0),
 * skipping sequence `leave` and gaps.  A column is analysed only if its gap
 * fraction is at most max_gap and it shows fewer than ncat residue kinds; in
 * such a column every residue seen only in class-0 sequences gets the penalty
 * -(number of class-0 occurrences).  All other entries stay 0.
 *
 * Returns the profile allocated with declare_int; the caller owns it.
 */
int **sar2profile ( Alignment *A, Alignment *S, int comp, int leave)
{
  const int ncat=15;          /* limit the analysis to columns with fewer than ncat kinds of aa */
  const float max_gap=0.05;   /* columns with a larger gap fraction are skipped */
  int col, seq, res;
  int ***count;               /* count[class][column][residue] */
  int **profile;

  profile=declare_int (A->len_aln, 26);
  count=declare_arrayN (3,sizeof (int),2,A->len_aln, 26);

  for (col=0; col<A->len_aln; col++) {
    for (seq=0; seq<A->nseq; seq++) {
      int cls;

      res=tolower(A->seq_al[seq][col]);
      cls=(S->seq_al[comp][seq]=='I')?1:0;
      if (seq==leave) continue;
      if (is_gap(res)) continue;
      count[cls][col][res-'a']++;
    }
  }

  for (col=0; col<A->len_aln; col++) {
    float gap_fraction=0;
    int n_kinds=0;

    for (seq=0; seq<A->nseq; seq++)
      gap_fraction+=(is_gap(A->seq_al[seq][col]));
    gap_fraction/=(float)A->nseq;
    if (gap_fraction>max_gap) continue;

    for (res=0; res<26; res++) {
      if (count[0][col][res] || count[1][col][res]) n_kinds++;
    }
    if (n_kinds>=ncat) continue;

    for (res=0; res<26; res++) {
      if (count[0][col][res] && !count[1][col][res])
        profile[col][res]=-count[0][col][res];
    }
  }

  free_arrayN((void***)count,3);
  return profile;
}
/*
 * filter_aln4sar0: the "no filtering" method.
 * Returns copy_aln(A, NULL); S, comp, leave and mode are ignored.
 */
Alignment * filter_aln4sar0 ( Alignment *A, Alignment *S, int comp, int leave, char *mode)
{
  Alignment *unfiltered;

  unfiltered=copy_aln (A,NULL);
  return unfiltered;
}
/*
 * filter_aln4sar1: keep the columns richest in residues that are seen ONLY in
 * class-0 (non-'I') sequences of compound `comp`.
 *
 * inA   : input alignment (not modified).
 * S     : SAR alignment; only the first strlen(S->seq_al[comp]) sequences of
 *         inA are examined.
 * leave : index of a sequence excluded from the residue counts (jack-knife).
 * mode  : parameter string; _T1_ (default 5) is the percentage of columns to
 *         keep.  It is clamped to [1,100] because the column budget is
 *         computed as len_aln/(100/T1), which would otherwise divide by zero.
 *
 * Each column is scored by the number of sequences carrying a residue that
 * occurs in class 0 but never in class 1.  Columns are ordered with sort_int
 * on that score (presumably ascending - confirm) and all but the last Delta
 * are overwritten with '-' before ungap_aln() is applied to the copy.
 *
 * Returns a newly allocated alignment owned by the caller.
 */
Alignment * filter_aln4sar1 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  Alignment *F, *A;
  int a, b, c, i, r, score;
  int ***cache, **list2;
  int Delta;
  int T1;

  list2=declare_int ( inA->len_aln, 2);
  cache=declare_arrayN (3,sizeof (int),inA->len_aln,2, 26);
  F=copy_aln (inA, NULL);

  A=copy_aln (inA, NULL);
  A->nseq=strlen (S->seq_al[comp]);

  strget_param (mode, "_T1_", "5", "%d", &T1);
  /* 100/T1 must be a non-zero integer */
  if (T1<1) T1=1;
  else if (T1>100) T1=100;

  /* cache[column][class][residue]: residue counts, sequence `leave` excluded */
  for ( a=0; a< A->len_aln; a++)
    {
      for (b=0; b< A->nseq; b++)
	{
	  if ( b==leave) continue;
	  i=(S->seq_al[comp][b]=='I')?1:0;
	  r=tolower(A->seq_al[b][a]);
	  if ( r=='-')continue;
	  cache[a][i][r-'a']++;
	}
    }

  /* column score: sequences whose residue is specific to class 0 */
  for (a=0; a< A->len_aln; a++)
    {
      for ( score=0,b=0; b< A->nseq; b++)
	{
	  r=tolower (A->seq_al[b][a]);
	  if ( r=='-')continue;
	  else r-='a';
	  if ( cache[a][0][r] && !cache[a][1][r])score ++;
	}
      list2[a][0]=a;
      list2[a][1]=score;
    }
  sort_int (list2, 2, 1, 0, F->len_aln-1);

  /* blank every column except the last Delta of the sorted list */
  Delta=A->len_aln/(100/T1);
  for ( a=0; a< F->len_aln-Delta; a++)
    {
      b=list2[a][0];
      for ( c=0; c<F->nseq; c++)
	{
	  F->seq_al[c][b]='-';
	}
    }

  ungap_aln (F);
  free_aln (A);
  free_arrayN ( (void ***)cache, 3);
  free_arrayN ((void**)list2, 2);

  return F;
}
/*
 * filter_aln4sar2: keep low-variability, low-gap columns.
 *
 * A column is kept when it shows fewer than max_ncat (10) distinct characters
 * (case-insensitive; '-' counts as a character; sequence `leave` is ignored)
 * and when '-' accounts for less than 10% of the A->nseq sequences examined.
 * Any other column is overwritten with '-' in the returned copy, which is then
 * handed to ungap_aln().
 *
 * Only the first strlen(S->seq_al[comp]) sequences of inA are examined.
 * mode is not used.  Returns a new alignment owned by the caller.
 */
Alignment * filter_aln4sar2 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  Alignment *F, *A;
  int a,b,r,ncat;
  int *cache;
  int max_ncat=10;

  /* One int counter per character code.  The counters are ints, so the
     allocation must use sizeof(int): sizing it with sizeof(char) made every
     index above ~125 write past the end of the buffer. */
  cache=vcalloc ( 256, sizeof (int));
  F=copy_aln (inA, NULL);
  A=copy_aln (inA, NULL);
  A->nseq=strlen (S->seq_al[comp]);
  for ( a=0; a< A->len_aln; a++)
    {
      for (ncat=0,b=0; b< A->nseq; b++)
	{
	  if ( b==leave) continue;

	  /* unsigned char keeps the index (and the tolower argument) in 0..255 */
	  r=tolower((unsigned char)A->seq_al[b][a]);
	  if ( !cache[r])ncat++;
	  cache[r]++;
	}

      if ( ncat <max_ncat && ((cache['-']*100)/A->nseq)<10)
	{
	  ; /* column kept */
	}
      else
	{
	  for (b=0; b<F->nseq; b++)
	    {
	      F->seq_al[b][a]='-';
	    }
	}

      /* reset the counters touched by this column */
      for (b=0; b<A->nseq; b++)
	  {
	    r=tolower((unsigned char)A->seq_al[b][a]);
	    cache[r]=0;
	  }
    }

  free_aln (A);
  ungap_aln (F);
  vfree (cache);
  return F;
}

/*
 * filter_aln4sar3: keep the T3% of columns that score best against the SAR
 * pattern of compound `comp`.
 *
 * inA   : input alignment (not modified).
 * S     : SAR alignment; only the first strlen(S->seq_al[comp]) sequences are used.
 * leave : position removed from both the SAR string and each column with
 *         splice_out_seg (behaviour for leave==-1 is defined by that helper).
 * mode  : parameter string; _T3_ (default 10) is the percentage of columns to
 *         keep.  It is clamped to [1,100] because the column budget is
 *         len_aln/(100/T3), which would otherwise divide by zero.
 *
 * Each column (a row of the rotated alignment) is scored with sar_vs_seq3.
 * The columns are ordered with sort_int on that score (presumably ascending -
 * confirm) and all but the last Delta are blanked with '-' before ungap_aln().
 * F->score_aln receives the score of the last column in the sorted list.
 *
 * Returns a new alignment owned by the caller.
 */
Alignment * filter_aln4sar3 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  Alignment *F, *rA, *A;
  int a, b,c;
  int **list1;
  char *bufS, *bufA;
  int Delta;
  int T3;

  A=copy_aln (inA, NULL);
  A->nseq=strlen (S->seq_al[comp]);
  F=copy_aln (inA, NULL);
  rA=rotate_aln (A, NULL);

  strget_param (mode, "_T3_", "10", "%d", &T3);
  /* 100/T3 must be a non-zero integer */
  if (T3<1) T3=1;
  else if (T3>100) T3=100;

  list1=declare_int ( inA->len_aln, 2);
  bufA=vcalloc ( A->nseq+1, sizeof (char));
  bufS=vcalloc ( A->nseq+1, sizeof (char));

  sprintf ( bufS, "%s", S->seq_al[comp]);
  splice_out_seg(bufS,leave, 1);

  for (a=0; a< A->len_aln; a++)
    {
      char aa;
      list1[a][0]=a;
      sprintf (bufA, "%s", rA->seq_al[a]);
      splice_out_seg (bufA,leave,1);
      list1[a][1]=(int)sar_vs_seq3 ( bufS, bufA,0,NULL, &aa);
    }

  sort_int (list1, 2, 1, 0, F->len_aln-1);
  Delta=F->len_aln/(100/T3);
  for ( a=0; a< F->len_aln-Delta; a++)
    {
      b=list1[a][0];
      for ( c=0; c<F->nseq; c++)
	{
	  F->seq_al[c][b]='-';
	}
    }
  F->score_aln=list1[F->len_aln-1][1];
  ungap_aln (F);

  free_aln (rA);
  free_aln(A);
  free_arrayN ((void**)list1, 2);
  vfree (bufS);vfree (bufA);
  return F;
}
/*
 * filter_aln4sar4: keep the columns in which some residue is frequent among
 * the class-1 ('I') sequences of compound `comp`.
 *
 * Residues are counted per column and class (sequence `leave` and '-' are
 * skipped).  A column is kept when at least one of its residues occurs in
 * class 1 at least n_active/2 times; every other column is overwritten with
 * '-' in the returned copy before ungap_aln().
 *
 * NOTE(review): n_active is reset for every column while counting, so the
 * threshold uses the class-1 residue count of the LAST column only - confirm
 * whether a per-column threshold was intended.
 *
 * mode is not used.  Returns a new alignment owned by the caller.
 */
Alignment * filter_aln4sar4 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  Alignment *out, *work;
  int col, seq;
  int n_active=0;
  int ***count;     /* count[column][class][residue] */
  int **list1;      /* allocated and released, never read */
  int **flag;       /* flag[column] = {column index, keep?} */

  list1=declare_int ( inA->nseq, 2);
  flag=declare_int ( inA->len_aln, 2);

  count=declare_arrayN (3,sizeof (int),inA->len_aln,2, 26);
  out=copy_aln (inA, NULL);
  work=copy_aln (inA, NULL);
  work->nseq=strlen (S->seq_al[comp]);

  for (col=0; col<work->len_aln; col++) {
    n_active=0;
    for (seq=0; seq<work->nseq; seq++) {
      int cls, res;

      if (seq==leave) continue;
      cls=(S->seq_al[comp][seq]=='I')?1:0;
      res=tolower(work->seq_al[seq][col]);
      if (res=='-') continue;
      count[col][cls][res-'a']++;
      n_active+=cls;
    }
  }

  for (col=0; col<work->len_aln; col++) {
    int keep=0;

    for (seq=0; seq<work->nseq; seq++) {
      int res=tolower (out->seq_al[seq][col]);

      if (res=='-') continue;
      if (count[col][1][res-'a']>=n_active/2) keep=1;
    }
    flag[col][0]=col;
    flag[col][1]=keep;
  }

  for (col=0; col<out->len_aln; col++) {
    int target;

    if (flag[col][1]==1) continue;
    target=flag[col][0];
    for (seq=0; seq<out->nseq; seq++)
      out->seq_al[seq][target]='-';
  }

  ungap_aln (out);
  free_aln (work);
  free_arrayN ( (void ***)count, 3);
  free_arrayN ((void**)list1, 2);
  free_arrayN ((void**)flag, 2);

  return out;
}

/*
 * filter_aln4sar5: keep the 10 columns that score best against the SAR
 * pattern of compound `comp` (scored with sar_vs_seq4).
 *
 * inA   : input alignment.  It is NOT modified and NOT freed: the function
 *         works on private copies (the previous code released inA instead of
 *         its own copy, leaving the caller with a dangling alignment).
 * S     : SAR alignment; only the first strlen(S->seq_al[comp]) sequences are used.
 * leave : position removed from the SAR string and from each column with
 *         splice_out_seg.
 * mode  : not used.
 *
 * Columns are ordered with sort_int on their score (presumably ascending -
 * confirm) and all but the last 10 are blanked with '-' before ungap_aln().
 * F->score receives the score of the last column of the sorted list and
 * F->score_aln is set to 10.
 *
 * Returns a new alignment owned by the caller.
 */
Alignment * filter_aln4sar5 ( Alignment *inA, Alignment *S, int comp, int leave, char *mode)
{
  Alignment *F, *rA, *A;
  int a, b,c;
  int **list1;
  char *bufS, *bufA;

  A=copy_aln (inA, NULL);
  A->nseq=strlen (S->seq_al[comp]);

  /* Rotate the truncated copy (as filter_aln4sar3 does) so that each rotated
     row fits in bufA, which is sized from A->nseq. */
  rA=rotate_aln (A, NULL);
  F=copy_aln (inA, NULL);

  list1=declare_int ( A->len_aln, 2);
  bufA=vcalloc ( A->nseq+1, sizeof (char));
  bufS=vcalloc ( A->nseq+1, sizeof (char));

  sprintf ( bufS, "%s", S->seq_al[comp]);
  splice_out_seg(bufS,leave, 1);

  for (a=0; a< A->len_aln; a++)
    {
      char aa;
      list1[a][0]=a;
      sprintf (bufA, "%s", rA->seq_al[a]);
      splice_out_seg (bufA,leave,1);
      list1[a][1]=(int)sar_vs_seq4 ( bufS, bufA,0,NULL, &aa);
    }

  sort_int (list1, 2, 1, 0, F->len_aln-1);
  F->score=list1[F->len_aln-1][1];

  for ( a=0; a< F->len_aln-10; a++)
    {
      b=list1[a][0];
      for ( c=0; c<F->nseq; c++)
	{
	  F->seq_al[c][b]='-';
	}
    }
  F->score_aln=10;
  ungap_aln (F);

  free_aln (A);   /* release the private copy, never the caller's inA */
  free_aln (rA);
  free_arrayN ((void**)list1, 2);
  vfree (bufS);vfree (bufA);
  return F;
}

/*
 * sar_profile2score: score a sequence against a position profile.
 * For every non-gap character at position i, adds P[i][lowercase(residue)-'a'].
 * Returns the sum (0 for an empty or all-gap string).
 */
int sar_profile2score ( char *seq, int **P)
{
  int total=0;
  int pos;
  int len=strlen (seq);

  for (pos=0; pos<len; pos++) {
    int res=seq[pos];

    if (is_gap(res)) continue;
    total+=P[pos][tolower(res)-'a'];
  }
  return total;
}
/*
 * make_sim_pred: predict the class of sequence `seq` for compound `comp` from
 * column-wise identity with the other sequences.
 *
 * For every column where `seq` is not '-', the fraction of non-gap sequences
 * of each class ('I' -> 1, otherwise 0) that carry the same character is
 * computed; these fractions are summed over the columns for each class.
 * The two sums are printed to stdout.
 *
 * Returns 1 when the class-1 sum is >= the class-0 sum, else 0.
 * The two 2x2 work tables are allocated once and kept in static storage.
 */
int make_sim_pred ( Alignment *A,Alignment *S, int comp, int seq)
{
  static float **cscore;   /* per column: [class][0]=matches, [class][1]=comparisons */
  static float **tscore;   /* totals:     [class][0]=sum of fractions, [class][1]=columns */
  int col, other, cls;

  if (cscore==NULL) {
    cscore=declare_float (2, 2);
    tscore=declare_float (2, 2);
  }

  for (cls=0; cls<2; cls++) {
    cscore[cls][0]=tscore[cls][0]=0;
    cscore[cls][1]=tscore[cls][1]=0;
  }

  for (col=0; col<A->len_aln; col++) {
    int query=A->seq_al[seq][col];

    if (query=='-') continue;

    for (other=0; other<A->nseq; other++) {
      int res;

      if (other==seq) continue;
      res=A->seq_al[other][col];
      if (res=='-') continue;

      cls=(S->seq_al[comp][other]=='I')?1:0;
      cscore[cls][0]+=(query==res)?1:0;
      cscore[cls][1]++;
    }

    /* fold this column into the totals and clear the per-column counters */
    for (cls=0; cls<2; cls++) {
      cscore[cls][0]/=(cscore[cls][1]==0)?1:cscore[cls][1];
      tscore[cls][0]+=cscore[cls][0];
      tscore[cls][1]++;
      cscore[cls][0]=cscore[cls][1]=0;
    }
  }

  fprintf ( stdout, "\nn\t 1: %.2f 0: %.2f", tscore[1][0],tscore[0][0]);
  if (tscore[1][0]>=tscore[0][0]) return 1;
  return 0;
}
      

/*
 * sar_analyze: nearest-neighbour prediction of compound activity.
 *
 * inA  : sequence alignment; when mode names a _TARGET_ alignment it is read
 *        and stacked onto inA (it must have the same len_aln).
 * inS  : SAR alignment; row a is a compound and character b of that row is
 *        the class of sequence b ('O' is scored as class 0, anything else as 1;
 *        'I' characters are counted as the compound's number of targets).
 * mode : parameter string read with strget_param: _TARGET_, _SUBSET_, _JACK_,
 *        _T_, _FILTER_, _DISPLAY_ (_T_ and _FILTER_ are parsed but unused here).
 *
 * For every compound and every sequence to predict, each registered filter
 * (filter_aln4sar0..3) yields a filtered alignment whose "idmat" similarity
 * matrix is searched for the most similar class-1 and class-0 reference
 * sequence; the prediction is 1 when the best class-1 neighbour is strictly
 * more similar.  With _JACK_ set the filters are recomputed for each sequence
 * with that sequence left out.  Per-method and combined (vote threshold)
 * confusion counts are accumulated and reported on stdout, as CSV when
 * _DISPLAY_ is 1 and verbosely when it is 0.
 *
 * The function ends with exit(EXIT_SUCCESS); it never returns to the caller.
 */
Alignment * sar_analyze (Alignment *inA, Alignment *inS, char *mode)
{
  int ***sim,***glob_results, ***comp_results;
  int *count;
  int a,b,c,m;
  Alignment *A=NULL,*S=NULL,*F, *SUBSET;
  char *subset, *target;
  int jack, T, filter;
  filter_func *ff;
  int n_methods=0;
  char *prediction, *reliability;
  int pred_start=0, pred_end, ref_start=0, ref_end;
  int display, CSV=1, NONCSV=0;


  /* prediction methods: one per registered filter */
  ff=vcalloc (6,sizeof (filter_func));
  ff[n_methods++]=filter_aln4sar0;
  ff[n_methods++]=filter_aln4sar1;
  ff[n_methods++]=filter_aln4sar2;
  ff[n_methods++]=filter_aln4sar3;
  /*
    ff[n_methods++]=filter_aln4sar4;
    ff[n_methods++]=filter_aln4sar5;
  */
  sim=vcalloc (n_methods, sizeof (int**));


  subset=vcalloc ( 100, sizeof (char));
  target=vcalloc ( 100, sizeof (char));

  strget_param (mode, "_TARGET_", "no", "%s_", target);
  strget_param (mode, "_SUBSET_", "no", "%s_", subset);
  strget_param (mode, "_JACK_", "0", "%d", &jack);
  strget_param (mode, "_T_", "0", "%d", &T);
  strget_param (mode, "_FILTER_", "11", "%d", &filter);
  strget_param (mode, "_DISPLAY_", "0", "%d", &display);



  if ( !strm (target, "no"))
    {
      Alignment *target_aln;
      target_aln=main_read_aln(target, NULL);
      if ( target_aln->len_aln !=inA->len_aln )
	{
	  printf_exit ( EXIT_FAILURE,stderr, "Error: %s is incompatible with the reference alignment [FATAL:%s]",target,PROGRAM);
	}

      inA=stack_aln (inA, target_aln);

    }

  if ( !strm(subset, "no"))
    {
      SUBSET=main_read_aln (subset, NULL);
      sarset2subsarset ( inA, inS, &A, &S, SUBSET);
    }
  else
    {
      A=inA;
      S=inS;
    }


  prediction=vcalloc ( n_methods+1, sizeof (char));
  reliability=vcalloc ( n_methods+1, sizeof (char));

  /* [0..n_methods-1]: single methods, [n_methods..2*n_methods-1]: vote thresholds; each a [pred][real] table */
  glob_results=declare_arrayN(3, sizeof (int), n_methods*2, 2, 2);

  /* count[a]: number of 'I' entries of compound a */
  count=vcalloc (S->nseq, sizeof (int));
  for (a=0; a<S->nseq; a++)
    {
      int l;
      l=strlen (S->seq_al[a]);
      for ( b=0; b<l; b++)
	count[a]+=(S->seq_al[a][b]=='I')?1:0;
    }
  if ( display==CSV)
    {
      /* CSV header.  The two leading fields label the "name;count" fields of
	 each data row; the previous code filled them from S->name[a]/count[a]
	 with a==S->nseq, i.e. one element past the end of both arrays. */
      fprintf ( stdout, "\nCompound;Ntargets");
      pred_start=(strlen (S->seq_al[0])==A->nseq)?0:strlen (S->seq_al[0]);
      pred_end=A->nseq;
      for (a=pred_start; a< pred_end; a++)
	fprintf ( stdout, ";%s", A->name[a]);
      fprintf ( stdout, ";npred;");
    }


  for (a=0; a<S->nseq; a++)
    {
      int n_pred;
      comp_results=declare_arrayN(3, sizeof (int), n_methods*2, 2, 2);

      /* if the SAR row covers every sequence, re-predict them all; otherwise
	 predict only the sequences beyond the SAR row */
      pred_start=(strlen (S->seq_al[a])==A->nseq)?0:strlen (S->seq_al[a]);
      pred_end=A->nseq;
      if ( display==CSV)fprintf ( stdout, "\n%s;%d", S->name[a],count[a]);

      for (n_pred=0,b=pred_start; b<pred_end;b++)
	{
	  int t, score=0,pred, real;

	  if ( display==NONCSV)fprintf ( stdout, "\n>%-15s %10s %c ", S->name[a], A->name[b], (pred_start==0)?S->seq_al[a][b]:'?');
	  if (jack || b==pred_start)
	    {
	      for (m=0; m<n_methods; m++)
		{
		  free_int (sim[m], -1);
		  F=(ff[m]) (A,S,a,(jack==0)?-1:b, mode);
		  sim[m]=aln2sim_mat(F, "idmat");
		  free_aln (F);
		}
	    }

	  real=(S->seq_al[a][b]=='O')?0:1;
	  for (m=0; m<n_methods; m++)
	    {
	      int Nbsim=0,Ybsim=0,bsim=0;
	      ref_start=0;
	      /* reference sequences are those described by the SAR row of the
		 CURRENT compound (row a), not by row m (m is a method index) */
	      ref_end=strlen (S->seq_al[a]);

	      for (c=ref_start;c<ref_end; c++)
		{
		  if ( b==c) continue;
		  else if ( S->seq_al[a][c]=='O')
		    {
		      Nbsim=MAX(Nbsim,sim[m][b][c]);
		    }
		  else
		    {
		      Ybsim=MAX(Ybsim,sim[m][b][c]);
		    }
		}

	      bsim=(Ybsim>Nbsim)?Ybsim:-Nbsim;
	      pred=(bsim>0)?1:0;
	      comp_results[m][pred][real]++;
	      glob_results[m][pred][real]++;
	      score+=pred;
	      prediction[m]=pred+'0';
	      reliability[m]=(FABS((Ybsim-Nbsim))-1)/10+'0';
	    }

	  if ( score>0)n_pred++;
	  prediction[m]=reliability[m]='\0'; /* m==n_methods here: terminate both strings */
	  if (display==NONCSV)fprintf ( stdout, "Compound_Count:%d primary_predictions: %s Total: %d", count[a],prediction, score);
	  else if ( display==CSV)fprintf ( stdout, ";%d", score);

	  /* combined predictors: positive when more than t methods voted 1 */
	  for (t=0; t<n_methods; t++)
	    {
	      if (score>t)
		{
		  comp_results[t+n_methods][1][real]++;
		  glob_results[t+n_methods][1][real]++;
		}
	      else
		{
		  comp_results[t+n_methods][0][real]++;
		  glob_results[t+n_methods][0][real]++;
		}
	    }
	}
      if ( display==NONCSV)
	{if ( pred_start==0)display_prediction (comp_results, S,a, n_methods*2);}
      else fprintf (stdout, ";%d;",n_pred);
    }
  if ( display==NONCSV)if (pred_start==0)display_prediction (glob_results, S,-1, n_methods*2);


  exit (EXIT_SUCCESS);
}
/*
 * display_prediction: print sp / sn / sn2 for each of the n confusion tables
 * in count (count[k][pred][real]).
 *
 * count : n tables; the first n/2 are reported as "Method k", the remaining
 *         ones as "Combined: T=k-n/2".
 * S, c  : the compound name S->name[c] is printed, or "TOTAL" when c==-1.
 *
 *   sp  = tn/(tn+fp)     sn = tp/(tp+fn)     sn2 = tp/(tp+fp)
 *
 * A ratio whose denominator is 0 is reported as 0 (same convention as
 * display_prediction_2) instead of printing nan.  Always returns 0.
 */
float display_prediction (int ***count, Alignment *S, int c, int n)
{
  float tp,tn,fn,fp,sp,sn,sn2;
  int a, nm;

  nm=n/2;

  for (a=0; a<n; a++)
    {
      tp=count[a][1][1];
      tn=count[a][0][0];
      fp=count[a][1][0];
      fn=count[a][0][1];

      sn2=((tp+fp)==0)?0:tp/(tp+fp);
      sn=((tp+fn)==0)?0:tp/(tp+fn);
      sp=((tn+fp)==0)?0:tn/(tn+fp);
      if ( a<nm)fprintf ( stdout, "\n>#Method %d Compound %15s sp=%.2f sn=%.2f sn2=%.2f",a, (c==-1)?"TOTAL":S->name[c],sp, sn, sn2 );
      else fprintf ( stdout, "\n>#Combined: T=%d Compound %15s sp=%.2f sn=%.2f sn2=%.2f",a-nm, (c==-1)?"TOTAL":S->name[c],sp, sn, sn2 );
    }
  fprintf ( stdout, "\n");
  return 0;
}
 
/*
 * display_prediction_2: pick the threshold on column `field` that gives the
 * highest sn among the cut-offs whose sp exceeds 0.8.
 *
 * prediction : n rows; column 3 holds the real class (0/1).  Columns 17 and 18
 *              are overwritten with, respectively, the number of positives up
 *              to and including the row and the number of positives after it
 *              (rows are first reordered with sort_int_inv on `field`).
 * field      : column used for ordering and reported as T; 17 and 18 are
 *              rejected with printf_exit.
 * A, S       : not used.
 *
 * Here sp = tp/(tp+fp) and sn = tp/(tp+fn); both default to 0 on an empty
 * denominator.  Prints the selected T, SN and SP and returns the best sn
 * (0 when no cut-off qualifies).
 */
float display_prediction_2 (int **prediction, int n,Alignment *A, Alignment *S, int field)
{
  int i, running, total, best_T;
  float best_sn, best_sp;

  if (field==17 || field==18) {
    printf_exit ( EXIT_FAILURE, stderr, "\nERROR: Do not use filed %d in display_prediction", field);
  }

  sort_int_inv ( prediction, 10,field, 0, n-1);

  /* column 17: positives seen so far, this row included */
  running=0;
  for (i=0; i<n; i++) {
    running+=prediction[i][3];
    prediction[i][17]=running;
  }

  /* column 18: positives strictly after this row */
  total=running;
  for (i=0; i<n; i++) {
    prediction[i][18]=total-prediction[i][17];
  }

  best_sn=0;
  best_sp=0;
  best_T=0;
  for (i=0; i<n; i++) {
    float tp=prediction[i][17];
    float fn=prediction[i][18];
    float fp=(i+1)-tp;
    float sp=((tp+fp)==0)?0:tp/(tp+fp);
    float sn=((tp+fn)==0)?0:tp/(tp+fn);

    if (sp>0.8 && sn>best_sn) {
      best_sn=sn;
      best_sp=sp;
      best_T=prediction[i][field];
    }
  }

  fprintf (stdout, "\n T =%d SN=%.2f SP= %.2f",best_T,best_sn,best_sp);

  return best_sn;
}


/************************************************************************************/
/*                NEW      ANALYZE     : SAR                                        */
/************************************************************************************/
/* Forward declarations for the SAR "ANALYZE" section. */
/* Predictors working from a cache: all share one signature and return float**. */
float** cache2pred1 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred2 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred3 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred4 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred5 (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** cache2pred_new (Alignment *A,int**cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);

/* Cache builders: all share one signature and return int**. */
int **sar2cache_adriana ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_proba_old ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_count1 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_count2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_count3 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);

int **sar2cache_proba_new ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **sar2cache_proba2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode);
int **analyze_sar_compound1 ( char *name, char *seq, Alignment *A, char *mode);
int **analyze_sar_compound2 ( char *name, char *seq, Alignment *A, char *mode);

int aln2n_comp_col ( Alignment *A, Alignment *S, int ci);

double evaluate_sar_score1 ( int len, int n11, int n1a, int n1b);
double evaluate_sar_score2 ( int len, int n11, int n1a, int n1b);


int ***simple_sar_analyze_vot ( Alignment *inA, Alignment *SAR, char *mode);
int ***simple_sar_analyze_col ( Alignment *inA, Alignment *SAR, char *mode);


int sarset2subsarset ( Alignment *A, Alignment *S, Alignment **subA, Alignment **subS, Alignment *SUB);
int benchmark_sar (int v);
/* Jack-knife helpers. */
int aln2jack_group1 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2);
int aln2jack_group2 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2);
int aln2jack_group3 (Alignment *A, char *sar_seq, int **l1, int *nl1, int **l2, int *nl2);
float** jacknife5 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);
float** jacknife6 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode);

int process_cache ( Alignment *A,Alignment *S, int ***Cache, char *mode);
Alignment *analyze_compounds (Alignment *A, Alignment *S, char *mode);

/*
 * analyze_compounds: for each compound (row of S) print the mean "idmat"
 * similarity over all pairs of sequences that are both flagged 'I'.
 *
 * Output line: ">name   CMPSIM: mean".  With fewer than two 'I' sequences the
 * mean is 0/0 and is printed as such by printf.
 * mode is not used.  Returns A unchanged.
 */
Alignment *analyze_compounds (Alignment *A, Alignment *S, char *mode)
{
  int **sim;
  int cmp, i, j;

  sim=aln2sim_mat (A, "idmat");

  for (cmp=0; cmp<S->nseq; cmp++) {
    int sum=0;
    int n_pairs=0;

    for (i=0; i<A->nseq-1; i++) {
      if (S->seq_al[cmp][i]!='I') continue;

      for (j=i+1; j<A->nseq; j++) {
        if (S->seq_al[cmp][j]!='I') continue;
        sum+=sim[i][j];
        n_pairs++;
      }
    }
    fprintf ( stdout, ">%-10s   CMPSIM: %.2f\n", S->name[cmp],(float)sum/(float)n_pairs);
  }

  free_int (sim, -1);
  return A;
}

int print_seq_pos ( int pos, Alignment *A, char *seq);
int abl1_evaluation (int p);
/*
 * print_seq_pos: print where alignment column `pos` falls in sequence `seq`.
 *
 * The sequence index is looked up with name_is_in_list and printed; the
 * number of non-gap characters before column `pos` is then reported with an
 * offset of 246.  For the sequence named "ABL1" the abl1_evaluation value of
 * that position is printed as well.  Always returns 0.
 *
 * NOTE(review): the index returned by name_is_in_list is used without a
 * "not found" check - confirm callers only pass names present in A.
 */
int print_seq_pos ( int pos, Alignment *A, char *seq)
{
  int col;
  int seq_index;
  int n_res=0;

  seq_index=name_is_in_list (seq, A->name, A->nseq, MAXNAMES);
  fprintf ( stdout, "S=%d", seq_index);

  for (col=0; col<pos; col++) {
    if (!is_gap (A->seq_al[seq_index][col])) n_res++;
  }

  fprintf ( stdout, "Pos %d SEQ %s: %d ", pos+1, seq, n_res+246);
  if ( strm ( seq, "ABL1")) {
    fprintf ( stdout , "PT: %d", abl1_evaluation (n_res+246));
  }
  return 0;
}

/*
 * process_cache: print, for every alignment column, a summary of the
 * per-compound caches.
 *
 * A     : alignment; it is expected to contain a sequence named "ABL1", whose
 *         residue numbering (offset 246) is used to label the columns.
 *         NOTE(review): the index returned by name_is_in_list is used without
 *         a "not found" check - confirm "ABL1" is always present.
 * S     : SAR alignment; one cache per compound (S->nseq of them).
 * Cache : Cache[compound][26][column] is the value summarised here.
 * mode  : _WEIGHT_ is parsed (default 1) but has no effect in this function.
 *
 * For each column the output line holds: column number, sum of Cache[.][26]
 * over compounds, number of compounds with a positive value, ABL1 position,
 * ABL1 character, abl1_evaluation of that position, then one 0/1 flag per
 * compound.
 *
 * Returns 1.  All work arrays are released before returning.
 */
int process_cache ( Alignment *A,Alignment *S, int  ***Cache, char *mode)
{
  int a, b;
  int **pos, **pos2;
  int **C;
  int ab1, *ab1_pos;
  int weight_mode;

  strget_param ( mode, "_WEIGHT_", "1", "%d", &weight_mode);
  pos=declare_int(A->len_aln+1,2);          /* [column] = {sum, #compounds > 0} */
  pos2=declare_int (A->len_aln+1,S->nseq);  /* [column][compound] = 1 if value > 0 */
  for (a=0; a<S->nseq; a++)
    {
      C=Cache[a];
      for (b=0; b< A->len_aln; b++)
	{
	    pos[b][0]+=C[26][b];
	    if ( C[26][b]>0)
	      {
		pos[b][1]++;
		pos2[b][a]=1;
	      }
	}
    }

  ab1=name_is_in_list ("ABL1", A->name, A->nseq,100);
  ab1_pos=vcalloc (A->len_aln+1, sizeof (int));

  /* 1-based residue index of ABL1 at each column, -1 on a gap */
  for ( b=0,a=0; a< A->len_aln; a++)
    {
      if ( A->seq_al[ab1][a]=='-')ab1_pos[a]=-1;
      else ab1_pos[a]=++b;
    }

  for ( a=0; a< A->len_aln; a++)
    {
      fprintf ( stdout, "\n%4d %5d %5d %5d [%c] [%2d] ALN", a+1, pos[a][0], pos[a][1], ab1_pos[a]+246,A->seq_al[ab1][a],abl1_evaluation (ab1_pos[a]+246));
      for ( b=0; b< S->nseq; b++)fprintf ( stdout, "%d", pos2[a][b]);
    }

  /* these three arrays were previously leaked on every call */
  free_int (pos, -1);
  free_int (pos2, -1);
  vfree (ab1_pos);
  return 1;
}
/*
 * abl1_evaluation: fixed lookup of a score (0..10) for an ABL1 position.
 * Listed positions return their tabulated score; any other position returns
 * 0 when it is above 400 and -1 otherwise.
 */
int abl1_evaluation (int p)
{
  switch (p) {
  case 248: case 250: case 253: case 254: case 256: case 299:
  case 315: case 318: case 319: case 321: case 364: case 368:
  case 370: case 382: case 384: case 387:
    return 10;

  case 255: case 314:
    return 9;

  case 258: case 269: case 294: case 378: case 395:
  case 398: case 399: case 400: case 416:
    return 8;

  case 257: case 355: case 419:
    return 5;

  case 291:
    return 4;

  case 306: case 323: case 324: case 339: case 340:
  case 366: case 372: case 403:
    return 0;

  default:
    break;
  }

  /* unlisted positions */
  return (p>400) ? 0 : -1;
}
/*
 * cache2pred1: predict the class of the test sequences (ls[1], ns[1] of them)
 * from the training sequences (ls[0], ns[0] of them).
 *
 * For a test sequence, every (column, training sequence) pair is visited where
 * the training sequence is not 'O' for the compound, has a non-zero cache
 * entry and a non-gap residue.  Each such pair has weight cache[seq][col] when
 * _WEIGHT_ is 1 and the entry is positive, else 1.  The score is the weighted
 * fraction of pairs whose residue equals the test residue (case-insensitive);
 * the prediction is 1 when that fraction exceeds _THR_ (default 0.09).
 *
 * One line is printed per test sequence, followed by a line giving the
 * fraction of columns and residues used by the cache.
 *
 * Returns a 2x2 table R[pred][real] allocated with declare_float.
 */
float** cache2pred1 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  float **R, T;
  int weight_mode;
  int ci;
  int t, k, col;
  int used_col=0, used_res=0, n_res=0;

  /*Predict on ns[1] what was trained on ns[0]*/
  strget_param ( mode, "_THR_", "0.09", "%f", &T);
  strget_param ( mode, "_WEIGHT_", "0", "%d", &weight_mode);

  R=declare_float (2, 2);
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);

  for (t=0; t<ns[1]; t++) {
    int query=ls[1][t];
    double hits=0, total=0;
    int pred, real;

    for (col=0; col<A->len_aln; col++) {
      int qres=tolower (A->seq_al[query][col]);

      for (k=0; k<ns[0]; k++) {
        int ref=ls[0][k];
        int rres, w;

        if ( S->seq_al[ci][ref]=='O') continue;
        if ( cache[ref][col]==0 && !is_gap( A->seq_al[ref][col])) continue;

        rres=tolower ( A->seq_al[ref][col]);
        if ( is_gap(rres)) continue;

        if (cache[ref][col]>0 && weight_mode==1) w=cache[ref][col];
        else w=1;

        total+=w;
        if (rres==qres) hits+=w;
      }
    }

    pred=(( hits/total) >T)?1:0;
    real=(S->seq_al[ci][query]=='I')?1:0;
    R[pred][real]++;

    fprintf ( stdout, "\n>%s %d%d SCORE %.2f C %s [SEQ]\n", A->name[query],real, pred, (float)hits/(float)total, compound);
  }

  /* usage statistics of the cache over the training sequences */
  for (col=0; col<A->len_aln; col++) {
    int col_is_used=0;

    for (k=0; k<ns[0]; k++) {
      int ref=ls[0][k];

      if ( cache[ref][col]==0 && !is_gap(A->seq_al[ref][col])) {
        n_res++;
      } else if (!is_gap(A->seq_al[ref][col])) {
        col_is_used=1;
        used_res++;
      }
    }
    used_col+=col_is_used;
  }
  fprintf ( stdout, "\n>%s USED_POSITIONS: COL: %.2f RES: %.2f COMP\n", S->name[ci],  (float)used_col/(float)A->len_aln, (float)used_res/(float) n_res);

  return R;
}

/*
 * cache2pred2: like cache2pred1 but every contributing (column, training
 * sequence) pair is weighted by its raw cache value, and the default _THR_
 * is 0.5.
 *
 * For each test sequence (ls[1]) a line is printed holding the lower-cased
 * test residues of the columns that had at least one contributing training
 * sequence, followed by the score, the maximum, their ratio and the real
 * class.  A final line gives the fraction of columns and residues used.
 *
 * Returns a 2x2 table R[pred][real] allocated with declare_float.
 */
float** cache2pred2 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  float **R, T;
  int ci;
  int t, k, col;
  int used_col=0, used_res=0, n_res=0;

  /*Predict on ns[1] what was trained on ns[0]*/
  strget_param ( mode, "_THR_", "0.5", "%f", &T);

  R=declare_float (2, 2);
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);

  for (t=0; t<ns[1]; t++) {
    int query=ls[1][t];
    double hits=0, total=0;
    int pred, real;

    fprintf ( stdout, "\n");
    for (col=0; col<A->len_aln; col++) {
      int qres=tolower (A->seq_al[query][col]);
      int col_used=0;

      for (k=0; k<ns[0]; k++) {
        int ref=ls[0][k];
        int rres, w;

        if ( S->seq_al[ci][ref]=='O') continue;
        if ( cache[ref][col]==0 && !is_gap( A->seq_al[ref][col])) continue;

        rres=tolower ( A->seq_al[ref][col]);
        if ( is_gap(rres)) continue;

        w=cache[ref][col];
        if (rres==qres) hits+=w;
        col_used=1;
        total+=w;
      }
      if (col_used) fprintf ( stdout, "%c", qres);
    }

    pred=(( hits/total) >T)?1:0;
    real=(S->seq_al[ci][query]=='I')?1:0;
    R[pred][real]++;

    fprintf ( stdout, "PSEQ: %-10s SC: %4d MAX: %4d S: %.2f R: %4d", A->name[query],(int)hits, (int)total, (float)hits/total,real);
  }

  /* usage statistics of the cache over the training sequences */
  for (col=0; col<A->len_aln; col++) {
    int col_is_used=0;

    for (k=0; k<ns[0]; k++) {
      int ref=ls[0][k];

      if ( cache[ref][col]==0 && !is_gap(A->seq_al[ref][col])) {
        n_res++;
      } else if (!is_gap(A->seq_al[ref][col])) {
        col_is_used=1;
        used_res++;
      }
    }
    used_col+=col_is_used;
  }
  fprintf ( stdout, "\n>%s USED_POSITIONS: COL: %.2f RES: %.2f COMP\n", S->name[ci],  (float)used_col/(float)A->len_aln, (float)used_res/(float) n_res);

  return R;
}

/*
 * cache2pred3: rank the test sequences (ls[1]) by their cache-weighted score
 * against the training sequences (ls[0]) and derive a confusion table.
 *
 * The score of a test sequence is computed as in cache2pred2 (weighted
 * fraction of matching residues over the contributing training residues) and
 * stored as an integer in thousandths; a sequence with no contributing
 * residue scores 0 (the previous code converted 0/0 to int, which is
 * undefined).  The list is ordered with sort_int_inv on the score (presumably
 * best first - confirm) and printed as "PSEQ: name SC: score R: real"; the
 * two values used to be printed under each other's label.
 *
 * The cut-off is the prefix of the ranked list whose size is closest to n,
 * the number of real positives; tp/fp are counted inside that prefix.
 * _THR_ is parsed from mode but does not influence the result.
 *
 * Returns a 2x2 table R[pred][real] allocated with declare_float.
 */
float** cache2pred3 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int s1, s2, seq1, seq2, r1, r2,col, real, ci, a, n;
  double score, max;
  float **R, T;

  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;

  /*Predict on ns[1] what was trained on ns[0]*/

  strget_param ( mode, "_THR_", "0.5", "%f", &T);


  R=declare_float (2, 2);
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  list=declare_int ( ns[1],3);   /* {real class, score*1000, sequence index} */

  for (s1=0; s1<ns[1]; s1++)
    {
      int v;
      seq1=ls[1][s1];

      for (max=0,score=0, col=0; col<A->len_aln; col++)
	{
	  r1=tolower (A->seq_al[seq1][col]);
	  for (s2=0; s2<ns[0]; s2++)
	    {
	      seq2=ls[0][s2];

	      if ( S->seq_al[ci][seq2]=='O')continue;
	      if ( cache[seq2][col]==0 && !is_gap( A->seq_al[seq2][col]))continue;

	      r2=tolower ( A->seq_al[seq2][col]);
	      if ( is_gap(r2))continue;

	      v=cache[seq2][col];
	      if ( r2==r1){score+=v;}
	      max+=v;
	    }
	}

      real=(S->seq_al[ci][seq1]=='I')?1:0;

      list[s1][0]=real;
      /* max==0 means nothing contributed: avoid converting NaN to int */
      list[s1][1]=(max==0)?0:(int)((score/max)*(float)1000);
      list[s1][2]=seq1;
    }
  sort_int_inv (list, 3, 1, 0, ns[1]-1);

  for ( a=0; a<ns[1]; a++)
    {
      seq1=list[a][2];
      /* SC = score (field 1), R = real class (field 0) */
      fprintf ( stdout, "PSEQ: %-10s SC: %5d R: %4d\n", A->name[seq1],list[a][1], list[a][0]);
    }

  for (n=0, a=0; a<ns[1]; a++)n+=list[a][0];
  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<ns[1]; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
	{
	  best_delta=delta;
	  best_tp=tp;
	  best_fp=fp;
	}
    }
  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=ns[1]-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);
  return R;
}
/*
 * cache2pred4: nearest-neighbour prediction restricted to the columns that
 * the cache marks as interesting.
 *
 * A column is "used" when cache[seq][col] is non-zero for at least one
 * sequence; each used column is recorded exactly once (the previous code
 * appended it once per such sequence and could write past the end of the
 * len_aln-sized list).  A percent-identity matrix is computed on the used
 * columns; when no column is used all similarities are 0 instead of dividing
 * by zero.
 *
 * Every test sequence (ls[1]) is scored by its highest similarity to a
 * training sequence (ls[0]) flagged 'I' for the compound.  The test
 * sequences are ordered with sort_int_inv on that score (presumably best
 * first - confirm) and the cut-off is the prefix whose size is closest to n,
 * the number of real positives.
 *
 * mode is not used.  Returns a 2x2 table R[pred][real] allocated with
 * declare_float.
 */
float** cache2pred4 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int s1, s2, seq1, seq2, ci, a,b, c, n;
  double score;
  float **R;


  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;
  int **sim;
  int *ul;
  int nused=0;

  /*Predict on ns[1] what was trained on ns[0]*/
  /*Identify interesting columns*/
  ul=vcalloc ( A->len_aln, sizeof (int));
  for (a=0; a< A->len_aln; a++)
    for ( b=0; b< A->nseq; b++)
      if ( cache[b][a])
	{
	  ul[nused++]=a;
	  break; /* one entry per column keeps nused <= len_aln */
	}

  /*compute the similarity on the used columns*/

  R=declare_float (2, 2);
  sim=declare_int (A->nseq, A->nseq);
  for (a=0; a< A->nseq; a++)
    for ( b=0; b< A->nseq; b++)
      {
	for (c=0; c< nused; c++)
	  {
	    if ( A->seq_al[a][ul[c]]==A->seq_al[b][ul[c]])sim[a][b]++;
	  }
	if (nused>0)sim[a][b]=(sim[a][b]*100)/nused;
      }
  vfree (ul);

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  list=declare_int ( ns[1],2);   /* {real class, best similarity to an 'I' training sequence} */

  for (s1=0; s1<ns[1]; s1++)
    {

      seq1=ls[1][s1];

      for (score=0,s2=0; s2<ns[0]; s2++)
	{
	  seq2=ls[0][s2];

	  if ( seq1==seq2)continue;
	  if (S->seq_al[ci][seq2]=='I')score=MAX(score, sim[seq1][seq2]);
	}
      list[s1][0]=(S->seq_al[ci][seq1]=='I')?1:0;
      list[s1][1]=(int)score;

    }
  sort_int_inv (list, 2, 1, 0, ns[1]-1);

  for (n=0, a=0; a<ns[1]; a++)n+=list[a][0];
  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<ns[1]; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
	{
	  best_delta=delta;
	  best_tp=tp;
	  best_fp=fp;
	}
    }
  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=ns[1]-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);
  free_int (sim, -1);
  return R;
}

/*
 * cache2pred5: nearest-neighbour prediction on the full-alignment "idmat"
 * similarity matrix.
 *
 * The matrix is computed from A on the first call and kept in static storage
 * for all later calls.  Every test sequence (ls[1]) is scored by its highest
 * similarity to a training sequence (ls[0]) flagged 'I' for the compound.
 * The test sequences are ordered with sort_int_inv on that score and the
 * cut-off is the prefix of the ordered list whose size is closest to the
 * number of real positives.
 *
 * cache and mode are not used.  Returns a 2x2 table R[pred][real] allocated
 * with declare_float.
 */
float** cache2pred5 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  static int **sim;
  float **R;
  int **ranked;      /* {real class, best similarity} per test sequence */
  int ci, t, k;
  int n_pos=0;
  int tp=0, fp=0, fn, tn;
  int best_tp=0, best_fp=0;
  int delta, best_delta=100000;

  /*Predict on ns[1] what was trained on ns[0]*/
  R=declare_float (2, 2);

  if (!sim) {
    sim=aln2sim_mat (A, "idmat");
  }

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  ranked=declare_int ( ns[1],2);

  for (t=0; t<ns[1]; t++) {
    int query=ls[1][t];
    double best=0;

    for (k=0; k<ns[0]; k++) {
      int ref=ls[0][k];

      if (query==ref) continue;
      if (S->seq_al[ci][ref]=='I') best=MAX(best, sim[query][ref]);
    }
    ranked[t][0]=(S->seq_al[ci][query]=='I')?1:0;
    ranked[t][1]=(int)best;
  }
  sort_int_inv (ranked, 2, 1, 0, ns[1]-1);

  for (t=0; t<ns[1]; t++) {
    n_pos+=ranked[t][0];
  }

  for (t=0; t<ns[1]; t++) {
    tp+=ranked[t][0];
    fp+=1-ranked[t][0];
    delta=(n_pos-(tp+fp));
    if (FABS(delta)<best_delta) {
      best_delta=delta;
      best_tp=tp;
      best_fp=fp;
    }
  }

  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n_pos-tp;
  tn=ns[1]-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;

  free_int (ranked, -1);
  return R;
}

/*
 * jacknife5
 * Leave-one-out evaluation. For every sequence a of A, a cache is trained with
 * sar2cache_count1 on all the other sequences, added into cacheIN (rows 0..26)
 * and used to score sequence a. The score is reported as a percentage of the
 * sum of the cache[26] row. Sequences are then ranked on that percentage and
 * the cut-off is placed where the number of predicted positives is closest to
 * the number of real positives.
 *
 * Side effects: ns[0], ns[1], ls[0] and ls[1] are overwritten on every round;
 * one line per sequence is written to stdout.
 *
 * Returns a newly allocated 2x2 matrix R[pred][real]:
 *   R[1][1]=tp, R[1][0]=fp, R[0][1]=fn, R[0][0]=tn.
 */
float** jacknife5 (Alignment*A,int **cacheIN, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int seq1, ci, a,b, c, n;
  double score, max_score, ratio;
  float **R;

  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;
  int **cache;

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  list=declare_int (A->nseq,2);
  R=declare_float (2, 2);

  for ( a=0; a<A->nseq; a++)
    {
      int real, res;

      /*Train on every sequence but a, test on a*/
      ns[0]=A->nseq-1;
      ns[1]=1;
      for (c=0,b=0; b<A->nseq; b++)
        if (a!=b)ls[0][c++]=b;
      ls[1][0]=a;

      cache=sar2cache_count1 (A, ns, ls,S, compound, mode);
      for (b=0; b<=26; b++)
        for ( c=0; c< A->len_aln; c++)
          cacheIN[b][c]+=cache[b][c];

      seq1=a;
      real=(S->seq_al[ci][seq1]=='I')?1:0;
      fprintf ( stdout, ">%-10s %d ", A->name[seq1], real);

      for (max_score=0,b=0; b<A->len_aln; b++)
        max_score+=cache[26][b];

      for (score=0,b=0; b<A->len_aln; b++)
        {
          res=tolower (A->seq_al[seq1][b]);
          if ( cache[26][b]==0) continue;
          if ( !is_gap(res))
            {
              score+=cache[res-'a'][b];
            }
          /*fprintf ( stdout, "%c[%3d]", res,b);*/
        }

      /*An empty cache gives max_score==0: report 0 instead of dividing by
        zero and converting the resulting NaN/inf to int*/
      ratio=(max_score!=0)?(score*100)/max_score:0;
      fprintf ( stdout, " SCORE: %5d SPRED %d RATIO: %.2f \n", (int)score, a, ratio);
      list[a][0]=real;
      /*The SIMTEST and default modes stored the same value, hence a single assignment*/
      list[a][1]=(int)ratio;
      free_int (cache, -1);
    }

  sort_int_inv (list, 2, 1, 0, A->nseq-1);
  for (n=0, a=0; a<A->nseq; a++)
    {
      n+=list[a][0];
    }

  /*Cut the ranking where the number of predicted positives best matches n*/
  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<A->nseq; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
        {
          best_delta=delta;
          best_tp=tp;
          best_fp=fp;
        }
    }

  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=A->nseq-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);

  return R;
}
/*
 * jacknife6
 * Leave-one-out evaluation based on sar2cache_proba_new. For every sequence a,
 * a cache is trained on all the other sequences; for each column with a
 * non-zero cache[26] entry, only the residues observed in at least one 'I'
 * training sequence keep their cache value (copied into new_cache). Sequence a
 * is then scored with new_cache, and the ranking is cut where the number of
 * predicted positives is closest to the number of real positives.
 *
 * The incoming cache argument is not read: the local copy of the pointer is
 * replaced by the freshly trained cache on every round.
 * Side effects: ns[0], ns[1], ls[0] and ls[1] are overwritten; one line per
 * sequence goes to stdout and the retained tp/fp go to stderr.
 *
 * Returns a newly allocated 2x2 matrix R[pred][real]:
 *   R[1][1]=tp, R[1][0]=fp, R[0][1]=fn, R[0][0]=tn.
 */
float** jacknife6 (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int seq1, ci, a,b, c,d,e,f, n;
  double score;
  float **R;

  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  /*One row per sequence: list is indexed by sequence and sorted over
    A->nseq rows below (it used to be sized with A->len_aln)*/
  list=declare_int (A->nseq,2);
  R=declare_float (2, 2);

  for ( a=0; a<A->nseq; a++)
    {
      int sar, res;
      int **new_cache;

      /*Train on every sequence but a, test on a*/
      ns[0]=A->nseq-1;
      ns[1]=1;
      for (c=0,b=0; b<A->nseq; b++)
        if (a!=b)ls[0][c++]=b;
      ls[1][0]=a;

      cache=sar2cache_proba_new (A, ns, ls,S, compound, mode);

      new_cache=declare_int (27,A->len_aln);

      for (d=0; d< A->len_aln; d++)
        {
          int **analyze;
          if ( cache[26][d]==0)continue;
          analyze=declare_int (26, 2);

          /*analyze[residue][sar]: residue counts of column d in the training set*/
          for ( e=0; e< ns[0]; e++)
            {
              f=ls[0][e];
              sar=(S->seq_al[ci][f]=='I')?1:0;
              res=tolower (A->seq_al[f][d]);

              if ( res=='-') continue;
              analyze[res-'a'][sar]++;
            }
          for (e=0;e<26; e++)
            {
              if ( analyze[e][1]){new_cache[26][d]=1;new_cache[e][d]+=cache[e][d];}
              /*
              if ( analyze[e][0] && analyze[e][1]){new_cache[26][d]=1;new_cache[e][d]+=analyze[e][1];}
              else if ( analyze[e][0]){new_cache[26][d]=1;new_cache[e][d]-=analyze[e][0]*10;}
              else if ( analyze[e][1]){new_cache[26][d]=1;new_cache[e][d]+=analyze[e][1];}
              else if ( !analyze[e][0] &&!analyze[e][1]);
              */
            }
          free_int (analyze, -1);
        }

      seq1=a;
      sar=(S->seq_al[ci][seq1]=='I')?1:0;
      fprintf ( stdout, ">%-10s %d ", A->name[seq1], sar);

      for (score=0,b=0; b<A->len_aln; b++)
        {
          res=tolower (A->seq_al[seq1][b]);
          if ( cache[26][b]==0) continue;
          if ( !is_gap(res))
            {
              score+=new_cache[res-'a'][b];
            }
        }
      fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score);
      list[seq1][0]=sar;
      list[seq1][1]=(int)score;

      free_int (new_cache, -1);
      free_int (cache, -1);
    }
  sort_int_inv (list, 2, 1, 0, A->nseq-1);
  for (n=0, a=0; a<A->nseq; a++)n+=list[a][0];

  /*Cut the ranking where the number of predicted positives best matches n*/
  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<A->nseq; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
        {
          best_delta=delta;
          best_tp=tp;
          best_fp=fp;
        }
    }

  fprintf ( stderr, "\n%d %d", best_tp, best_fp);
  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=A->nseq-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);

  return R;
}
/*
 * cache2pred_new
 * Scores every test sequence (ls[1], ns[1] entries) against a residue cache:
 * on each column whose cache[26] entry is non-zero, the cache value of the
 * (lower-cased) residue is added to the score, gaps contributing nothing.
 * The residues of the scored columns and the score are printed on stdout.
 * The ranking is then cut where the number of predicted positives is closest
 * to the number of real positives. mode is not used.
 *
 * Returns a newly allocated 2x2 matrix R[pred][real]:
 *   R[1][1]=tp, R[1][0]=fp, R[0][1]=fn, R[0][0]=tn.
 */
float** cache2pred_new (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  float **R;
  int **ranked;
  int comp, i, col, n_pos;
  int tp, fp, fn, tn;
  int best_tp, best_fp, delta, best_delta;

  comp=name_is_in_list ( compound, S->name, S->nseq, -1);
  ranked=declare_int ( ns[1],2);
  R=declare_float (2, 2);

  for (i=0; i<ns[1]; i++)
    {
      int target=ls[1][i];
      int active=(S->seq_al[comp][target]=='I')?1:0;
      double total=0;
      int aa;

      fprintf ( stdout, ">%-10s %d ", A->name[target], active);
      for (col=0; col<A->len_aln; col++)
        {
          aa=tolower (A->seq_al[target][col]);
          if ( cache[26][col]!=0)
            {
              if ( !is_gap(aa))
                {
                  total+=cache[aa-'a'][col];
                }
              fprintf ( stdout, "%c", aa);
            }
        }
      fprintf ( stdout, " SCORE: %5d SPRED\n", (int)total);

      ranked[i][0]=active;
      ranked[i][1]=(int)total;
    }

  sort_int_inv (ranked, 2, 1, 0, ns[1]-1);

  /*Number of real positives among the test sequences*/
  n_pos=0;
  for (i=0; i<ns[1]; i++)
    {
      n_pos+=ranked[i][0];
    }

  /*Cut the ranking where the number of predicted positives best matches n_pos*/
  best_delta=100000;
  best_tp=best_fp=0;
  tp=fp=0;
  for (i=0; i<ns[1]; i++)
    {
      tp+=ranked[i][0];
      fp+=1-ranked[i][0];
      delta=n_pos-(tp+fp);
      if (FABS(delta)<best_delta)
        {
          best_delta=delta;
          best_tp=tp;
          best_fp=fp;
        }
    }

  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n_pos-tp;
  tn=ns[1]-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;

  free_int (ranked, -1);
  return R;
}
/*
 * cache2pred_forbiden_res
 * Two-pass prediction of the test sequences (ls[1], ns[1] entries).
 *  1. The test sequences are scored with the incoming cache; this pass only
 *     produces stdout output, its list entries are overwritten by pass 3.
 *  2. For every column with a non-zero cache[26] entry, the residues seen in
 *     the training sequences (ls[0], ns[0] entries) are classified ('1': seen
 *     in an 'I' sequence, '0': seen only in non-'I' sequences, '-': not seen).
 *     Residues of class '1', plus those having a positive "blosum62mt" entry
 *     with enough of them (keep>8), get +10 in new_cache, all others -10.
 *     Diagnostic lines (RSPRED, MATANALYZE) are printed along the way.
 *  3. The test sequences are scored again with new_cache, ranked, and the
 *     ranking is cut where the number of predicted positives is closest to
 *     the number of real positives.
 * mode is not used.
 *
 * Returns a newly allocated 2x2 matrix R[pred][real]:
 *   R[1][1]=tp, R[1][0]=fp, R[0][1]=fn, R[0][0]=tn.
 */
float** cache2pred_forbiden_res (Alignment*A,int **cache, int *ns, int **ls, Alignment *S, char *compound, char *mode)
{
  int s1,seq1, ci, a,b, c, n;
  double score;
  float **R;

  int tp, tn, fn, fp;
  int best_tp, best_fp;
  int delta, best_delta;
  int **list;
  int **new_cache;
  int **mat;

  /*NOTE(review): mat is re-read on every call and never released here;
    sibling functions keep it in a static -- confirm the intended ownership*/
  mat=read_matrice ( "blosum62mt");

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  list=declare_int ( ns[1],2);
  R=declare_float (2, 2);

  /*Pass 1: score with the incoming cache*/
  for (s1=0; s1<ns[1]; s1++)
    {
      int res, real;

      seq1=ls[1][s1];
      real=(S->seq_al[ci][seq1]=='I')?1:0;
      fprintf ( stdout, ">%-10s %d ", A->name[seq1], real);
      for (score=0,b=0; b<A->len_aln; b++)
        {
          res=tolower (A->seq_al[seq1][b]);
          if ( cache[26][b]==0) continue;
          if ( !is_gap(res))
            {
              score+=cache[res-'a'][b];
            }
          fprintf ( stdout, "%c", res);
        }
      fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score);
      list[s1][0]=real;
      list[s1][1]=(int)score;
    }

  /*Pass 2: rebuild a +10/-10 cache from the training set*/
  new_cache=declare_int (27,A->len_aln);
  for (a=0; a< A->len_aln; a++)
    {
      int **analyze, real, res, d;
      int *res_type;
      int **sub;
      int *keep;

      /*Skip unused columns BEFORE allocating the work buffers: they used to
        be allocated first and were leaked on every skipped column*/
      if ( cache[26][a]==0)continue;

      keep=vcalloc ( 26, sizeof (int));
      res_type=vcalloc ( 26, sizeof (int));
      sub=declare_int (256, 2);
      analyze=declare_int (26, 2);

      /*analyze[residue][real]: residue counts of column a in the training set*/
      for ( b=0; b< ns[0]; b++)
        {
          seq1=ls[0][b];
          real=(S->seq_al[ci][seq1]=='I')?1:0;
          res=tolower (A->seq_al[seq1][a]);

          if ( res=='-') continue;
          analyze[res-'a'][real]++;
        }
      fprintf ( stdout, "RSPRED: ");
      for (c=0;c<26; c++)fprintf ( stdout, "%c", c+'a');
      fprintf ( stdout, "\nRSPRED: ");
      for (c=0;c<26; c++)
        {
          if ( analyze[c][0] && analyze[c][1]){fprintf ( stdout, "1");res_type[c]='1';}
          else if ( analyze[c][0]){new_cache[26][a]=1;new_cache[c][a]-=analyze[c][0];fprintf ( stdout, "0");res_type[c]='0';}
          else if ( analyze[c][1]){new_cache[26][a]=1;new_cache[c][a]+=analyze[c][1];fprintf ( stdout, "1");res_type[c]='1';}
          else if ( !analyze[c][0] &&!analyze[c][1]){fprintf ( stdout, "-");res_type[c]='-';}
        }

      /*Average substitution score within each class, and over all observed
        residues (stored under the key 'm'); diagnostic output only*/
      for ( c=0; c<26; c++)
        {
          for ( d=0; d<26; d++)
            {
              if ( res_type[c]==res_type[d])
                {
                  sub[res_type[c]][0]+=mat[c][d];
                  sub[res_type[c]][1]++;
                }
              if ( res_type[c]!='-' && res_type[d]!='-')
                {
                  sub['m'][0]+=mat[c][d];
                  sub['m'][1]++;
                }
            }
        }
      for ( c=0; c< 256; c++)
        {
          if ( sub[c][1])fprintf ( stdout, " %c: %5.2f ", c, (float)sub[c][0]/(float)sub[c][1]);
        }
      fprintf ( stdout, " SC: %d\nRSPRED  ", cache[26][a]);

      /*keep[d]: number of class-'1' residues having a positive matrix entry
        with d; the class-'1' residue itself is forced to 9*/
      for ( c=0; c<26; c++)
        if ( res_type[c]=='1')
          {
            for (d=0; d<26; d++)
              if (mat[c][d]>0)keep[d]++;
            keep[c]=9;
          }

      for (c=0; c<26; c++)
        {
          if ( keep[c]>10)fprintf ( stdout, "9");
          else fprintf ( stdout, "%d", keep[c]);
        }
      /*This overwrites the per-residue counts written into new_cache above*/
      for ( c=0; c<26; c++)
        {
          if ( keep[c]>8)new_cache[c][a]=10;
          else new_cache[c][a]=-10;
        }
      fprintf ( stdout, "\n");
      free_int (analyze, -1);
      free_int (sub, -1);
      vfree (res_type);
      vfree (keep);
    }

  /*Diagnostic dump of the positive off-diagonal matrix entries*/
  for ( a=0; a<25; a++)
    for (b=a+1; b<26; b++)
      {
        int r1, r2;
        r1=a+'a';r2=b+'a';
        if ( strchr("bjoxz", r1))continue;
        if ( strchr("bjoxz",r2))continue;

        if ( mat[a][b]>0 && a!=b)fprintf ( stdout, "\nMATANALYZE %c %c %d", a+'a', b+'a', mat[a][b]);
      }

  /*Pass 3: score with the rebuilt cache (columns are still selected with the incoming cache)*/
  for (s1=0; s1<ns[1]; s1++)
    {
      int res, real;

      seq1=ls[1][s1];
      real=(S->seq_al[ci][seq1]=='I')?1:0;
      fprintf ( stdout, ">%-10s %d ", A->name[seq1], real);
      for (score=0,b=0; b<A->len_aln; b++)
        {
          res=tolower (A->seq_al[seq1][b]);
          if ( cache[26][b]==0) continue;
          if ( !is_gap(res))
            {
              score+=new_cache[res-'a'][b];
            }
          fprintf ( stdout, "%c", res);
        }
      fprintf ( stdout, " SCORE: %5d SPRED\n", (int)score);
      list[s1][0]=real;
      list[s1][1]=(int)score;
    }
  free_int (new_cache, -1);
  sort_int_inv (list, 2, 1, 0, ns[1]-1);

  for (n=0, a=0; a<ns[1]; a++)n+=list[a][0];

  /*Cut the ranking where the number of predicted positives best matches n*/
  for (best_delta=100000,best_tp=0,tp=0,fp=0,best_fp=0,a=0; a<ns[1]; a++)
    {
      tp+=list[a][0];
      fp+=1-list[a][0];
      delta=(n-(tp+fp));
      if (FABS(delta)<best_delta)
        {
          best_delta=delta;
          best_tp=tp;
          best_fp=fp;
        }
    }

  /*R[pred][real]*/
  tp=best_tp;
  fp=best_fp;
  fn=n-tp;
  tn=ns[1]-(tp+fp+fn);
  R[1][1]=tp;
  R[1][0]=fp;
  R[0][1]=fn;
  R[0][0]=tn;
  free_int (list, -1);

  return R;
}

/*
 * sar2cache_proba_old
 * Builds a cache[sequence][column] of column scores from the training
 * sequences (ls[0], ns[0] entries).
 *  - For every training sequence flagged 'I' (the "master"), each non-gap
 *    column is scored with evaluate_sar_score1 from (optionally similarity-
 *    weighted) counts of the training sequences sharing the master residue.
 *    The _MAXN1_ best columns whose score lies strictly between _FILTER1_ and
 *    _FILTER2_ are written into the master's row.
 *  - Columns whose summed score lies outside [_FILTER3_, _FILTER4_] are
 *    cleared, then either the columns under the _ZSCORE_ threshold or the
 *    columns ranked after _MAXN2_ are cleared, and finally the _MAXN3_ first
 *    entries of the column list are cleared.
 * Parameters are read from mode with strget_param; the "idmat" similarity
 * matrix is computed once and kept in a static.
 *
 * Returns the newly allocated cache (A->nseq rows, A->len_aln columns).
 */
int **sar2cache_proba_old ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int col, s, seq,ms,mseq, res, mres, n,maxn1, maxn2,maxn3, ci, a;
  float quant=0;
  int **list;

  int N1msa,N1sar, N, N11, N10, N01,N00, SCORE, COL_INDEX, RES;
  int nfield=0;
  int value;
  float T1, T2, T3, T4;
  int weight_mode;
  int **cache;
  static int **sim;
  int sim_weight, w, sw_thr;
  int train_mode;

  float zscore;

  /*Field indices of one row of list*/
  RES=nfield++;COL_INDEX=nfield++;N1msa=nfield++;N1sar=nfield++;N=nfield++;N11=nfield++;N10=nfield++;N01=nfield++;N00=nfield++;SCORE=nfield++;
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  cache=declare_int (A->nseq, A->len_aln);

  strget_param ( mode, "_FILTER1_", "0"   , "%f", &T1);
  strget_param ( mode, "_FILTER2_", "1000000", "%f", &T2);
  strget_param ( mode, "_FILTER3_", "0"   , "%f", &T3);
  strget_param ( mode, "_FILTER4_", "1000000", "%f", &T4);
  strget_param ( mode, "_SIMWEIGHT_", "1", "%d", &sim_weight);
  strget_param ( mode, "_SWTHR_", "30", "%d", &sw_thr);
  strget_param (mode, "_TRAIN_","1", "%d", &train_mode);
  strget_param (mode, "_ZSCORE_","0", "%f", &zscore);

  if (sim_weight==1 && !sim) sim=aln2sim_mat(A, "idmat");
  for ( ms=0; ms<ns[0]; ms++)
    {
      mseq=ls[0][ms];
      if ( S->seq_al[ci][mseq]!='I')continue;

      list=declare_int (A->len_aln+1, nfield);
      for (n=0, col=0; col< A->len_aln; col++)
        {
          int same_res;

          mres=tolower(A->seq_al[mseq][col]);
          list[col][RES]=mres;
          list[col][COL_INDEX]=col;

          if ( is_gap(mres))continue;
          for ( s=0; s<ns[0]; s++)
            {
              seq=ls[0][s];
              res=tolower(A->seq_al[seq][col]);
              if (is_gap(res))continue;

              if (sim_weight==1)
                {
                  /*Identical residues weigh more when the sequences are distant,
                    different residues weigh more when they are similar*/
                  w=sim[seq][mseq];w=(mres==res)?100-w:w;
                  if (w<sw_thr)w=0;
                }
              else
                w=1;

              if ( train_mode==4)
                {
                  if ( S->seq_al[ci][seq]=='I')same_res=1;
                  else same_res=(res==mres)?1:0;
                }
              else
                same_res=(res==mres)?1:0;

              list[col][N]+=w;

              /*2x2 contingency table. The N10 and N00 branches used to repeat
                the "same_res" test and were unreachable; neither count feeds
                the score below, so the scores are unchanged*/
              if (S->seq_al[ci][seq]=='I' && same_res)list[col][N11]+=w;
              else if (S->seq_al[ci][seq]=='I' && !same_res)list[col][N10]+=w;
              else if (S->seq_al[ci][seq]=='O' && same_res)list[col][N01]+=w;
              else if (S->seq_al[ci][seq]=='O' && !same_res)list[col][N00]+=w;

              if ( S->seq_al[ci][seq]=='I')list[col][N1sar]+=w;
              if ( same_res)list[col][N1msa]+=w;
            }

          list[col][SCORE]=(int)evaluate_sar_score1 (list[col][N], list[col][N11], list[col][N1msa], list[col][N1sar]);
        }

      strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1);
      strget_param ( mode, "_WEIGHT_", "1", "%d", &weight_mode);
      strget_param ( mode, "_QUANT_", "0.0", "%f", &quant);

      sort_int_inv (list,nfield,SCORE,0,A->len_aln-1);
      if ( quant !=0)
        {
          n=quantile_rank ( list,SCORE, A->len_aln,quant);
          sort_int (list,nfield,N1msa, 0, n-1);
          maxn1=MIN(n,maxn1);
        }

      /*Only the A->len_aln first rows describe real columns: never walk past them*/
      if (maxn1>A->len_aln)maxn1=A->len_aln;
      for (a=0; a<maxn1; a++)
        {
          col=list[a][COL_INDEX];
          value=list[a][SCORE];
          if ( value>T1 && value<T2){cache[mseq][col]= value;}
        }
      free_int (list, -1);
    }

  /*Filter Columns: sum the scores of each column over the training sequences*/
  list=declare_int (A->len_aln+1, nfield);
  for ( col=0; col< A->len_aln; col++)
    {
      list[col][COL_INDEX]=col;
      for ( s=0; s<ns[0]; s++)
        {
          seq=ls[0][s];
          list[col][SCORE]+=cache[seq][col];
        }
    }

  /*Filter Columns with a score not between T3 and T4*/
  for (col=0; col< A->len_aln; col++)
    if (list[col][SCORE]<T3 || list[col][SCORE]>T4)
      {
        list[col][SCORE]=0;
        for (s=0; s< A->nseq; s++)
          if (!is_gap(A->seq_al[s][col]))cache[s][col]=0;
      }

  /*Keep The N Best Columns*/
  if ( zscore!=0)
    {
      double sum=0, sum2=0, z;
      int npos=0;
      for (a=0; a< A->len_aln; a++)
        {
          if ( list[a][SCORE]>0)
            {
              sum+=list[a][SCORE];
              sum2+=list[a][SCORE]*list[a][SCORE];
              npos++;
            }
        }
      for (a=0; a<A->len_aln; a++)
        {
          if ( list[a][SCORE]>0)
            {
              z=return_z_score (list[a][SCORE], sum, sum2,npos);
              if ((float)z<zscore)
                {
                  col=list[a][COL_INDEX];
                  for (s=0; s<A->nseq; s++)
                    cache [s][col]=0;
                }
              else
                {
                  fprintf ( stdout, "\nZSCORE: KEEP COL %d SCORE: %f SCORE: %d\n", list[a][COL_INDEX], (float)z, list[a][SCORE]);
                }
            }
        }
    }
  else
    {
      sort_int_inv (list,nfield,SCORE,0,A->len_aln-1);
      strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2);

      /*A negative _MAXN2_ must not be used as a row index*/
      if (maxn2<0)maxn2=0;
      for (a=maxn2;a<A->len_aln; a++)
        {
          col=list[a][COL_INDEX];
          for (s=0; s<A->nseq; s++)
            cache [s][col]=0;
        }
    }

  /*Get Rid of the N best Columns*/
  /*NOTE(review): list is only sorted in the _MAXN2_ branch above; with a
    z-score filter the "N best" are simply the N first columns*/
  strget_param ( mode, "_MAXN3_", "0", "%d", &maxn3);

  for (a=0; a<maxn3 && a<A->len_aln;a++)
    {
      col=list[a][COL_INDEX];
      for (s=0; s<A->nseq; s++)
        cache [s][col]=0;
    }

  /*The column list was never released*/
  free_int (list, -1);
  return cache;
}
/*
 * aln2n_comp_col
 * Counts, over all the columns of A, the (column, residue) pairs where the
 * residue is carried by every sequence flagged 'I' in S->seq_al[ci] and by
 * fewer than all of the other sequences. Residues are lower-cased; '-' is
 * ignored when counting residues but still counts as a sequence.
 *
 * Returns that count.
 */
int aln2n_comp_col ( Alignment *A, Alignment *S, int ci)
{
  int **counts;
  int column, row, aa, k;
  int hits=0;

  counts=declare_int (27, 2);
  for (column=0; column<A->len_aln; column++)
    {
      int n_active=0, n_other=0;

      /*counts[residue][active]: how often each residue occurs in each class*/
      for (row=0; row<A->nseq; row++)
        {
          int active=(S->seq_al[ci][row]=='I')?1:0;

          aa=tolower(A->seq_al[row][column]);
          if (active)n_active++;
          else n_other++;

          if ( aa!='-')
            {
              counts[aa-'a'][active]++;
            }
        }

      /*Test each residue, then clear its counters for the next column*/
      for (k=0; k<26; k++)
        {
          if ( counts[k][1]==n_active && counts[k][0]<n_other)
            {
              hits++;
            }
          counts[k][0]=0;
          counts[k][1]=0;
        }
    }

  free_int (counts, -1);
  return hits;
}
/*
 * sar2cache_count1
 * Builds a 27 x len_aln cache from the training sequences (ls[0], ns[0]
 * entries). For every column and residue r:
 *   - in "SIMTEST" mode the weight is the number of 'I' sequences carrying r;
 *   - otherwise, when r occurs in at least one 'I' sequence, the weight is
 *     the number of non-'I' sequences NOT carrying r, else 0.
 * cache[r][col] receives the weight and cache[26][col] the largest weight of
 * the column. Columns ranked after _MAXN2_ (read from mode) are cleared.
 * The "blosum62mt" matrix is loaded on the first call but not read here.
 *
 * Returns the newly allocated cache.
 */
int **sar2cache_count1 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  static int **mat;
  int **counts, **ranking, **cache;
  int comp, column, k, aa, keep, rank;
  int simtest;

  if (!mat)
    {
      mat=read_matrice ("blosum62mt");
    }

  ranking=declare_int ( A->len_aln, 2);
  cache=declare_int ( 27, A->len_aln);
  counts=declare_int (27, 2);

  comp=name_is_in_list ( compound, S->name, S->nseq, -1);
  simtest=strstr (mode, "SIMTEST")?1:0;

  for (column=0; column<A->len_aln; column++)
    {
      int n_active=0, n_other=0;

      /*counts[residue][active] over the training set*/
      for (k=0; k<ns[0]; k++)
        {
          int member=ls[0][k];
          int active=(S->seq_al[comp][member]=='I')?1:0;

          aa=tolower(A->seq_al[member][column]);
          if (active)n_active++;
          else n_other++;

          if ( aa!='-')
            {
              counts[aa-'a'][active]++;
            }
        }

      /*Turn the counts into weights and clear them for the next column*/
      for (k=0; k<26; k++)
        {
          int in_other=counts[k][0];
          int in_active=counts[k][1];
          int weight;

          if (simtest)weight=in_active;
          else if (in_active)weight=n_other-in_other;
          else weight=0;

          cache[k][column]+=weight;
          cache[26][column]=MAX(weight, cache[26][column]);

          counts[k][0]=0;
          counts[k][1]=0;
        }

      ranking[column][0]=column;
      ranking[column][1]=cache[26][column];
    }

  free_int (counts, -1);

  sort_int_inv (ranking, 2, 1, 0, A->len_aln-1);

  strget_param ( mode, "_MAXN2_", "100000", "%d", &keep);

  /*Clear every column ranked from position keep onwards*/
  for (rank=keep; rank<A->len_aln; rank++)
    {
      for (k=0; k<=26; k++)
        {
          cache[k][ranking[rank][0]]=0;
        }
    }

  free_int (ranking, -1);
  return cache;
}


/*
 * sar2cache_count2
 * Builds a 27 x len_aln cache from the training sequences (ls[0], ns[0]
 * entries).
 *  1. For each column, a residue carried by every 'I' sequence and by fewer
 *     than all the other sequences is recorded in conseq[col][0], with
 *     weight n0-a0 in conseq[col][1] (the last matching residue wins).
 *  2. For each training sequence, w2 counts the recorded columns where it has
 *     a residue and w1 those where that residue differs from the recorded
 *     one; (w2-w1) is added to conseq[col][2] on every column where it differs.
 *  3. conseq[col][2] is stored in cache[recorded residue][col] and
 *     cache[26][col]; columns ranked after _MAXN2_ (read from mode) are cleared.
 * The "blosum62mt" matrix is loaded on the first call but not read here.
 *
 * Returns the newly allocated cache.
 */
int **sar2cache_count2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int maxn2, res, seq,sar, ci, col,s, r;
  int **analyze, **list, **cache, **conseq;
  static int **mat;
  int w=0;
  if (!mat) mat=read_matrice ("blosum62mt");

  list=declare_int ( A->len_aln, 2);
  cache=declare_int ( 27, A->len_aln);
  conseq=declare_int ( A->len_aln,3);

  analyze=declare_int (27, 2);

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  for ( col=0; col< A->len_aln; col++)
    {
      int n1, n0;

      /*analyze[residue][sar] over the training set*/
      for ( n1=0, n0=0,s=0; s<ns[0]; s++)
        {
          seq=ls[0][s];
          res=tolower(A->seq_al[seq][col]);
          sar=(S->seq_al[ci][seq]=='I')?1:0;
          n1+=(sar==1)?1:0;
          n0+=(sar==0)?1:0;
          if ( res=='-')continue;
          res-='a';
          analyze[res][sar]++;
        }
      for (r=0; r<26; r++)
        {
          int a0,a1;
          a0=analyze[r][0];
          a1=analyze[r][1];
          if ( a1==n1 && a0<n0)
            {
              w=n0-a0;
              conseq[col][0]=r;
              conseq[col][1]=w;
            }
        }
      for ( r=0; r<26; r++)analyze[r][0]=analyze[r][1]=0;
    }
  free_int (analyze, -1);

  for (s=0; s<ns[0]; s++)
    {
      int w1, w2;
      seq=ls[0][s];
      for (w1=0,w2=0,col=0; col<A->len_aln; col++)
        {
          res=tolower(A->seq_al[seq][col]);
          if ( is_gap(res))continue;
          else res-='a';

          if ( conseq[col][1] && res!=conseq[col][0])w1++;
          if ( conseq[col][1])w2++;
        }
      for (col=0; col<A->len_aln; col++)
        {
          res=tolower(A->seq_al[seq][col]);
          if ( is_gap(res))continue;
          else res-='a';

          if ( conseq[col][1] && res!=conseq[col][0])conseq[col][2]+=(w2-w1);
        }
    }

  for (col=0; col<A->len_aln; col++)
    {
      r=conseq[col][0];
      w=conseq[col][2];

      cache[r][col]=cache[26][col]=list[col][1]=w;
      list[col][0]=col;
    }
  /*conseq is no longer needed past this point; it was never released*/
  free_int (conseq, -1);

  sort_int_inv (list, 2, 1, 0, A->len_aln-1);
  strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2);

  /*Clear every column ranked from position maxn2 onwards*/
  for ( col=maxn2; col<A->len_aln; col++)
    for ( r=0; r<=26; r++)cache[r][list[col][0]]=0;

  free_int (list, -1);
  return cache;
}

/*
 * sar2cache_count3
 * Builds a 27 x len_aln cache from the training sequences (ls[0], ns[0]
 * entries). Columns with more than 10% gaps (measured over all the sequences
 * of A) are left empty. On the other columns:
 *   - in "SIMTEST" mode, cache[r][col] is the number of 'I' sequences
 *     carrying residue r;
 *   - otherwise, cache[r][col] is the number of non-'I' sequences carrying r,
 *     for the residues that never occur in an 'I' sequence.
 * cache[26][col] holds the largest value of the column. Columns ranked after
 * _MAXN2_ (read from mode) are cleared.
 * The "blosum62mt" matrix is loaded on the first call but not read here.
 * An entropy filter once sketched here was disabled (its value was forced
 * to 0), so only the gap filter is applied.
 *
 * Returns the newly allocated cache.
 */
int **sar2cache_count3 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int  maxn2, res, seq,sar, ci, col,s, r, a1, a0;
  int **analyze, **list, **cache;
  static int **mat;

  if (!mat) mat=read_matrice ("blosum62mt");

  list=declare_int ( A->len_aln, 2);
  cache=declare_int ( 27, A->len_aln);
  analyze=declare_int (27, 2);

  ci=name_is_in_list ( compound, S->name, S->nseq, -1);

  for ( col=0; col< A->len_aln; col++)
    {
      double g;

      /*Register the column first: a skipped column used to keep index 0 in
        list, so the _MAXN2_ clean-up below could wipe column 0 in its place*/
      list[col][0]=col;

      /*Gap: percentage of gaps in the column*/
      for (g=0,r=0; r<A->nseq; r++)
        g+=is_gap(A->seq_al[r][col]);
      g=(100*g)/A->nseq;

      /*Skip gappy columns BEFORE counting residues: the counters used to be
        filled first and were not reset on this path, so they leaked into the
        next column*/
      if (g>10) continue;

      /*analyze[residue][sar] over the training set*/
      for ( s=0; s<ns[0]; s++)
        {
          seq=ls[0][s];
          res=tolower(A->seq_al[seq][col]);
          sar=(S->seq_al[ci][seq]=='I')?1:0;
          if ( res=='-')continue;
          res-='a';

          analyze[res][sar]++;
        }

      if ( strstr ( mode, "SIMTEST"))
        {
          for (r=0; r<26; r++)
            {
              a1=analyze[r][1];

              if (a1)
                {
                  cache[r][col]=a1;
                  cache[26][col]=MAX(cache[26][col],a1);
                }
            }
        }
      else
        {
          for (r=0; r<26; r++)
            {
              a0=analyze[r][0];
              a1=analyze[r][1];

              if (!a1 && a0)
                {
                  cache[r][col]=a0;
                  cache[26][col]=MAX(cache[26][col],a0);
                }
            }
        }

      for ( r=0; r<26; r++)analyze[r][0]=analyze[r][1]=0;
      list[col][1]=cache[26][col];
    }

  free_int (analyze, -1);

  sort_int_inv (list, 2, 1, 0, A->len_aln-1);

  strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2);

  /*Clear every column ranked from position maxn2 onwards*/
  for ( col=maxn2; col<A->len_aln; col++)
    for ( r=0; r<=26; r++)cache[r][list[col][0]]=0;

  free_int (list, -1);
  return cache;
}


/*
 * sar2cache_proba_new
 * Builds a 27 x len_aln cache from the training sequences (ls[0], ns[0]
 * entries).
 *  - For every training sequence flagged 'I' (the "master"), each non-gap
 *    column is scored with evaluate_sar_score1 from similarity-weighted
 *    counts of the training sequences sharing the master residue. The score
 *    of the _MAXN1_ best columns is added to cache[master residue][col] and
 *    to cache[26][col].
 *  - Then either the columns under the _ZSCORE_ threshold or the columns
 *    ranked after _MAXN2_ are cleared, and finally the _MAXN3_ first entries
 *    of the column list are cleared.
 * Parameters are read from mode with strget_param; the "idmat" similarity
 * matrix is computed once and kept in a static.
 *
 * Returns the newly allocated cache.
 */
int **sar2cache_proba_new ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int col, s, seq,ms,mseq, res, mres, res1,maxn1, maxn2,maxn3, ci, a,w;

  int **list;

  int N1msa,N1sar, N, N11, N10, N01,N00, SCORE, COL_INDEX, RES;
  int nfield=0;
  int value;

  int **cache;
  static int **sim;
  int sw_thr;
  float zscore;

  /*Field indices of one row of list*/
  RES=nfield++;COL_INDEX=nfield++;N1msa=nfield++;N1sar=nfield++;N=nfield++;N11=nfield++;N10=nfield++;N01=nfield++;N00=nfield++;SCORE=nfield++;
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  cache=declare_int (27, A->len_aln);

  strget_param ( mode, "_SWTHR_", "30", "%d", &sw_thr);
  strget_param (mode, "_ZSCORE_","0", "%f", &zscore);

  if (!sim)sim=aln2sim_mat(A, "idmat");
  for ( ms=0; ms<ns[0]; ms++)
    {
      mseq=ls[0][ms];
      if ( S->seq_al[ci][mseq]!='I')continue;

      list=declare_int (A->len_aln+1, nfield);
      for (col=0; col< A->len_aln; col++)
        {
          int same_res;

          mres=tolower(A->seq_al[mseq][col]);
          /*Gap columns keep RES==0, which excludes them from the cache below*/
          if ( is_gap(mres))continue;

          list[col][RES]=mres;
          list[col][COL_INDEX]=col;

          for ( s=0; s<ns[0]; s++)
            {
              seq=ls[0][s];
              res=tolower(A->seq_al[seq][col]);
              if (is_gap(res))continue;
              /*Identical residues weigh more when the sequences are distant,
                different residues weigh more when they are similar*/
              w=sim[seq][mseq];w=(mres==res)?100-w:w;
              if (w<sw_thr)w=0;
              same_res=(res==mres)?1:0;

              list[col][N]+=w;

              /*2x2 contingency table. The N10 and N00 branches used to repeat
                the "same_res" test and were unreachable; neither count feeds
                the score below, so the scores are unchanged*/
              if (S->seq_al[ci][seq]=='I' && same_res)list[col][N11]+=w;
              else if (S->seq_al[ci][seq]=='I' && !same_res)list[col][N10]+=w;
              else if (S->seq_al[ci][seq]=='O' && same_res)list[col][N01]+=w;
              else if (S->seq_al[ci][seq]=='O' && !same_res)list[col][N00]+=w;

              if ( S->seq_al[ci][seq]=='I')list[col][N1sar]+=w;
              if ( same_res)list[col][N1msa]+=w;
            }

          list[col][SCORE]=(int)evaluate_sar_score1 (list[col][N], list[col][N11], list[col][N1msa], list[col][N1sar]);
        }
      strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1);
      sort_int_inv (list,nfield,SCORE,0,A->len_aln-1);

      /*Only the A->len_aln first rows describe real columns: never walk past them*/
      if (maxn1>A->len_aln)maxn1=A->len_aln;
      for (a=0; a<maxn1; a++)
        {
          col=list[a][COL_INDEX];
          res1=list[a][RES];
          value=list[a][SCORE];

          if ( res1!=0)
            {
              cache[res1-'a'][col]+= value;
              cache[26][col]+=value;
            }
        }
      free_int (list, -1);
    }

  /*Filter Columns*/
  list=declare_int (A->len_aln+1, nfield);
  for ( col=0; col< A->len_aln; col++)
    {
      list[col][COL_INDEX]=col;
      list[col][SCORE]=cache[26][col];
    }
  /*Keep The N Best Columns*/
  if ( zscore!=0)
    {
      double sum=0, sum2=0, z;
      int npos=0;
      for (a=0; a< A->len_aln; a++)
        {
          if ( list[a][SCORE]>0)
            {
              sum+=list[a][SCORE];
              sum2+=list[a][SCORE]*list[a][SCORE];
              npos++;
            }
        }
      for (a=0; a<A->len_aln; a++)
        {
          if ( list[a][SCORE]>0)
            {
              z=return_z_score (list[a][SCORE], sum, sum2,npos);
              if ((float)z<zscore)
                {
                  col=list[a][COL_INDEX];
                  for (s=0; s<27; s++)
                    cache [s][col]=0;
                }
              else
                {
                  fprintf ( stdout, "\nZSCORE: KEEP COL %d SCORE: %f SCORE: %d\n", list[a][COL_INDEX], (float)z, list[a][SCORE]);
                }
            }
        }
    }
  else
    {
      sort_int_inv (list,nfield,SCORE,0,A->len_aln-1);
      strget_param ( mode, "_MAXN2_", "100000", "%d", &maxn2);

      /*A negative _MAXN2_ must not be used as a row index*/
      if (maxn2<0)maxn2=0;
      for (a=maxn2;a<A->len_aln; a++)
        {
          col=list[a][COL_INDEX];
          for (s=0; s<27; s++)
            cache [s][col]=0;
        }
    }

  /*Get Rid of the N best Columns*/
  /*NOTE(review): list is only sorted in the _MAXN2_ branch above; with a
    z-score filter the "N best" are simply the N first columns*/
  strget_param ( mode, "_MAXN3_", "0", "%d", &maxn3);

  for (a=0; a<maxn3 && a<A->len_aln;a++)
    {
      col=list[a][COL_INDEX];
      for (s=0; s<27; s++)
        cache [s][col]=0;
    }

  /*The column list was never released*/
  free_int (list, -1);
  return cache;
}
/*
 * sar2cache_adriana
 * Builds a cache[sequence][column] from the training sequences (ls[0], ns[0]
 * entries). For every training sequence flagged 'I' (the "master") and every
 * non-gap column, two counts are taken over the training set:
 *   field 2: sequences sharing the master residue,
 *   field 3: 'I' sequences sharing the master residue.
 * Columns are sorted on field 3 with sort_int_inv, the n first ones (n given
 * by quantile_rank with _QUANT_) are re-sorted on field 2 with sort_int, and
 * field 3 of the _MAXN1_ first rows is written into the master's cache row.
 *
 * Returns the newly allocated cache (A->nseq rows, A->len_aln columns).
 */
int **sar2cache_adriana ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int col,maxn1, s, seq,ms,mseq, res, mres, n, ci, a;
  float quant=0;
  int **list;

  int **cache;
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);
  cache=declare_int (A->nseq, A->len_aln);

  for ( ms=0; ms<ns[0]; ms++)
    {
      mseq=ls[0][ms];
      if ( S->seq_al[ci][mseq]!='I')continue;

      /*Row layout: 0 master residue, 1 column index, 2 and 3 the counts above*/
      list=declare_int (A->len_aln+1, 5);
      for (col=0; col< A->len_aln; col++)
        {
          mres=tolower(A->seq_al[mseq][col]);
          list[col][0]=mres;
          list[col][1]=col;

          if ( is_gap(mres))continue;
          for ( s=0; s<ns[0]; s++)
            {
              seq=ls[0][s];
              res=tolower(A->seq_al[seq][col]);
              if (is_gap(res))continue;

              if (S->seq_al[ci][seq]=='I' && res==mres)list[col][3]++;
              if (res==mres)list[col][2]++;
            }
        }

      sort_int_inv (list,5,3,0,A->len_aln-1);

      strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1);
      strget_param ( mode, "_QUANT_", "0.95", "%f", &quant);

      n=quantile_rank ( list, 3, A->len_aln,quant);
      sort_int (list, 5, 2, 0, n-1);

      /*Only the A->len_aln first rows describe real columns. A larger
        _MAXN1_ used to read the padding row (overwriting column 0 with 0)
        and then run past the end of list*/
      if (maxn1>A->len_aln)maxn1=A->len_aln;
      for (a=0; a<maxn1; a++)
        {
          col=list[a][1];
          cache[mseq][col]=list[a][3];
        }
      free_int (list, -1);
    }
  return cache;
}
/*
 * sar2cache_proba2
 * Builds a cache[sequence][column] from the training sequences (ls[0], ns[0]
 * entries). For every training sequence flagged 'I' (the "master") and every
 * non-gap column, a score is computed with evaluate_sar_score1 from the
 * counts of training sequences sharing the master residue. One row of list
 * describes one column: fields 0..nseq-1 hold the per-master scores, field
 * SCORE their sum and field COL the column index.
 * After sorting the rows on SCORE, the per-master scores of the 5 first rows
 * that lie strictly between _FILTER1_ and _FILTER2_ are copied into the cache.
 * _FILTER3_, _FILTER4_, _MAXN1_ and _QUANT_ are parsed but have no effect on
 * the result (the quantile rank is overridden by the fixed value 5).
 *
 * Returns the newly allocated cache (A->nseq rows, A->len_aln columns).
 */
int **sar2cache_proba2 ( Alignment *A, int *ns,int **ls, Alignment *S, char *compound, char *mode)
{
  int col, s, seq,ms,mseq, res, mres,n,maxn1, ci, a,b;
  int COL, SCORE;

  float quant=0;
  int **list;

  float T1, T2, T3, T4;

  int **cache;
  cache=declare_int ( A->nseq, A->len_aln);
  ci=name_is_in_list ( compound, S->name, S->nseq, -1);

  strget_param ( mode, "_FILTER1_", "0"   , "%f", &T1);
  strget_param ( mode, "_FILTER2_", "1000000", "%f", &T2);
  strget_param ( mode, "_FILTER3_", "0"   , "%f", &T3);
  strget_param ( mode, "_FILTER4_", "1000000", "%f", &T4);

  list=declare_int (A->len_aln+1,A->nseq+2);
  SCORE=A->nseq;
  COL=A->nseq+1;

  for ( ms=0; ms<ns[0]; ms++)
    {
      mseq=ls[0][ms];
      if ( S->seq_al[ci][mseq]!='I')continue;

      for (col=0; col< A->len_aln; col++)
        {
          int N11=0,N10=0,N01=0,N00=0,N1sar=0,N1msa=0,N=0;

          mres=tolower(A->seq_al[mseq][col]);
          if ( is_gap(mres))continue;
          for ( s=0; s<ns[0]; s++)
            {
              seq=ls[0][s];
              res=tolower(A->seq_al[seq][col]);
              if (is_gap(res))continue;

              N++;
              if (S->seq_al[ci][seq]=='I' && res==mres)N11++;
              else if (S->seq_al[ci][seq]=='I' && res!=mres)N10++;
              else if (S->seq_al[ci][seq]=='O' && res==mres)N01++;
              else if (S->seq_al[ci][seq]=='O' && res!=mres)N00++;

              if ( S->seq_al[ci][seq]=='I')N1sar++;
              if ( res==mres)N1msa++;
            }
          list[col][mseq]=(int)evaluate_sar_score1 (N,N11,N1msa,N1sar);
          list[col][SCORE]+=list[col][mseq];
          list[col][COL]=col;
        }
    }

  strget_param ( mode, "_MAXN1_", "5", "%d", &maxn1);
  strget_param ( mode, "_QUANT_", "0.95", "%f", &quant);
  sort_int_inv (list,A->nseq+2,SCORE, 0, A->len_aln-1);
  n=quantile_rank ( list,A->nseq, A->len_aln,quant);
  /*The quantile rank is deliberately overridden by a fixed number of columns*/
  n=5;
  /*...but never more than the number of real columns*/
  if (n>A->len_aln)n=A->len_aln;

  for (a=0; a<n; a++)
    {
      int value;

      col=list[a][COL];
      for ( b=0; b<A->nseq; b++)
        {
          /*After the sort, the scores of column col live in row a, not in
            row col (which now describes some other column)*/
          value=list[a][b];
          if ( value>T1 && value<T2){cache[b][col]= value;}
        }
    }

  free_int (list, -1);
  return cache;
}

  
	  

/************************************************************************************/
/*                ALIGNMENT ANALYZE     : SAR                                            */
/************************************************************************************/
/*
 * aln2jack_group3
 * Splits the sequences of A into two groups while balancing the sequences
 * flagged 'I' in comp: the 'I' sequences and the other sequences are each
 * given a random key, sorted on it, and cut in the middle; the first halves
 * go to l1[0], the remaining ones to l2[0].
 * The generator is reseeded with vsrand(0) on every call.
 *
 * l1[0] and l2[0] are allocated here (A->nseq ints each); nl1[0] and nl2[0]
 * receive the number of entries. Always returns 1.
 */
int aln2jack_group3 (Alignment *A,char *comp, int **l1, int *nl1, int **l2, int *nl2)
{
  int **active, **other;
  int n_active=0, n_other=0;
  int i, half;

  vsrand (0);
  active=declare_int (A->nseq, 2);
  other=declare_int (A->nseq, 2);

  /*Column 0: sequence index, column 1: random sort key*/
  for (i=0; i<A->nseq; i++)
    {
      int **bucket;
      int *fill;

      if (comp[i]=='I')
        {
          bucket=active;
          fill=&n_active;
        }
      else
        {
          bucket=other;
          fill=&n_other;
        }
      bucket[*fill][0]=i;
      bucket[*fill][1]=rand()%100000;
      (*fill)++;
    }

  l1[0]=vcalloc (A->nseq, sizeof (int));
  l2[0]=vcalloc (A->nseq, sizeof (int));
  nl1[0]=0;
  nl2[0]=0;

  sort_int (other, 2, 1, 0,n_other-1);
  sort_int (active, 2, 1, 0,n_active-1);

  /*'I' sequences: first half to l1, the rest to l2*/
  half=n_active/2;
  for (i=0; i<n_active; i++)
    {
      if (i<half)l1[0][nl1[0]++]=active[i][0];
      else l2[0][nl2[0]++]=active[i][0];
    }

  /*Other sequences: same split*/
  half=n_other/2;
  for (i=0; i<n_other; i++)
    {
      if (i<half)l1[0][nl1[0]++]=other[i][0];
      else l2[0][nl2[0]++]=other[i][0];
    }

  free_int (other, -1);
  free_int (active, -1);
  return 1;
}

/*
 * aln2jack_group2
 * Splits the sequences of A into two random halves: every sequence is given
 * a random key, the sequences are sorted on it, the first A->nseq/2 go to
 * l1[0] and the remaining ones to l2[0].
 * The generator is reseeded with vsrand(0) on every call. seq is not used.
 *
 * l1[0] and l2[0] are allocated here (A->nseq ints each); nl1[0] and nl2[0]
 * receive the number of entries. Always returns 1.
 */
int aln2jack_group2 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2)
{
  int **list;
  int a, b, mid;

  list=declare_int (A->nseq, 2);
  l1[0]=vcalloc (A->nseq, sizeof (int));
  l2[0]=vcalloc (A->nseq, sizeof (int));
  /*Both counters must start at 0: they index the freshly allocated lists.
    The previous "nl1[0]=nl2[0];" only copied whatever the caller left there*/
  nl1[0]=nl2[0]=0;

  vsrand (0);
  for ( a=0; a< A->nseq; a++)
    {
      list[a][0]=a;
      list[a][1]=rand()%100000;
    }
  sort_int (list, 2, 1, 0,A->nseq-1);
  mid=A->nseq/2;
  for (a=0; a<mid; a++)
    {
      l1[0][nl1[0]++]=list[a][0];
    }
  for (a=0,b=mid; b<A->nseq; b++, a++)
    {
      l2[0][nl2[0]++]=list[b][0];
    }

  free_int (list, -1);
  return 1;
}
/*
 * Split the sequences of A into two jackknife lists according to their
 * similarity to sequence seq.
 *
 * The similarity values come from aln2sim_mat (A, "idmat"); seq itself is
 * given the value 100. The (seq, index, similarity) triplets are passed to
 * sort_int_inv on the similarity field (presumably a descending sort --
 * confirm against sort_int_inv). The first A->nseq/2 indices go to l1[0],
 * the rest to l2[0].
 *
 * l1[0] and l2[0] are allocated here (A->nseq ints each, vcalloc) and
 * nl1[0]/nl2[0] receive the number of indices written. Always returns 1.
 */
int aln2jack_group1 (Alignment *A, int seq, int **l1, int *nl1, int **l2, int *nl2)
{
  int **sim;
  int **list;
  int a, mid;

  list=declare_int ( A->nseq, 3);
  l1[0]=vcalloc (A->nseq, sizeof (int));
  l2[0]=vcalloc (A->nseq, sizeof (int));
  /* Both counters must start from zero: they are used as write cursors below.
     (The previous "nl1[0]=nl2[0]" only copied whatever the caller had left in nl2[0].) */
  nl1[0]=nl2[0]=0;

  sim=aln2sim_mat (A, "idmat");
  for ( a=0; a< A->nseq; a++)
    {
      list[a][0]=seq;
      list[a][1]=a;
      list[a][2]=(a==seq)?100:sim[seq][a];
    }
  sort_int_inv (list, 3, 2, 0, A->nseq-1);
  fprintf ( stderr, "\nJacknife fromsequence %s [%d]\n", A->name[seq], seq);

  mid=A->nseq/2;
  for (a=0; a< mid; a++)
    l1[0][nl1[0]++]=list[a][1];
  for (a=mid; a<A->nseq; a++)
    l2[0][nl2[0]++]=list[a][1];

  /* The scratch table is local to this call; release it like the sibling
     jackknife helpers do. */
  free_int (list, -1);
  /* NOTE(review): sim is not released here; whether the matrix returned by
     aln2sim_mat is owned by the caller is not visible from this file -- confirm. */
  return 1;
}
  
      
/*
 * Restrict the alignment A and its companion SAR alignment S to the
 * sequences whose names appear in SUB.
 *
 * The indices (in A) of the SUB names found in A are collected, in SUB order;
 * names that name_is_in_list reports as -1 are ignored.
 * subA[0] is extract_sub_aln of A on those indices. For S, the same indices
 * are applied to rotate_aln (S) and the result is rotated again (rotate_aln
 * presumably transposes rows and columns, so that the indices select columns
 * of S -- confirm). The first S->nseq names of subS[0] are then overwritten
 * with the names of S.
 *
 * Always returns 0.
 * NOTE(review): the index list and the two intermediate alignments are not
 * released here; confirm whether extract_sub_aln/rotate_aln keep references
 * to their inputs before adding the corresponding frees.
 */
int sarset2subsarset ( Alignment *A, Alignment *S, Alignment **subA, Alignment **subS, Alignment *SUB)
{
  Alignment *transposed, *selected;
  int *keep;
  int n_keep=0;
  int i, idx;

  keep=vcalloc ( SUB->nseq, sizeof (int));
  i=0;
  while (i<SUB->nseq)
    {
      idx=name_is_in_list(SUB->name[i], A->name, A->nseq, 100);
      if (idx!=-1)
        keep[n_keep++]=idx;
      i++;
    }

  subA[0]=extract_sub_aln (A, n_keep, keep);

  transposed=rotate_aln (S, NULL);
  selected=extract_sub_aln (transposed, n_keep, keep);
  subS[0]=rotate_aln (selected, NULL);

  for (i=0; i<S->nseq; i++)
    sprintf ( (subS[0])->name[i], "%s", S->name[i]);

  return 0;
}

/*
 * Pairwise "voting" analysis of the alignment columns against every row of
 * SAR.
 *
 * For each SAR row and each column a running product starts at 1. For every
 * pair of sequences (b<c), with s=sim[b][c] taken from
 * aln2sim_mat (A, "idmat"):
 *   - if exactly one of the two is flagged 'I' in the SAR row and the two
 *     residues differ, the product is multiplied by 1/(100-s);
 *   - if both are flagged 'I' and the residues are identical, the product is
 *     multiplied by 1/s.
 * Each product is then printed on stderr and 100*log(1-product) is stored in
 * field [0] of the returned table (indexed [SAR row][column]).
 *
 * Both tables are static and are allocated on the first call only, with the
 * dimensions of that call; the returned pointer is that static storage.
 * The mode parameter is not used.
 */
int ***simple_sar_analyze_vot ( Alignment *A, Alignment *SAR, char *mode)
{
  static float ***result;
  static int ***iresult;
  int **sim;
  int cpd, s1, s2, col;
  int flag1, flag2, same_res;
  float id;

  if (result==NULL)
    {
      result=declare_arrayN (3,sizeof (float),SAR->nseq, A->len_aln,3);
      iresult=declare_arrayN (3,sizeof (int),SAR->nseq, A->len_aln,3);
    }

  sim=aln2sim_mat (A, "idmat");

  for (cpd=0; cpd<SAR->nseq; cpd++)
    {
      for (col=0; col<A->len_aln; col++)
        result[cpd][col][0]=1;
    }

  for (cpd=0; cpd<SAR->nseq; cpd++)
    {
      for (s1=0; s1<A->nseq-1; s1++)
        {
          for (s2=s1+1; s2<A->nseq; s2++)
            {
              /* These three values do not depend on the column. */
              flag1=(SAR->seq_al[cpd][s1]=='I')?1:0;
              flag2=(SAR->seq_al[cpd][s2]=='I')?1:0;
              id=sim[s1][s2];

              for (col=0; col<A->len_aln; col++)
                {
                  same_res=(A->seq_al[s1][col]==A->seq_al[s2][col]);

                  if (flag1!=flag2)
                    {
                      if (!same_res)
                        result[cpd][col][0]*=(1/(100-id));
                    }
                  else if (flag1 && same_res)
                    {
                      result[cpd][col][0]*=1/id;
                    }

                  result[cpd][col][1]='a';
                }
            }
        }
    }

  for (cpd=0; cpd<SAR->nseq; cpd++)
    {
      for (col=0; col<A->len_aln; col++)
        {
          fprintf ( stderr, "\n%f", result[cpd][col][0]);
          iresult[cpd][col][0]=100*log(1-result[cpd][col][0]);
        }
    }
  return iresult;
}


/*
 * Score every column of inA against every row of SAR.
 *
 * inA is rotated with rotate_aln so that each column can be handed to the
 * scoring routine as a string. For each (SAR row, column) pair the score
 * returned by sar_vs_seq1 (sar_mode is fixed to 1; modes 2 and 3 select
 * sar_vs_seq2/sar_vs_seq3) is multiplied by 10 and ADDED to field [0] of the
 * result table; field [1] receives the character reported through the last
 * argument of the scoring routine.
 *
 * The table is static and allocated on the first call only, with the
 * dimensions of that call, so values accumulate across calls. The returned
 * pointer is that static storage. The mode parameter is not used.
 */
int ***simple_sar_analyze_col ( Alignment *inA, Alignment *SAR, char *mode)
{
  static int ***result;
  Alignment *columns;
  int **sim;
  int cpd, col;
  int sar_mode=1;
  double score=0;
  char aa;

  if (result==NULL)
    result=declare_arrayN (3,sizeof (int),SAR->nseq, inA->len_aln, 3);

  sim=aln2sim_mat (inA, "idmat");
  columns=rotate_aln (inA, NULL);

  for (cpd=0; cpd<SAR->nseq; cpd++)
    {
      fprintf ( stderr, "[%d/%d]", cpd, SAR->nseq);
      for (col=0; col<columns->nseq; col++)
        {
          switch (sar_mode)
            {
            case 3:
              score=sar_vs_seq3(SAR->seq_al[cpd], columns->seq_al[col],100, sim, &aa);
              break;
            case 2:
              score=sar_vs_seq2(SAR->seq_al[cpd], columns->seq_al[col],100, sim, &aa);
              break;
            default:
              score=sar_vs_seq1(SAR->seq_al[cpd], columns->seq_al[col],100, sim, &aa);
              break;
            }

          result[cpd][col][0]+=score*10;
          result[cpd][col][1]=aa;
        }
    }

  return result;
}



/*
 * Score one alignment column (seq) against one SAR string (sar), trying every
 * residue present in the column and keeping the best one.
 *
 * sar     : one character per sequence; 'I' counts as a hit, 'o' positions are
 *           left out of the counts, anything else counts as a miss.
 * seq     : the column, one residue per sequence (read over strlen(sar) chars).
 * gl      : the column is skipped (score 0) when the averaged is_gap() value
 *           of the column is greater than gl.
 * sim     : not used.
 * best_aa : best_aa[0] receives the lower-case residue that produced the
 *           returned score, or '-' when no residue scored above 0.
 *
 * For every candidate residue, a position "matches" when the "idmat" matrix
 * entry for (column residue, candidate) is positive; the counts are passed to
 * evaluate_sar_score1. Returns the highest score found (0 if none).
 *
 * The matrix and the residue-presence table are static and allocated on the
 * first call. Residues are mapped to matrix indices by subtracting 'A' from
 * the lower-cased character (assumes the matrix returned by read_matrice
 * covers that range -- confirm).
 */
double sar_vs_seq1 ( char *sar, char *seq, float gl, int **sim, char *best_aa)
{
  double score=0, return_score=0;
  int RN, N11, Nmsa, Nsar, N;
  int a, b, r, s, res1, res2;
  double Ng=0;
  static int **mat;
  static int *aa;

  /* Define the output before any return: callers read it unconditionally. */
  best_aa[0]='-';

  if ( mat==NULL)
    {
      mat=read_matrice ("idmat");
    }

  N=strlen (sar);
  for (a=0; a<N; a++)
    Ng+=is_gap(seq[a]);
  Ng/=N;

  if (Ng>gl) return 0;

  /* Flag the residues present in the column. The unsigned char cast keeps
     both the tolower() argument and the table index inside 0..255. */
  if (!aa)aa=vcalloc (256, sizeof(int));
  for ( a=0; a<N; a++)aa[tolower((unsigned char)seq[a])]=1;

  for (a=0; a<26; a++)
    {
      if (!aa['a'+a]) continue;

      RN=Nmsa=Nsar=N11=0;
      res2=('a'+a)-'A';
      for (b=0; b<N; b++)
        {
          res1=tolower((unsigned char)seq[b]);
          if (res1=='-')r=0;
          else r=(mat[res1-'A'][res2]>0)?1:0;

          if ( sar[b]!='o')
            {
              s=(sar[b]=='I')?1:0;

              Nmsa+=r; Nsar+=s;
              N11+=(r && s)?1:0;
              RN++;
            }
        }

      if (N11) score=evaluate_sar_score1 ( RN, N11, Nmsa, Nsar);
      else     score=0;

      if ( score>return_score)
        {
          best_aa[0]='a'+a;
          return_score=score;
        }
    }

  /* Reset the static presence table for the next call. */
  for ( a=0; a<N; a++)aa[tolower((unsigned char)seq[a])]=0;

  return return_score;
}

/*
 * Correlation between residue conservation in one column (seq) and the SAR
 * string (sar).
 *
 * Every character of the column that occurs at a position flagged 'I' in sar
 * is marked; a position then "matches" (r=1) when its character is marked,
 * and is a SAR hit (s=1) when sar is 'I' there. With N=strlen(sar) and the
 * four counts N11/N10/N01/N00 of the (r,s) combinations:
 *   - fewer than 20% SAR hits : 100*N11/(N11+N10+N01)
 *   - more than 80% SAR hits  : 100*N00/(N00+N10+N01)
 *   - otherwise               : 100*(N11+N00)/N
 * All three are integer divisions, and 0 is returned when the denominator
 * is 0.
 *
 * gl : the column is skipped (score 0) when its averaged is_gap() value is
 *      greater than gl; that average is multiplied by 100 first when gl<1.
 * sim and best_aa are not used.
 */
double sar_vs_seq4 ( char *sar, char *seq, float gl, int **sim, char *best_aa)
{
  int N11, N10, N01, N00, Nsar, N;
  int a, b, r, s;
  double Ng=0, ratio, score;
  int *aa;

  N=strlen (sar);
  for (a=0; a<N; a++)
    Ng+=is_gap(seq[a]);
  Ng/=N;
  if (gl<1)Ng*=100;

  if (Ng>gl) return 0;

  /* Mark the characters seen at 'I' positions. The unsigned char cast keeps
     the index inside the 256-entry table even for negative char values. */
  aa=vcalloc ( 256, sizeof (int));
  for (b=0; b<N; b++)
    {
      if (sar[b]=='I')aa[(unsigned char)seq[b]]=1;
    }

  N11=N10=N01=N00=Nsar=0;
  for (b=0; b<N; b++)
    {
      r=aa[(unsigned char)seq[b]];
      s=(sar[b]=='I')?1:0;

      Nsar+=s;
      N11+=(r && s)?1:0;
      N01+=(!r &&s)?1:0;
      N10+=(r && !s)?1:0;
      N00+=(!r && !s)?1:0;
    }

  /* The table is no longer needed: release it before choosing a formula so
     that no return path leaks it. */
  vfree (aa);

  ratio=(float)Nsar/(float)N;

  if (ratio<0.2)
    {
      /* Sparse matrix, mostly 0s */
      score=((N11+N01+N10)==0)?0:((100*N11)/(N11+N10+N01));
    }
  else if (ratio>0.8)
    {
      /* Sparse matrix, mostly 1s */
      score=((N00+N01+N10)==0)?0:((100*N00)/(N00+N10+N01));
    }
  else
    {
      /* Balanced matrix */
      score=(N==0)?0:((100*(N11+N00))/N);
    }
  return score;
}

/*
 * Score one column (seq) against one SAR string (sar), treating every
 * character found at an 'I' position as a single "matching" class.
 *
 * A position matches when its character occurs at some 'I' position of sar;
 * it is a SAR hit when sar is 'I' there. The number of positions, of joint
 * hits, of matches and of SAR hits are handed to evaluate_sar_score1; the
 * result is 0 when there is no joint hit.
 *
 * gl : the column is skipped (score 0) when its averaged is_gap() value is
 *      greater than gl.
 * sim and best_aa are not used.
 */
double sar_vs_seq3 ( char *sar, char *seq, float gl, int **sim, char *best_aa)
{
  double gap_level=0;
  double result;
  int *seen;
  int len, i;
  int in_msa, in_sar;
  int n_both=0, n_msa=0, n_sar=0;

  len=strlen (sar);
  for (i=0; i<len; i++)
    gap_level+=is_gap(seq[i]);
  gap_level/=len;

  if (gap_level>gl) return 0;

  /* Pass 1: remember which characters appear at an 'I' position. */
  seen=vcalloc ( 256, sizeof (int));
  for (i=0; i<len; i++)
    {
      if (sar[i]=='I') seen[(int)seq[i]]=1;
    }

  /* Pass 2: count matches, SAR hits and joint hits. */
  for (i=0; i<len; i++)
    {
      in_msa=seen[(int)seq[i]];
      in_sar=(sar[i]=='I')?1:0;

      n_msa+=in_msa;
      n_sar+=in_sar;
      if (in_msa && in_sar) n_both++;
    }

  if (n_both) result=evaluate_sar_score1 ( len, n_both, n_msa, n_sar);
  else        result=0;

  vfree (seen);
  return result;
}

/*
 * Similarity-weighted score of one column (seq) against one SAR string (sar).
 *
 * For each of the 26 lower-case letters present in the column, every ordered
 * pair of positions (b,c) is classified by the four bits
 * (b has the letter, b is 'I' in sar, c has the letter, c is 'I' in sar).
 * With sim=sim_mat[b][c]/10 and diff=10-sim, seven of the sixteen patterns
 * add weighted amounts to the counters (see the dispatch below); the other
 * patterns are ignored. When the weighted joint count is non-zero the
 * counters are handed to evaluate_sar_score1. The best score over all
 * letters is returned; a negative result is also printed on stderr.
 *
 * gl : the column is skipped (score 0) when its averaged is_gap() value is
 *      greater than gl.
 * best_aa is not used.
 */
double sar_vs_seq2 ( char *sar, char *seq, float gl, int **sim_mat, char *best_aa)
{
  double gap_level=0, score=0, best=0;
  int len, letter, target, present;
  int i, j;
  int has1, hit1, has2, hit2;
  int tot, w11, w10, w01, wmsa, wsar;
  int sim, diff, w;
  char pattern[5];

  len=strlen (sar);
  for (i=0; i<len; i++)
    gap_level+=is_gap(seq[i]);
  gap_level/=len;

  if (gap_level>gl) return 0;

  for (letter=0; letter<26; letter++)
    {
      tot=wmsa=wsar=w11=w10=w01=0;
      target='a'+letter;

      /* Skip letters that never occur in the column. */
      present=0;
      for (i=0; i<len; i++)
        {
          if (tolower(seq[i])==target) present++;
        }
      if (!present) continue;

      for (i=0; i<len; i++)
        {
          has1=(tolower(seq[i])==target)?1:0;
          hit1=(sar[i]=='I')?1:0;
          for (j=0; j<len; j++)
            {
              has2=(tolower(seq[j])==target)?1:0;
              hit2=(sar[j]=='I')?1:0;

              sprintf ( pattern, "%d%d%d%d", has1,hit1, has2, hit2);
              sim= sim_mat[i][j]/10;
              diff=10-sim;

              if (strm (pattern, "0000"))
                {
                  w=diff; tot+=2*w;
                }
              else if ( strm (pattern, "0011"))
                {
                  w=sim; tot+=2*w; w11+=w; w01+=w; wmsa+=w; wsar+=w;
                }
              else if ( strm (pattern, "1010"))
                {
                  w=diff; tot+=2*w; w10+=2*w; wmsa+=2*w;
                }
              else if ( strm (pattern, "0101"))
                {
                  w=diff; tot+=2*w; w01+=2*w; wsar+=2*w;
                }
              else if ( strm (pattern, "1111"))
                {
                  w=diff; tot+=2*w; w11+=2*w; wmsa+=2*w; wsar+=2*w;
                }
              else if ( strm (pattern, "1001"))
                {
                  w=sim; tot+=2*w; w10+=w; w01+=w; wmsa+=w; wsar+=w;
                }
              else if ( strm (pattern, "0110"))
                {
                  w=sim; tot+=2*w; w10+=w; w01+=w; wmsa+=w; wsar+=w;
                }
            }
        }

      /* score keeps its previous value when there is no joint weight. */
      if (w11)
        {
          score=evaluate_sar_score1 ( tot, w11, wmsa, wsar);
        }
      best=MAX(best, score);
    }

  if ( best <0)fprintf ( stderr, "\n%.2f", best);
  return best;
}
  
/*
 * Percentage overlap between two SAR strings.
 *
 * A position is "on" in a string when its character is anything but 'O'.
 * Over the first strlen(seq1) positions, returns
 *     100 * (#on in both) / (#on in at least one)
 * or 0 when no position is on in both strings. seq2 is read over the same
 * number of positions as seq1.
 */
float get_sar_sim (char *seq1, char *seq2)
{
  int len, pos;
  int on1, on2;
  int both=0, first_only=0, second_only=0;

  len=strlen (seq1);
  for (pos=0; pos<len; pos++)
    {
      on1=(seq1[pos]!='O');
      on2=(seq2[pos]!='O');

      if (on1 && on2)  both++;
      else if (on1)    first_only++;
      else if (on2)    second_only++;
    }

  if (both==0) return 0;
  return ((float)(both)*100)/(float)(both+first_only+second_only);
}
	  

/*
 * Score the overlap between two binary labellings of N items.
 *
 * N     : number of items
 * n11   : items set in both labellings
 * n1msa : items set in the first labelling
 * n1sar : items set in the second labelling
 *
 * Returns 0 when n11 is 0. Otherwise returns -p, where p is built from
 * M_chooses_Nlog terms (presumably logarithms of binomial coefficients --
 * confirm against M_chooses_Nlog):
 *   p = [ (n1msa,N) + (n1sar-n11,N-n1msa) + (n11,n1msa) ]
 *     - [ (n1msa,N) + (n1sar,N) ]
 */
double evaluate_sar_score1 ( int N, int n11, int n1msa, int n1sar)
{
  double log_p;

  if (n11==0) return 0;

  log_p = M_chooses_Nlog (n1msa, N) + M_chooses_Nlog (n1sar-n11, N-n1msa) + M_chooses_Nlog (n11, n1msa);
  log_p-= (M_chooses_Nlog (n1msa, N)+M_chooses_Nlog (n1sar, N));

  return -log_p;
}
/*
 * Simple overlap score between two binary labellings: the number of items set
 * in both (n11) minus the number of items set in exactly one of them, i.e.
 *     n11 - ((n1msa-n11) + (n1sar-n11))
 *
 * N is kept for signature compatibility with evaluate_sar_score1 but does not
 * enter the result. (The statements that used to follow the return could
 * never execute and have been removed.)
 */
double evaluate_sar_score2 ( int N, int n11, int n1msa, int n1sar)
{
  int msa_only=n1msa-n11;
  int sar_only=n1sar-n11;

  (void)N;
  return n11-(msa_only+sar_only);
}


/*
 * Look up the hard-coded benchmark annotation attached to index "value".
 *
 * The table has 1000 entries, filled on the first call: the indices listed
 * below hold 1, 2 or 3, every other entry holds 0 (the meaning of the codes
 * is not documented in this file).
 *
 * Returns the table entry for value, or 0 when value lies outside 0..999
 * (such values used to index past the table).
 */
int benchmark_sar( int value)
{
  static int v[1000];
  static int initialized;
  int a;

  if (!initialized)
    {
      for (a=0; a< 1000; a++)v[a]=0;

      v[2]=1;   v[3]=2;
      v[6]=2;   v[7]=1;   v[8]=2;   v[9]=1;   v[10]=1;  v[11]=1;  v[12]=2;
      v[30]=2;  v[31]=1;  v[32]=2;  v[33]=1;  v[34]=2;  v[35]=1;  v[36]=1;  v[37]=2;
      v[43]=2;  v[44]=1;  v[45]=2;
      v[73]=2;  v[74]=1;  v[75]=1;  v[76]=2;
      v[80]=2;  v[81]=1;  v[82]=2;  v[83]=1;
      v[85]=2;  v[86]=1;  v[87]=1;  v[88]=2;  v[89]=2;  v[90]=1;  v[91]=2;  v[92]=1;  v[93]=2;
      v[103]=2; v[104]=1; v[105]=1; v[106]=1; v[107]=2;
      v[130]=2; v[131]=1; v[132]=2; v[133]=1; v[134]=1; v[135]=1; v[136]=2; v[137]=1; v[138]=2;
      v[271]=2; v[272]=1; v[273]=2;
      v[281]=2; v[282]=1; v[283]=2; v[284]=1; v[285]=1; v[286]=1; v[287]=2;
      v[319]=2; v[320]=1; v[321]=1; v[322]=1; v[323]=1; v[324]=2; v[325]=1; v[326]=2; v[327]=1; v[328]=2;
      v[356]=2; v[357]=1; v[358]=1; v[359]=2;
      v[377]=2; v[378]=1; v[379]=2;
      v[386]=3;
      v[388]=2; v[389]=1; v[390]=1; v[391]=1; v[392]=2; v[393]=2; v[394]=2; v[395]=1; v[396]=1; v[397]=2;
      v[399]=2; v[400]=1; v[401]=2;
      v[414]=2; v[415]=1; v[416]=2;
      v[420]=2; v[421]=1; v[422]=1; v[423]=1; v[424]=2; v[425]=1; v[426]=2;

      initialized=1;
    }

  /* Guard the lookup: the table only covers 0..999. */
  if (value<0 || value>=1000) return 0;
  return v[value];
}

/*
 * Predict a SAR string for every sequence of A from a weight file, print the
 * predictions on stdout (one ">name" line followed by one 'I'/'O' character
 * per SAR row) and terminate through myexit (EXIT_SUCCESS).
 *
 * The weight file is read with file2list using " " as separator (the exact
 * field layout produced by file2list is not visible here -- confirm). For
 * each SAR row, the lines whose field [1] matches the row name and whose
 * field [3] converts to a positive integer are kept as
 *     { atoi(field [2])-1, first character of field [5], atoi(field [3]) }
 * and the list is closed with an entry whose first value is -1.
 *
 * A sequence gets 'I' for a row when seq2weighted_sar_score is strictly
 * greater than limit, 'O' otherwise. A is returned unchanged.
 */
Alignment *weight2sar (Alignment *A, Alignment *SAR, char *weight_file, int limit)
{
  int ***weight;
  char ***rows;
  int cpd, row, n, s;
  float score;

  weight=vcalloc (SAR->nseq, sizeof (int**));
  rows=file2list (weight_file, " ");

  for (cpd=0; cpd<SAR->nseq; cpd++)
    {
      /* First pass: count the usable lines to size the table. */
      n=0;
      for (row=0; rows[row]; row++)
        {
          if ( strm (rows[row][1], SAR->name[cpd]) && atoi (rows[row][3])>0) n++;
        }

      weight[cpd]=declare_int (n+1, 3);
      fprintf ( stderr, "\n%s %d", SAR->name[cpd], n);

      /* Second pass: fill the table, then add the -1 terminator. */
      n=0;
      for (row=0; rows[row]; row++)
        {
          if ( !strm (rows[row][1], SAR->name[cpd]) || atoi (rows[row][3])<=0) continue;

          weight[cpd][n][0]=atoi(rows[row][2])-1;
          weight[cpd][n][1]=rows[row][5][0];
          weight[cpd][n][2]=atoi (rows[row][3]);
          n++;
        }
      weight[cpd][n][0]=-1;
    }

  for (s=0; s<A->nseq; s++)
    {
      fprintf ( stdout, ">%s\n", A->name[s]);
      for (cpd=0; cpd<SAR->nseq; cpd++)
        {
          score=seq2weighted_sar_score(A->seq_al[s], weight[cpd]);
          fprintf ( stdout, "%c", (score>limit)?'I':'O');
        }
      fprintf (stdout, "\n");
    }

  myexit (EXIT_SUCCESS);
  return A;
}
  
/*
 * Decorate the sequence names of A with the SAR status of one compound.
 *
 * compound is looked up among the names of SAR; when it is not found A is
 * returned untouched. Otherwise every name of A is rewritten in place as
 *     <SAR character of that sequence>_<old name>_<compound>
 * The old name is staged in a 100-byte local buffer, so names are assumed to
 * fit in it, and the new name is assumed to fit in A->name[] (neither size is
 * checked here).
 */
Alignment *display_sar ( Alignment *A, Alignment *SAR, char *compound)
{
  char old_name[100];
  int row, s;

  row=name_is_in_list ( compound, SAR->name, SAR->nseq, 100);
  if (row!=-1)
    {
      for (s=0; s<A->nseq; s++)
        {
          sprintf (old_name, "%s", A->name[s]);
          sprintf ( A->name[s], "%c_%s_%s", SAR->seq_al[row][s], old_name,compound);
        }
    }
  return A;
}
/*
 * Annotate every sequence of A with its weighted SAR score for one compound.
 *
 * The weight file is read with file2list using " " as separator (the exact
 * field layout produced by file2list is not visible here -- confirm). The
 * lines whose field [1] matches compound, and is not "TOTPOS", are kept as
 *     { atoi(field [2])-1, first character of field [5], atoi(field [3]) }
 * and the list is closed with an entry whose first value is -1.
 *
 * For each sequence the score returned by seq2weighted_sar_score is truncated
 * to an int and written, together with the compound name and the reactivity
 * character, into A->seq_comment[]. The reactivity is the SAR character of
 * the sequence for that compound, or 'U' when SAR is NULL or the compound is
 * not one of its rows. Returns A.
 */
Alignment *aln2weighted_sar_score ( Alignment *A,Alignment *SAR, char *weight_file, char *compound)
{
  char ***rows;
  int **weight;
  int cpd=0;
  int row, n, s;
  int score;
  char reactivity;

  if (SAR)
    cpd=name_is_in_list (compound, SAR->name, SAR->nseq, 100);

  rows=file2list (weight_file, " ");

  /* Upper bound on the number of entries: every line naming the compound. */
  n=0;
  for (row=0; rows[row]; row++)
    {
      if (strm (rows[row][1], compound)) n++;
    }
  weight=declare_int ( n+1, 3);

  n=0;
  for (row=0; rows[row]; row++)
    {
      if ( strm (rows[row][1], compound) && !strm ("TOTPOS", rows[row][1]))
        {
          weight[n][0]=atoi(rows[row][2])-1;
          weight[n][1]=rows[row][5][0];
          weight[n][2]=atoi(rows[row][3]);
          n++;
        }
    }
  weight[n][0]=-1;

  for (s=0; s<A->nseq; s++)
    {
      score=seq2weighted_sar_score (A->seq_al[s], weight);

      if (!SAR || cpd==-1) reactivity='U';
      else                 reactivity=SAR->seq_al[cpd][s];

      sprintf (A->seq_comment[s], "Compound %-15s Reactivity %c SAR_SCORE %5d", compound,reactivity, (int) score);
    }
  return A;
}

/*
 * Sum the weights of the (position, residue) pairs matched by seq.
 *
 * weight is a list of { position, residue, weight } triplets terminated by an
 * entry whose position is -1. An entry contributes its weight when the
 * character of seq at that position is not a gap (is_gap) and its lower-case
 * form equals the stored residue. Positions are used as-is to index seq; no
 * bounds check is done here.
 */
float seq2weighted_sar_score ( char *seq, int **weight)
{
  float total=0;
  int i, pos;

  for (i=0; weight[i][0]!=-1; i++)
    {
      pos=weight[i][0];

      if ( !is_gap(seq[pos]) && tolower(seq[pos])==weight[i][1])
        total+=weight[i][2];
    }
  return total;
}

/*
 * Report how often two sequences share the 'I' status of a compound, over all
 * sequence pairs and over the pairs whose similarity reaches L1.
 *
 * Two similarity matrices are used: sim_ref, computed on A, and sim, which is
 * sim_ref itself when posfile is "all" and otherwise the matrix of a copy of
 * A passed through extract_aln3 with posfile.
 *
 * For every pair (a<b) and every SAR row selected by compound (its name, or
 * "all" for every row) the counters of tot[0][...][0] are updated, and those
 * of tot[0][...][1] as well when sim[a][b]>=L1. Rows of tot: 1 = both 'I',
 * 2 = only b 'I', 3 = only a 'I', 4 = number of pairs, 5 = sum of sim_ref.
 *
 * The summary is printed on stdout and the program is terminated with
 * myexit (0). The statements after that call are only reached if myexit
 * returns. L2 is not used.
 */
Alignment * sar2simpred (Alignment *A, Alignment *SAR, char *posfile, char *compound, int L1,int L2 )
{
  int s1, s2, cpd, bin, pass;
  int flag1, flag2;
  int **sim, **sim_ref, npred=0;
  float n11, n10, n01, n00;
  float sn, sp;
  int tot_sim=0;
  int N11=1, N01=2, N10=3, NXX=4, SIM=5;
  float ***tot;
  float **cell;

  n11=n10=n01=n00=0;
  tot=declare_arrayN(3,sizeof (float), 10, 6, 2);

  sim_ref=aln2sim_mat (A, "idmat");
  if (!strm (posfile, "all"))
    {
      Alignment *B;
      B=copy_aln ( A,NULL);
      B=extract_aln3(B,posfile);
      sim=aln2sim_mat (B, "idmat");
    }
  else
    {
      sim=sim_ref;
    }

  for (s1=0; s1<A->nseq-1; s1++)
    {
      for (s2=s1+1; s2<A->nseq; s2++)
        {
          for (cpd=0; cpd<SAR->nseq; cpd++)
            {
              if ( !strm (compound, SAR->name[cpd]) && !strm ( compound, "all")) continue;

              /* Only bin 0 is in use. */
              bin=0;
              cell=tot[bin];

              flag1=(SAR->seq_al[cpd][s1]=='I')?1:0;
              flag2=(SAR->seq_al[cpd][s2]=='I')?1:0;

              n11=(flag1 && flag2)?1:0;
              n01=(!flag1 && flag2)?1:0;
              n10=(flag1 && !flag2)?1:0;
              n00=(!flag1 && !flag2)?1:0;

              /* pass 0: every pair; pass 1: pairs reaching the L1 threshold. */
              for (pass=0; pass<2; pass++)
                {
                  if (pass==1 && sim[s1][s2]<L1) break;

                  cell[N11][pass]+=n11;
                  cell[N01][pass]+=n01;
                  cell[N10][pass]+=n10;
                  cell[NXX][pass]++;
                  cell[SIM][pass]+=sim_ref[s1][s2];
                }
            }
        }
    }

  for (bin=0; bin<1; bin++)
    {
      cell=tot[bin];

      sp=(cell[N11][0])/(cell[N11][0]+cell[N10][0]);
      fprintf ( stdout, "\n%15s N11 %5d SP %.2f ",compound, (int)cell[N11][0],sp);

      if ((cell[N11][1]+cell[N10][1])==0) sp=1;
      else sp=(cell[N11][1])/(cell[N11][1]+cell[N10][1]);

      if (cell[N11][0]==0) sn=1;
      else sn=(cell[N11][1]/cell[N11][0]);

      fprintf ( stdout, " N11 %5d SP %.2f SN %.2f SIM %.2f", (int)cell[N11][1], sp,sn, (cell[SIM][1]/cell[NXX][1]));
    }

  myexit (0);

  /* Only reached if myexit returns. */
  sp=((n11+n01)==0)?1:n11/(n11+n01);
  sn=((n11+n01)==0)?1:n11/(n11+n10);

  fprintf ( stdout, "\nLimit: %d NPRED %d AVGSIM %d SN %.2f   SP %.2f TP %d FP %d FN %d",L1, npred, tot_sim, sn, sp, (int)n11, (int)n01, (int)n10);
  myexit (EXIT_SUCCESS);
  return A;
}

/*
 * Predict the 'I' status of the sequences of A for one compound from their
 * similarity to a set of reference sequences, print the predictions on stdout
 * and terminate through myexit.
 *
 * seqlist selects the reference sequences:
 *   "first" : the first sequence flagged 'I' in each SAR row named compound
 *   "all"   : every sequence of A
 *   a name  : that sequence of A
 *   else    : seqlist is read as an alignment file (main_read_aln) and every
 *             one of its names found in A becomes a reference
 * posfile selects the similarity used for the prediction: "all" uses the
 * matrix of A, anything else the matrix of a copy of A passed through
 * extract_aln3 with posfile.
 *
 * For every non-reference sequence (every sequence when seqlist is "all")
 * the similarities >= L to the references are summed separately for 'I' and
 * non-'I' references (references marked 'o' are skipped). The sign of the
 * difference gives the prediction, written in out[]:
 *   'I'/'i' predicted I   (upper case: the SAR row agrees, lower case: it does not)
 *   'O'/'o' predicted not I (same convention)
 *   'U'/'u' undecided     (upper case when the SAR row says I)
 *   'Z'/'z' reference sequence (upper case when the SAR row says I)
 *
 * The function stops with an error message and myexit (EXIT_FAILURE) when
 * compound is not a row of SAR or when no reference sequence was selected.
 */
Alignment * sar2simpred2 (Alignment *A, Alignment *SAR, char *seqlist, char *posfile, char *compound, int L )
{
  int a,b, c,c1, c2, p, s;
  float n11, n10, n01, n00, sn2, prediction,sp, n1, n0, entropy, Delta;
  int *rlist, *tlist, *pred, *npred, tsim, psim;
  int **sim, **sim_ref;
  int nr=0;
  int nrs;
  char *out;
  int delta_max;
  Alignment *B;
  int printall=1;

  out=vcalloc (A->nseq+1, sizeof (char));
  rlist=vcalloc ( A->nseq, sizeof (int));
  tlist=vcalloc ( A->nseq, sizeof (int));
  pred=vcalloc(2, sizeof (int));
  npred=vcalloc(2, sizeof (int));

  /* rlist[0..nrs-1]: indices of the reference sequences; tlist[i]: 1 if i is one. */
  nrs=0;
  if ( strm (seqlist, "first"))
    {
      for ( a=0; a<SAR->nseq; a++)
        {
          if ( strm ( compound, SAR->name[a]))
            {
              for ( b=0; b<A->nseq; b++)
                {
                  if ( SAR->seq_al[a][b]=='I')
                    {
                      /* NOTE(review): the two labels look swapped with respect to
                         their arguments (sequence name / compound) -- confirm. */
                      fprintf ( stderr, "COMP: %s REF SEQ: %s\n", A->name[b], compound);
                      rlist[nrs]=b;
                      tlist[rlist[nrs]]=1;
                      nrs++;
                      break;
                    }
                }
            }
        }
    }
  else if (strm (seqlist, "all"))
    {
      for ( a=0; a< A->nseq; a++)
        {
          rlist[nrs]=a;
          tlist[a]=1;
          nrs++;
        }
    }
  else if ((a=name_is_in_list ( seqlist, A->name, A->nseq, 100))!=-1)
    {
      rlist[nrs]=a;
      tlist[rlist[nrs]]=1;
      nrs++;
    }
  else
    {
      Alignment *R;
      R=main_read_aln (seqlist, NULL);
      for (a=0; a<R->nseq; a++)
        {
          b=name_is_in_list( R->name[a], A->name, A->nseq, 100);
          /* Ignore names that are not in A (index -1) and names already
             recorded, so that rlist/tlist are never written out of bounds. */
          if (b==-1 || tlist[b]) continue;
          rlist[nrs]=b;
          tlist[b]=1;
          nrs++;
        }
      free_aln (R);
    }

  c=name_is_in_list ( compound, SAR->name, SAR->nseq, 100);
  if (c==-1)
    {
      /* c is used below as a row index of SAR->seq_al. */
      fprintf ( stderr, "\nERROR: compound %s not found in the SAR matrix [FATAL]\n", compound);
      myexit (EXIT_FAILURE);
      return A;
    }
  if (nrs==0)
    {
      /* nrs is used below as a divisor. */
      fprintf ( stderr, "\nERROR: no reference sequence selected with %s [FATAL]\n", seqlist);
      myexit (EXIT_FAILURE);
      return A;
    }

  sim_ref=aln2sim_mat (A, "idmat");
  if (strm (posfile, "all"))
    {
      sim=sim_ref;
      B=A;
    }
  else
    {
      B=copy_aln ( A,NULL);
      B=extract_aln3(B,posfile);
      sim=aln2sim_mat (B, "idmat");
    }

  n11=n10=n01=n00=n1=n0=0;
  delta_max=0;
  for (a=0; a<A->nseq; a++)
    {
      if ( tlist[a] && !strm (seqlist, "all"))
        out[a]=(SAR->seq_al[c][a]=='I')?'Z':'z';
      else
        {
          pred[0]=pred[1]=0;
          npred[0]=npred[1]=1;
          c1=(SAR->seq_al[c][a]=='I')?1:0;
          for (nr=0,tsim=0,psim=0,b=0; b<nrs; b++)
            {
              if ( SAR->seq_al[c][rlist[b]]=='o');
              else
                {
                  c2=(SAR->seq_al[c][rlist[b]]=='I')?1:0;
                  nr+=c2;
                  s=sim[a][rlist[b]];
                  tsim+=sim_ref[a][rlist[b]];
                  psim+=sim[a][rlist[b]];
                  if (s>=L)
                    {
                      pred[c2]+=s;
                      npred[c2]++;
                    }
                }
            }

          if (c1==0)n0++;
          else n1++;

          /* Positive: the 'I' references are closer; negative: the others are. */
          Delta=pred[1]-pred[0];

          if (Delta<-delta_max){p=0;out[a]= (c1==0)?'O':'o';}
          else if (Delta>delta_max){p=1;out[a]=(c1==1)?'I':'i';}
          else {p=-1; out[a]=(c1==1)?'U':'u';}

          if ( p==-1);
          else if (  p &&  c1)n11++;
          else if (  p && !c1)n10++;
          else if ( !p && !c1)n00++;
          else if ( !p &&  c1)n01++;

          if (printall)fprintf ( stdout, ">%-15s %d %c OVERALL_SIM:%d POSITION_SIM %d\n%s\n", B->name[a], c1, out[a],tsim/nrs,psim/nrs,B->seq_al[a]);
        }
    }
  sp=((n11+n10)==0)?1:n11/(n11+n10);
  sn2=((n1)==0)?1:n11/n1;
  prediction=(n11+n00)/(n1+n0);
  entropy=(float)(M_chooses_Nlog (nr, nrs)/M_chooses_Nlog(nrs/2, nrs));

  fprintf ( stdout, ">%-15s Sp %.2f  Sn %.2f Pred %.2f E %.2f\n", compound,sp, sn2,prediction,entropy );
  fprintf ( stdout, "%s\n", out);

  myexit (EXIT_SUCCESS);
  return A;
}
/*********************************COPYRIGHT NOTICE**********************************/
/* Centre National de la Recherche Scientifique (CNRS) */
/*and */
/*Cedric Notredame */
/*Fri Oct 26 17:03:04     2007. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with Foobar; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*	*/
/*********************************COPYRIGHT NOTICE**********************************/
