static char rcsid[] = "$Id: goby.c 34469 2011-01-29 00:27:09Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "goby.h"

#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#include "mem.h"
#include "chrnum.h"
#include "substring.h"


#ifdef HAVE_GOBY
#include <goby/C_Reads.h>
#include <goby/C_Alignments.h>
#include <goby/C_CompactHelpers.h>

/* Reader handle used by the Goby_reader_* functions and Goby_read.
   It only wraps the Goby helper that Goby_reader_new obtains from
   gobyReads_openReadsReaderWindowed. */
struct Gobyreader_T {
  /* Passed to every gobyReads_* call in this file; Goby_writer_finish
     also reads its numberOfReads field. */
  CReadsHelper *helper;
};

/* Writer handle used by the Goby_writer_* functions and
   Goby_print_single.  It only wraps the Goby helper that
   Goby_writer_new obtains from
   gobyAlignments_openAlignmentsWriterDefaultEntriesPerChunk. */
struct Gobywriter_T {
  /* Passed to every gobyAlignments_* and gobyAlEntry_* call in this
     file; Goby_print_single increments its numberOfAlignedReads field. */
  CAlignmentsWriterHelper *helper;
};

#endif


/* Calls goby_shutdownProtobuf() when compiled with HAVE_GOBY.
   Without Goby support this function does nothing. */
void
Goby_shutdown () {
#ifdef HAVE_GOBY
  goby_shutdownProtobuf();
#endif
}


/************************************************************************
 *   Reader
 ************************************************************************/

/* Releases the reader object that *old points to, using FREE.
   The Goby helper inside it is not touched here; Goby_reader_finish
   is the function that calls gobyReads_finished on it. */
void
Goby_reader_free (Gobyreader_T *old) {
  FREE(*old);
}

/* Allocates a reader and opens the given Goby reads files through
   gobyReads_openReadsReaderWindowed, restricted to the window
   [window_start, window_end] as interpreted by that call (circularp is
   passed as false).

   files, nfiles:  array of file names and its element count
   Returns the new reader, or NULL when compiled without HAVE_GOBY. */
Gobyreader_T
Goby_reader_new (char **files, int nfiles, unsigned long window_start, unsigned long window_end) {
#ifdef HAVE_GOBY
  Gobyreader_T reader;

  reader = (Gobyreader_T) MALLOC(sizeof(*reader));
  gobyReads_openReadsReaderWindowed(files,nfiles,/*circularp*/false,
				    window_start,window_end,&(reader->helper));

  return reader;
#else
  return NULL;
#endif
}

/* Reads the next entry from a Goby reads file.

   queryseq2:        out; receives the Shortread built from the second
                     sequence of the entry.  It is left untouched when
                     the function returns NULL.
   reader:           reader created by Goby_reader_new
   barcode_length:   forwarded to Shortread_new for both reads
   invert_first_p:   forwarded to Shortread_new for the first read
   invert_second_p:  forwarded to Shortread_new for the second read

   Returns the Shortread built from the first sequence, whose accession
   is the Goby read index printed in decimal.  Returns NULL when
   gobyReads_hasNext does not report 1, or when compiled without
   HAVE_GOBY. */
Shortread_T
Goby_read (Shortread_T *queryseq2, Gobyreader_T reader, int barcode_length,
	   bool invert_first_p, bool invert_second_p) {
#ifdef HAVE_GOBY
  unsigned long goby_read_index;
  char *acc, *read_identifier = NULL, *description = NULL;
  /* Initialized so that Shortread_new never sees indeterminate values
     should the Goby call leave an output untouched (e.g. no second
     sequence) -- NOTE(review): confirm against the Goby C API. */
  char *sequence1 = NULL, *quality1 = NULL, *sequence2 = NULL, *quality2 = NULL;
  int sequence1_length = 0, quality1_length = 0, sequence2_length = 0, quality2_length = 0;

  if (gobyReads_hasNext(reader->helper) != 1) {
    return (Shortread_T) NULL;
  }

  goby_read_index =
    gobyReads_nextSequencePair(reader->helper,&read_identifier,&description,
			       &sequence1,&sequence1_length,
			       &quality1,&quality1_length,
			       &sequence2,&sequence2_length,
			       &quality2,&quality2_length);

  /* 25 bytes is enough for any 64-bit unsigned value (20 digits) plus
     the terminator.  The index is unsigned long, so it must be printed
     with %lu; %ld would misprint values above LONG_MAX. */
  acc = (char *) CALLOC(25,sizeof(char));
  sprintf(acc,"%lu",goby_read_index);

  /* NOTE(review): acc is heap-allocated here and handed over with
     copy_acc_p true.  If Shortread_new duplicates the accession in that
     mode, this buffer is never released -- confirm and FREE it or pass
     false as appropriate. */
  *queryseq2 = Shortread_new(/*acc*/NULL,/*description*/NULL,
			     sequence2,sequence2_length,quality2,quality2_length,
			     barcode_length,invert_second_p,/*copy_acc_p*/true);

  return Shortread_new(acc,description,
		       sequence1,sequence1_length,quality1,quality1_length,
		       barcode_length,invert_first_p,/*copy_acc*/true);
#else
  return (Shortread_T) NULL;
#endif
}


/* Tells Goby that reading is done by calling gobyReads_finished on the
   reader's helper.  Does nothing when compiled without HAVE_GOBY. */
void
Goby_reader_finish (Gobyreader_T reader) {
#ifdef HAVE_GOBY
  gobyReads_finished(reader->helper);
#endif
}


/************************************************************************
 *   Writer
 ************************************************************************/

/* Releases the writer object that *old points to, using FREE.
   The Goby helper inside it is not touched here; Goby_writer_finish
   is the function that calls gobyAlignments_finished on it. */
void
Goby_writer_free (Gobywriter_T *old) {
  FREE(*old);
}

/* Allocates a writer, opens a Goby alignments writer on output_root
   (default entries per chunk) and records the aligner name and version
   on it.

   Returns the new writer, or NULL when compiled without HAVE_GOBY. */
Gobywriter_T
Goby_writer_new (char *output_root, char *aligner_name, char *aligner_version) {
#ifdef HAVE_GOBY
  Gobywriter_T writer;

  writer = (Gobywriter_T) MALLOC(sizeof(*writer));
  gobyAlignments_openAlignmentsWriterDefaultEntriesPerChunk(output_root,&(writer->helper));

  /* Stamp the output with the program that produced it */
  gobyAlignments_setAlignerName(writer->helper,aligner_name);
  gobyAlignments_setAlignerVersion(writer->helper,aligner_version);

  return writer;
#else
  return NULL;
#endif
}

/* Registers every interval of chromosome_iit as a Goby target on the
   writer: target index, label and length (high - low + 1 of the
   interval bounds).  Does nothing when compiled without HAVE_GOBY. */
void
Goby_writer_add_chromosomes (Gobywriter_T writer, IIT_T chromosome_iit) {
#ifdef HAVE_GOBY
  unsigned int start, end;
  char *label;
  bool label_allocated_p;
  int ntargets, target_index;
  Chrnum_T chrnum;

  ntargets = IIT_total_nintervals(chromosome_iit);

  target_index = 0;
  while (target_index < ntargets) {
    /* Goby numbers targets from 0, whereas GSNAP numbers chromosomes from 1 */
    chrnum = target_index + 1;

    label = IIT_label(chromosome_iit,chrnum,&label_allocated_p);
    IIT_interval_bounds(&start,&end,chromosome_iit,chrnum);
    gobyAlignments_addTarget(writer->helper,target_index,label,end - start + 1);

    /* IIT_label reports through its flag whether the label must be freed */
    if (label_allocated_p == true) {
      FREE(label);
    }

    target_index++;
  }
#endif
}


/* Finalizes the alignments output by calling gobyAlignments_finished
   with the reader helper's numberOfReads.  Does nothing when compiled
   without HAVE_GOBY. */
void
Goby_writer_finish (Gobywriter_T writer, Gobyreader_T reader) {
#ifdef HAVE_GOBY
  gobyAlignments_finished(writer->helper,
			  reader->helper->numberOfReads);
#endif
}



#ifdef HAVE_GOBY
/* Adds one sequence variation to the current Goby entry for each
   deleted base of stage3: the k-th character of the deletion string is
   reported at position Stage3_indel_pos + k, replaced by '-', with no
   quality value. */
static void
output_deletion (Gobywriter_T writer, Stage3_T stage3) {
  int indel_pos, offset;
  char *deleted_bases;

  indel_pos = Stage3_indel_pos(stage3);
  deleted_bases = Stage3_deletion_string(stage3);

  offset = 0;
  while (offset < Stage3_nindels(stage3)) {
    gobyAlEntry_addSequenceVariation(writer->helper,indel_pos + offset,
				     deleted_bases[offset],'-',0,'\0');
    offset++;
  }
}

/* Compares genomic against query position by position (case-insensitively)
   and adds a sequence variation to the current Goby entry for every
   position where they differ.

   writer:          writer whose current entry is being filled in
   hittype:         for SUB and TERMINAL, the entry's mismatch count is
                    overwritten with the number of differences found here
   genomic, query:  NUL-terminated strings; query must be at least as
                    long as genomic, since genomic's length drives the loop
   quality_string:  per-base qualities aligned with query, or NULL
   startpos:        offset added to the loop index to form the reported
                    position */
static void
output_subs (Gobywriter_T writer, Hittype_T hittype, char *genomic, char *query, char *quality_string, int startpos) {
  int nmismatches = 0, i, genomic_length;
  char genomic_char, read_char, quality_char = '\0';
  int has_quality_p = (quality_string != NULL) ? 1 : 0;

  /* genomic is not modified below, so measure it once rather than on
     every iteration */
  genomic_length = (int) strlen(genomic);

  for (i = 0; i < genomic_length; i++) {
    /* <ctype.h> functions require a value representable as unsigned char */
    genomic_char = (char) toupper((unsigned char) genomic[i]);
    read_char = (char) toupper((unsigned char) query[i]);
    if (quality_string != NULL) {
      quality_char = quality_string[i];
    }
    if (genomic_char != read_char) {
      gobyAlEntry_addSequenceVariation(writer->helper,startpos+i,genomic_char,read_char,has_quality_p,quality_char);
      nmismatches++;
    }
  }

  /* Update with the number of mismatches within the aligned region */
  if (hittype == SUB || hittype == TERMINAL) {
    gobyAlEntry_setNumberOfMismatches(writer->helper,nmismatches);
  }
  return;
}


/* Replaces each '-' in target with the character found at the same
   offset of source.  Stops at the end of whichever string is shorter,
   so source is never indexed past its terminator and target is never
   truncated. */
static void
fill_gaps_from (char *target, char *source) {
  int i;

  for (i = 0; target[i] != '\0' && source[i] != '\0'; i++) {
    if (target[i] == '-') {
      target[i] = source[i];
    }
  }
  return;
}

/* Builds a single reference string from up to three substrings.
   Starts from a copy of substring1's Substring_genomic_refdiff string,
   then fills its '-' positions first from substringM and then from
   substring2 (each may be NULL, in which case it is skipped).

   Returns a newly allocated string that the caller must FREE. */
static char *
merge_ref_substrings (Substring_T substring1, Substring_T substring2, Substring_T substringM) {
  char *genomic1, *result;

  genomic1 = Substring_genomic_refdiff(substring1);
  result = (char *) MALLOC(strlen(genomic1) + 1);
  strcpy(result,genomic1);

  if (substringM != NULL) {
    fill_gaps_from(result,Substring_genomic_refdiff(substringM));
  }

  if (substring2 != NULL) {
    fill_gaps_from(result,Substring_genomic_refdiff(substring2));
  }
  return result;
}


/* Returns a newly allocated, NUL-terminated copy of at most length
   characters of source, starting at offset startpos.  The caller
   releases it with FREE. */
static char *
copy_query_window (char *source, int startpos, int length) {
  char *window;

  window = (char *) CALLOC(length+1,sizeof(char));
  strncpy(window,&(source[startpos]),length);
  window[length] = '\0';
  return window;
}

/* Emits the per-base detail of one alignment to the current Goby entry.

   For an EXACT hit the read itself serves as both genomic and query
   sequence.  For every other hit type the genomic sequence is merged
   from the hit's substrings (merge_ref_substrings), and genomic, query
   and quality are cut down to the aligned window that starts at
   Substring_querystart(substring1) and spans
   Stage3_query_alignment_length(stage3) characters.

   The sequences are passed to gobyAlignments_debugSequences; then
   output_deletion is called for DEL hits and output_subs for SUB,
   TERMINAL and INS hits. */
static void
output_result (Gobywriter_T writer, Stage3_T stage3, Shortread_T queryseq) {
  char *genomic, *query, *quality, *raw_genomic, *raw_quality;
  int startpos = 0, length = 0;
  Hittype_T hittype;
  Substring_T substring1, substring2, substringM;

  hittype = Stage3_hittype(stage3);
  if (hittype == EXACT) {
    startpos = 0;
    length = Shortread_fulllength(queryseq);
    genomic = query = Shortread_fullpointer(queryseq);
    quality = Shortread_quality_string(queryseq);

  } else {
    substring1 = Stage3_substring1(stage3);
    substring2 = Stage3_substring2(stage3);
    /* This used to overwrite substring2, which lost the second
       substring and left substringM uninitialized for the merge. */
    substringM = Stage3_substringM(stage3);

    startpos = Substring_querystart(substring1);
    length = Stage3_query_alignment_length(stage3);

    raw_genomic = merge_ref_substrings(substring1,substring2,substringM);
    genomic = copy_query_window(raw_genomic,startpos,length);
    FREE(raw_genomic);

    query = copy_query_window(Shortread_fullpointer(queryseq),startpos,length);

    /* Reads without quality scores keep quality == NULL */
    raw_quality = Shortread_quality_string(queryseq);
    if (raw_quality == NULL) {
      quality = NULL;
    } else {
      quality = copy_query_window(raw_quality,startpos,length);
    }
  }

  gobyAlignments_debugSequences(writer->helper,hittype,genomic,query,startpos);

  if (hittype == DEL) {
    output_deletion(writer,stage3);
  }

  if (hittype == SUB || hittype == TERMINAL || hittype == INS) {
    output_subs(writer,hittype,genomic,query,quality,startpos);
  }

  /* Only the non-EXACT path allocated its own copies; in the EXACT
     path the pointers belong to queryseq. */
  if (hittype != EXACT) {
    if (quality != NULL) {
      FREE(quality);
    }
    FREE(query);
    FREE(genomic);
  }

  return;
}
#endif


/* Writes the alignments of one single-end read to the Goby writer.
   Assumes that stage3array has already been sorted.

   The Goby query index is parsed from the read's accession as a decimal
   number, and the helper's numberOfAlignedReads counter is incremented
   on every call.  When npaths > 1, a too-many-hits record is appended
   using the aligned length of the first hit.  At most maxpaths hits are
   written; nothing is written when quiet_if_excessive_p is set and
   npaths exceeds maxpaths.  SPLICE hits are not written; a message is
   printed to stderr instead.  Does nothing when compiled without
   HAVE_GOBY. */
void
Goby_print_single (Gobywriter_T writer, Stage3_T *stage3array, Shortread_T queryseq1,
		   int npaths, int maxpaths, bool quiet_if_excessive_p) {
#ifdef HAVE_GOBY
  Stage3_T hit;
  Substring_T first_substring;
  UINT4 aligned_length;
  int i;
  unsigned long read_index;

  read_index = (unsigned long) strtoul(Shortread_accession(queryseq1), NULL, 10);
  writer->helper->numberOfAlignedReads++;

  if (npaths > 1) {
    aligned_length = Stage3_query_alignment_length(stage3array[0]);
    gobyAlEntry_appendTooManyHits(writer->helper,read_index,aligned_length,npaths);
  }

  if (quiet_if_excessive_p && npaths > maxpaths) {
    /* No output in Goby */
    return;
  }

  for (i = 0; i < npaths && i < maxpaths; i++) {
    hit = stage3array[i];
    first_substring = Stage3_substring1(hit);

    if (Stage3_hittype(hit) == SPLICE) {
      fprintf(stderr,"Goby does not yet support hittype of SPLICE\n");
      continue;
    }

    gobyAlignments_appendEntry(writer->helper);
    gobyAlEntry_setMultiplicity(writer->helper,1);
    gobyAlEntry_setQueryIndex(writer->helper,read_index);

    /* GSNAP numbers targets from 1 and Goby from 0, hence the adjustment */
    gobyAlEntry_setTargetIndex(writer->helper,Stage3_chrnum(hit) - 1);
    gobyAlEntry_setPosition(writer->helper,Stage3_chrpos_low_trim(hit));
    gobyAlEntry_setMatchingReverseStrand(writer->helper,!Stage3_plusp(hit));
    gobyAlEntry_setQueryPosition(writer->helper,Substring_querystart(first_substring));
    gobyAlEntry_setScoreInt(writer->helper,Substring_querylength(first_substring) - Stage3_score(hit));
    gobyAlEntry_setNumberOfMismatches(writer->helper,Stage3_nmismatches_refdiff(hit));
    gobyAlEntry_setNumberOfIndels(writer->helper,Stage3_nindels(hit));
    gobyAlEntry_setQueryAlignedLength(writer->helper,(UINT4) Stage3_query_alignment_length(hit));
    gobyAlEntry_setTargetAlignedLength(writer->helper,Stage3_genomic_alignment_length(hit));
    gobyAlEntry_setQueryLength(writer->helper,Substring_querylength(first_substring));

    /* Per-base variations for this entry */
    output_result(writer,hit,queryseq1);
  }
#endif
}

