/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */

//#include <stdlib.h>


#include <getopt.h>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <iostream>
#include <iterator>
#include <limits>
#include <stdexcept>
#include <string>

//#include <valgrind/callgrind.h>

#include "io/generalio.H"
#include "util/fileanddisk.H"
#include "mira/assembly.H"
#include "mira/parameters.H"
#include "mira/manifest.H"

#include "progs/quirks.H"

#ifdef MIRAMEMORC
#include "memorc/memorc.H"
#endif

#include "version.H"


using namespace std;


// Build information text; the initialiser list comes from an included header
// (the file name suggests xxd output). dumpStdMsg() streams this array to cout.
// NOTE(review): it is streamed as a C string, so the included list presumably
// ends with a 0 byte -- confirm against the build rules.
const char compileinfo[] = {
#include "compileinfo.itxt.xxd.H"
};

// Built-in default parameter text for the three miraEST steps. The includes
// that would fill the arrays are commented out, so the initialiser lists are
// empty at the moment.
// NOTE(review): an empty initialiser for an array of unknown bound relies on a
// compiler extension -- confirm this is intended.
const char step1par[] = {
//#include "me_step1.par.H"
};
const char step2par[] = {
//#include "me_step2.par.H"
};
const char step3par[] = {
//#include "me_step3.par.H"
};



// Tell the user where to get help or report bugs, then end the process
// through abort(). Unless PUBLICQUIET is defined, the string container
// statistics of Read are dumped to cout first.
void doAbort()
{
#ifndef PUBLICQUIET
  Read::dumpStringContainerStats(cout);
#endif

  static const char wheretogethelp[] =
    "\n\nFor general help, you will probably get a quicker response on the\n"
    "    MIRA talk mailing list\n"
    "than if you mailed the author directly.\n"
    "\nTo report bugs or ask for features, please use the new ticketing system at:\n"
    "\thttp://sourceforge.net/apps/trac/mira-assembler/\n"
    "This ensures that requests don't get lost.\n";

  cout << wheretogethelp;
  abort();
}

// Fill Pv either from a parameter file or from a built-in default text.
//
//  Pv       : parameter sets handed on to MIRAParameters
//  filename : parameter file to load; if empty, 'def' is parsed instead
//  def      : default parameter text, may be nullptr (then nothing is parsed)
//
// If loading the file fails, a message is printed and 'def' (when given) is
//  parsed as fallback; an exception thrown by that fallback parse propagates.
// If no file name was given and parsing 'def' itself fails, the exception is
//  passed on to the caller: parsing the very same text a second time cannot
//  be expected to succeed.
void loadparams(vector<MIRAParameters> & Pv, const string & filename, const char * def)
{
  const bool havefile=!filename.empty();

  try{
    if(havefile){
      MIRAParameters::loadParams(filename, Pv);
    } else if(def!=nullptr) {
      MIRAParameters::parse(def, Pv);
    }
  }
  catch(...) {
    // the defaults themselves were faulty, there is nothing to fall back to
    if(!havefile) throw;

    cout << "Error while trying to load parameter file " << filename << endl;
    // terminate the line so that following output does not get glued to it
    cout << "Setting defaults for this step." << endl;
    if(def!=nullptr) {
      MIRAParameters::parse(def, Pv);
    }
  }
}


// Print the standard banner to cout: pointers to mailing lists and the
// ticketing system, the compile info text, one line per compile-time switch
// that is active, the sizes of a few integer types and finally the output of
// "uname -a".
void dumpStdMsg()
{
  static const char contacthelp[] =
    "To (un-)subscribe the MIRA mailing lists, see:\n"
    "\thttp://www.chevreux.org/mira_mailinglists.html\n\n"
    "After subscribing, mail general questions to the MIRA talk mailing list:\n"
    "\tmira_talk@freelists.org\n\n"
    "To report bugs or ask for features, please use the new ticketing system at:\n"
    "\thttp://sourceforge.net/apps/trac/mira-assembler/\n"
    "This ensures that requests don't get lost.\n\n\n";
  cout << contacthelp;

  // set as soon as at least one compile switch line was printed
  bool closewithnewline=false;

  cout << compileinfo;
#ifdef CEBUGFLAG
  closewithnewline=true;
  cout << "Compiled in debug output mode.\n";
#endif
#ifdef TRACEFLAG
  closewithnewline=true;
  cout << "Compiled with trace mode.\n";
#endif
#ifdef BOUNDTRACKFLAG
  closewithnewline=true;
  cout << "Compiled in boundtracking mode.\n";
#endif
#ifdef BUGTRACKFLAG
  closewithnewline=true;
  cout << "Compiled in bugtracking mode.\n";
#endif
#ifdef PARANOIABUGTRACKFLAG
  closewithnewline=true;
  cout << "Compiled in paranoia bugtracking mode.\n";
#endif
  // one of the two ENABLE64 lines is always printed
#ifdef ENABLE64
  cout << "Compiled with ENABLE64 activated.\n";
#else
  cout << "Compiled with ENABLE64 de-activated.\n";
#endif
  closewithnewline=true;
#ifdef MIRAMEMORC
  closewithnewline=true;
  cout << "Compiled with memory overrun checks, MIRA *will* be slower.\n";
#endif

  cout << "Runtime settings (sorry, for debug):";
  cout << "\n\tSize of size_t  : " << sizeof(size_t);
  cout << "\n\tSize of uint32  : " << sizeof(uint32);
  cout << "\n\tSize of uint32_t: " << sizeof(uint32_t);
  cout << "\n\tSize of uint64  : " << sizeof(uint64);
  cout << "\n\tSize of uint64_t: " << sizeof(uint64_t);
  cout << "\nCurrent system: ";

  // push out our own text before the child process writes its line
  cout.flush();
  {
    const int unamerc=system("uname -a");
    // the return code is of no interest, silence "unused variable"
    (void) unamerc;
  }

  if(closewithnewline){
    cout << endl;
  }
}



// Write the command line (argv[0] followed by all further arguments, each
// separated by one blank) as a single line into the "call parameters" file.
// The file name is built by 'as' from the as_outfile_callparams and dir_info
// settings of Pv[0]; an existing file is truncated.
void dumpCommandlineToFile(Assembly & as, vector<MIRAParameters> & Pv, int argc, char ** argv)
{
  string callparamsfile;
  callparamsfile=as.buildFileName(-1,"","",
				  Pv[0].getAssemblyParams().as_outfile_callparams,
				  "",
				  Pv[0].getDirectoryParams().dir_info);

  ofstream cmdout(callparamsfile.c_str(), ios::out | ios::trunc);

  cmdout << argv[0];
  int32 argi=1;
  while(argi<argc){
    cmdout << " " << argv[argi];
    ++argi;
  }
  cmdout << endl;

  cmdout.close();
}


// Step 1 of the miraEST pipeline.
// Switched off at the moment: the function prints a notice and ends the
//  process with exit(0), none of the parameters is used.
// The former implementation is kept below as commented-out code.
void miraESTstep1(vector<MIRAParameters> & Pv, const string & csfile, int argc, char ** argv)
{
  cout << "De-activated atm, sorry." << endl;
  exit(0);

//  // step 1
//  // assemble _all_ reads to squeeze out most of the data
//  // assembled reads will be written per strain to a CAF file
//  //  if they're in a contig >= 2 reads or have SRMr/WRMr tags
//
//  // by setting the AssumeSNPInsteadofRepeats to false, we will have
//  //  SRMB tags set and the assembler will try to resolve what
//  //  it thinks are repeats.
//
//  vector<string> strainfiles;
//
//  //loadparams(Pv,"me_step1.par", &step1par[0]);
//
//  const_cast<assembly_parameters &>(Pv[0].getAssemblyParams()).as_put_asswithmira_tags=true;
//
//  cout << "Starting step 1: assembling all reads of all strains." << endl;
//  cout << "Setting some standard parameters.\n";
//  //as.assemble();
//
//  MIRAParameters::postParsingChanges(Pv);
//  MIRAParameters::dumpAllParams(Pv, cout);
//
//  Assembly as(Pv, false);
//  dumpCommandlineToFile(as, Pv, argc, argv);
//
//  as.loadSequenceData();
//
//  if(Pv[0].getAssemblyParams().as_filecheck_only==true){
//    cout << "File check only was selected, exiting.";
//  }
//  strainfiles=as.assembleESTs();
//  as.discard();
//  cout << "Finished step 1" << endl;
//
//  ofstream fout;
//  fout.open(csfile.c_str(), ios::out| ios::trunc);
//
//  for(uint32 i=0; i<strainfiles.size(); i+=2){
//    fout << strainfiles[i] << "\t" << strainfiles[i+1] << endl;
//  }
//  fout.close();
}


// File-level state used by miraESTstep2_contigBuiltCallback().

// number of times the callback has been invoked; becomes part of the name
//  string the callback builds
uint32 mests2_contigcount=0;
// index into mests2_strainfiles; the callback reads the element at
//  index+1 and uses it as strain name
uint32 mests2_straini=0;
// stream the callback writes every virtual read to (output type AS_CAF)
ofstream mests2_step2out;
// stream the callback writes "read name <tab> strain name" lines to
ofstream mests2_step2strout;
// NOTE(review): the commented-out step 2 code filled this with alternating
//  file name / strain name entries -- confirm before re-activating
vector<string> mests2_strainfiles;

// Callback for a freshly built contig: turn the contig consensus into a
// virtual read and write it out.
//
//  con : contig to convert; its read SRM tags get transposed to the contig
//  rp  : not used here
//
// The virtual read receives consensus sequence and qualities plus all
//  consensus tags of the contig, has its gaps removed and is written in CAF
//  format to mests2_step2out. A "read name <tab> strain name" line goes to
//  mests2_step2strout.
void miraESTstep2_contigBuiltCallback(Contig & con, const ReadPool & rp)
{
  ++mests2_contigcount;

  const string & strainname=mests2_strainfiles[mests2_straini+1];

  // "<strain>_c<nr>" for real contigs, "<strain>_s<nr>" for singlets
  // NOTE(review): this name is built but never handed to the virtual read,
  //  which looks like an omission -- confirm before re-activating step 2
  const bool morethanoneread=(con.getNumReadsInContig() > 1);
  ostringstream namebuilder;
  if(morethanoneread){
    namebuilder << strainname << "_c" << mests2_contigcount;
  } else {
    namebuilder << strainname << "_s" << mests2_contigcount;
  }

  Read virtualread;

  string consensus;
  vector<base_quality_t> consquals;
  con.newConsensusGet(consensus, consquals);
  virtualread.setSequenceFromString(consensus);
  virtualread.setQualities(consquals);

  // re-apply every gap character of the consensus to the read
  for(uint32 pos=0; pos<consensus.size(); ++pos){
    if(consensus[pos]!='*') continue;
    virtualread.changeBaseInSequence('*', 0, pos);
  }

  con.transposeReadSRMTagsToContig();
  {
    const vector<Contig::consensustag_t> & contigtags=con.getConsensusTags();
    for(const auto & ctag : contigtags){
      virtualread.addTagO(ctag);
    }
  }

  {
    // lbound: number of leading positions with coverage of at most 1
    // rbound: size minus number of trailing positions with coverage of at most 1
    // Both are only computed: the clipping that used them is switched off,
    //  as single reads (which end up with reversed bounds) would otherwise
    //  get removed completely from the assembly.
    Contig::cccontainer_t & covcounts=const_cast<Contig::cccontainer_t&>(con.getConsensusCounts());

    size_t lbound=0;
    Contig::cccontainer_t::iterator fwdI=covcounts.begin();
    while(fwdI!=covcounts.end() && !(fwdI->total_cov > 1)){
      ++lbound;
      ++fwdI;
    }

    size_t rbound=covcounts.size();
    Contig::cccontainer_t::iterator bwdI=covcounts.end();
    while(bwdI!=covcounts.begin()){
      --bwdI;
      if(bwdI->total_cov>1) break;
      --rbound;
    }

    (void) lbound;
    (void) rbound;
  }

  virtualread.removeGapsFromRead();
  Read::setCoutType(Read::AS_CAF);
  mests2_step2out << virtualread;
  mests2_step2strout << virtualread.getName() << "\t"<< strainname << '\n';
}

// Step 2 of the miraEST pipeline.
// Switched off at the moment: the function prints a notice and ends the
//  process with exit(0), none of the parameters is used.
// The former implementation is kept below as commented-out code.
void miraESTstep2(vector<MIRAParameters> & Pv, const string & csfile, int argc, char ** argv)
{
  cout << "De-activated atm, sorry." << endl;
  exit(0);

//
//
//  // assemble each strain for itself taking only the good
//  //  reads identified in the previous step
//  // the resulting contigs are transformed into virtual 'reads':
//  //  single coverage at the ends is clipped (exception: reads that
//  //  are completely in single coverage), tags are taken
//  //  and together with virtual base quality the 'read' gets
//  //  written to a CAF file
//
//  //loadparams(Pv,"me_step2.par", &step2par[0]);
//
//  Pv[0].setAssemblyPutAssembledWithMIRATags(false);
//
//  cout << "Starting step 2: assembling each strain for itself" << endl;
//
//  MIRAParameters::postParsingChanges(Pv);
//  MIRAParameters::dumpAllParams(Pv, cout);
//
//  // Load the file of with caf names and strain names
//  ifstream fin;
//  fin.open(csfile.c_str(), ios::in|ios::ate);
//  if(!fin){
//    throw Notify(Notify::FATAL, "main", (static_cast<std::string>("File not found: ")+csfile.c_str()).c_str());
//  }
//
//  std::streamoff len_fofn=fin.tellg();
//  if(len_fofn==1){
//    throw Notify(Notify::FATAL, "main", (static_cast<std::string>("Zero length file: ")+csfile.c_str()).c_str());
//  }
//  fin.seekg(0, ios::beg);
//
//  string filename, sname;
//  while(GeneralIO::readKeyValue(fin, filename, sname)){
//    mests2_strainfiles.push_back(filename);
//    mests2_strainfiles.push_back(sname);
//  }
//  fin.close();
//
//
//
//  mests2_step2out.open("step2_reads.caf", ios::out | ios::trunc);
//  mests2_step2strout.open("step2_straindata_in.txt", ios::out | ios::trunc);
//
//  mests2_step2strout << "# Automatically generated file" << endl;
//  mests2_step2strout << "# You probably don't want to edit it.\n" << endl;
//  for(mests2_straini=0; mests2_straini<mests2_strainfiles.size(); mests2_straini+=2){
//
//    cout << "Assembly of strain " << mests2_strainfiles[mests2_straini] << "(" << mests2_strainfiles[mests2_straini+1] << ")" << endl;
//
//    Pv[0].generateProjectNames(Pv,"step2_"+mests2_strainfiles[mests2_straini+1]);
//    Pv[0].setAssemblyInfileCAF(const_cast<char *>(mests2_strainfiles[mests2_straini].c_str()));
//    Pv[0].setAssemblyInfileStrainData("step2_straindata_in.txt");
//
//    //P.setAssemblyOutfileCAF((char *)strainfiles[i+1].c_str());
//    //P.setAssemblyOutdirGAP4DA(strainfiles[i+1].c_str());
//    //cout << P;
//
//    Assembly as(Pv, false);
//
//    bool loadok=true;
//    try{
//      as.loadSequenceData();
//    }
//    catch(Notify n){
//      loadok=false;
//      n.setGravity(Notify::WARNING);
//      n.handleError(" miraESTstep2()");
//    }
//    if(!loadok) continue;
//
//    as.setContigBuiltCallback(miraESTstep2_contigBuiltCallback);
//    as.assemble();
//    //as.saveResults();
//
//    cout << "Finished assembly, extracting contigs." << endl;
//
//  }
//  cout << "Closing step2out." << endl;
//  mests2_step2out.close();
//  cout << "Closing step2strout." << endl;
//  mests2_step2strout.close();
//
//  cout << "Done with step 2." << endl;
}


// Step 3 of the miraEST pipeline.
// Switched off at the moment: the function prints a notice and ends the
//  process with exit(0), none of the parameters is used.
// The former implementation is kept below as commented-out code.
void miraESTstep3(vector<MIRAParameters> & Pv, int argc, char ** argv)
{
  cout << "De-activated atm, sorry." << endl;
  exit(0);

//  // in the last step, the virtual 'reads' get assembled
//  // by setting AssumeSNPInsteadofRepeats to true, PALV (possible
//  //  allellic variation) or PAVS (possible allellic variation
//  //  with SNP) will be set when conflicts occur instead of SRMB
//  // the assembler will therefore work in cluster-mode and not
//  //  break those 'misassemblies'
//
//  cout << "Starting step 3:\n\tclustering contigs\n\tfinding possible allelic variances\n\tfinding possible allelic variances with SNP\n\n";
//
//  //loadparams(Pv,"me_step3.par", &step3par[0]);
//
//  const_cast<assembly_parameters &>(Pv[0].getAssemblyParams()).as_put_asswithmira_tags=true;
//
//  Assembly as(Pv, false);
//
//  as.loadSequenceData();
//  as.assemble();
//  as.saveResults();
}

// Entry point of the EST SNP search pipeline.
// Switched off at the moment: the function prints a notice and ends the
//  process with exit(0); argc and argv are not used and the trailing return
//  is never reached.
// The former implementation (parameter parsing, then dispatch to step 1, 2
//  or 3) is kept below as commented-out code.
void miraEST(int argc, char ** argv)
{
  cout << "De-activated atm, sorry." << endl;
  exit(0);

//  cout << "This is miraEST "MIRAVERSION" for EST SNP analysis in strains.\n\n";
//
//  cout << "De-activated atm, step 2&3 need to adapt to new loading system, sorry." << endl;
//  exit(0);
//
//  cout << "Please cite: Chevreux, B., Pfisterer, T., Drescher, B., Driesel, A. J.,\nMueller, W. E., Wetter, T. and Suhai, S. (2004),\nUsing the miraEST Assembler for Reliable and Automated mRNA Transcript\nAssembly and SNP Detection in Sequenced ESTs. Genome Research, 14(6).\n\n";
//
//  //cout << "miraEST has been de-activated in this development version as necessary adaptations there have not been made yet, sorry.\n";
//  //doAbort();
//
//  dumpStdMsg();
//
//  try{
//    vector<MIRAParameters> X;
//    //loadparams(X,"", &step1par[0]);
//    //loadparams(X,"", &step2par[0]);
//    //loadparams(X,"", &step3par[0]);
//  }
//  catch(...){
//    cout << "Internal error: one of the default parameter files caused an error while parsing, aborting.\n";
//    exit(1000);
//  }
//
//  vector<MIRAParameters> Pv;
//  MIRAParameters::setupStdMIRAParameters(Pv);
//  {
//    MIRAParameters::parse(argc, argv, Pv);
//    cout << "\nParameters parsed without error, perfect.\n\n";
//    MIRAParameters::postParsingChanges(Pv);
//  }
//
//  string csfile="step1_res_cafstrainnames.txt";
//
//  uint32 startstep=Pv[0].getSpecialParams().sp_est_startstep;
//
//  switch(startstep) {
//    case 1 : {
//      miraESTstep1(Pv, csfile, argc, argv);
//     break;
//    }
//    case 2 : {
//      miraESTstep2(Pv, csfile, argc, argv);
//      break;
//    }
//    case 3 : {
//      miraESTstep3(Pv, argc, argv);
//      break;
//    }
//  default : {
//    throw Notify(Notify::FATAL, "main", ": miraEST SNP pipeline.step is not 1,2 or 3");
//  }
//  }
//
//  cout << "\n\nEnd of assembly process, thank you for using miraEST." << endl;
//
  return;
}



void miraMain(int argc, char ** argv, bool resumeassembly)
{
  FUNCSTART("void mira(int argc, char ** argv, bool resumeassembly)");

  // that loop is straight from the GNU getopt_long example
  // http://www.gnu.org/s/hello/manual/libc/Getopt-Long-Option-Example.html
  while (1){
    static struct option long_options[] =
      {
	{"help",  no_argument,           0, 'h'},
	{"resume", no_argument,          0, 'r'},
	{"version", no_argument,         0, 'v'},
	{"cwd", required_argument,       0, 'c'},
	{"job", optional_argument,       0, ' '},        // catch old command line
	{"project", optional_argument,   0, ' '},        // catch old command line
	{0, 0, 0, 0}
      };
    /* getopt_long stores the option index here. */
    int option_index = 0;

    int c = getopt_long (argc, argv, "hrvc: ",
		     long_options, &option_index);

    if (c == -1) break;

    switch (c) {
    case 'c':
      if(optarg){
	int ret=chdir(optarg);
	if(ret){
	  cout << "Changing working directory to '" << optarg << "' failed, system message is: " << strerror(errno) << endl;
	  exit(100);
	}
      }else{
	cout << "Missing directory name for option -c / --cwd=" << endl;
	exit(100);
      }
      break;
    case 'h':
      cout << "mira\t\tMIRALIB version " << MIRAVERSION << "\n"
	"Author:\t\tBastien Chevreux (bach@chevreux.org)\n"
	"Purpose:\tassemble sequencing data.\n\n";

      dumpStdMsg();
      cout << "Usage:\n"
	"mira [options] manifest_file [manifest_file ...]\n";
      cout << "\nOptions:\n";
      cout <<
	"  -c / --cwd=\t\tdirectory\tChange working directory\n"
	"  -r / --resume\t\t\t\tResume an interupted assembly\n"
	"  -h / --help\t\t\t\tPrint short help and exit\n"
	"  -v / --version\t\t\tPrint version and exit\n"
	;
      exit(0);
    case 'r':
      resumeassembly=true;
      break;
    case 'v':
      cout << MIRAVERSION << endl;
      exit(0);
    case ' ':
      cout << "It looks like you are using the old command line format of MIRA 3.4.x and earlier."
	"\nPlease look up in the manual on how to use manifest files for MIRA 3.9.x and later.\n";
      exit(0);
    default:
      abort ();
    }
  }

  if (optind == argc) {
    cout << "You did not specify a manifest file to load?\n";
    exit(100);
  }

  cout << "This is MIRA " MIRAVERSION ".\n\n";

  cout << "Please cite: Chevreux, B., Wetter, T. and Suhai, S. (1999), Genome Sequence\nAssembly Using Trace Signals and Additional Sequence Information.\nComputer Science and Biology: Proceedings of the German Conference on\nBioinformatics (GCB) 99, pp. 45-56.\n\n";

  dumpStdMsg();

  Manifest manifest;

  for(; optind < argc; ++optind) {
    manifest.loadManifestFile(argv[optind],resumeassembly);
  }

  cout << manifest;

  vector<MIRAParameters> Pv;
  MIRAParameters::setupStdMIRAParameters(Pv);

  MIRAParameters::generateProjectNames(Pv,manifest.getProjectName());

  string mparams(manifest.getFullMIRAParameterString());
  //cout << "Seen parameters in manifest: " << mparams << endl;
  MIRAParameters::parse(mparams, Pv);

  if(Pv[0].getSpecialParams().mi_printversion) exit(0);

  // some users make the error to use "mira" instead of miraSearchESTSNPs
  // this code takes care of it:
  // if start_step >0, then the miraSearchESTSNPs pipeline is used,
  //  else the normal mira
  if(Pv[0].getSpecialParams().sp_est_startstep){
    cout << "\nOooooops? You called the 'mira' executable but have parameters set for the"
      "\nEST-SNP-Search pipeline set. For this, you have to use the 'miraSearchESTSNPs'"
      "\nexecutable (sorry).\n\n";
    doAbort();
  }

  cout << "\nParameters parsed without error, perfect.\n\n";

  MIRAParameters::postParsingChanges(Pv);
  MIRAParameters::dumpAllParams(Pv, cout);



  //exit(0);

  {
    Assembly as(manifest, Pv, resumeassembly);

    //if(!resumeassembly) dumpCommandlineToFile(as, Pv, argc, argv);

    as.loadSequenceData();

    //doAbort();

    if(Pv[0].getAssemblyParams().as_filecheck_only==false){
      try {
	as.assemble();
	cout << "\n\n";
	as.setEverythingWentFine(true);
      }
      catch(const std::bad_alloc & e){
	cout << "Ouch, out of memory detected.\n";
	as.dumpMemInfo();
	throw;
      }
      catch(...){
	throw;
      }
    }
  }
  //Read::dumpStringContainerStats(cout);


  cout << "\n\nEnd of assembly process, thank you for using MIRA." << endl;

  return;
}



// Entry point of the MIRA preprocessing program.
// Switched off at the moment: the function prints a notice and ends the
//  process with exit(0); argc and argv are not used and the trailing return
//  is never reached.
// The former implementation is kept below as commented-out code.
void miraPre(int argc, char ** argv)
{
  cout << "De-activated atm, sorry." << endl;
  exit(0);

//  cout << "This is MIRA preprocessing "MIRAVERSION".\n\n";
//
//  cout << "Please cite: Chevreux, B., Wetter, T. and Suhai, S. (1999), Genome Sequence\nAssembly Using Trace Signals and Additional Sequence Information.\nComputer Science and Biology: Proceedings of the German Conference on\nBioinformatics (GCB) 99, pp. 45-56.\n\n";
//
//  dumpStdMsg();
//
//  vector<MIRAParameters> Pv;
//  MIRAParameters::setupStdMIRAParameters(Pv);
//  Pv[0].generateProjectOutNames(Pv, "pre");
//  MIRAParameters::parseQuickmode(
//    "\n\t-AS:nop=10:rbl=1:sd=no -GE:uti=no"
//    "\n\t-CO:mr=yes:mroir=no:asir=no"
//    "\n\t-SB:lb=0"
//    "\n\t-OUT:orc=no:orf=no:org=no:ora=no:orh=no:ors=no:ort=no"
//    "\n\t-OUT:otc=no:otf=no:otg=no:ota=no:oth=no:ots=no:ott=no"
//    "\n\t-OUT:oetc=no:oetf=no:oetg=no:oeta=no:oeth=no",
//    "", Pv, false, nullptr);
//
//  MIRAParameters::parse(argc, argv, Pv);
//  cout << "\nParameters parsed without error, perfect.\n\n";
//  MIRAParameters::postParsingChanges(Pv);
//  MIRAParameters::dumpAllParams(Pv, cout);
//
//  MIRAParameters paramscopy(Pv[0]);
//
//  Assembly as(Pv, false);
//  {
//    string filename;
//    filename=as.buildFileName(-1,"","",
//			      Pv[0].getAssemblyParams().as_outfile_callparams,
//			      "",
//			      Pv[0].getDirectoryParams().dir_info);
//
//    ofstream fout(filename.c_str(), ios::out | ios::trunc);
//    fout << argv[0];
//    for(int32 i=1; i<argc; i++){
//      fout << " " << argv[i];
//    }
//    fout << endl;
//    fout.close();
//  }
//
//  as.loadSequenceData();
//
//  as.prework();
//
//  //P=paramscopy;
//  //P.generateProjectNames();
//
//  cout << "\n\n\nPreprocessing finished, saving final results.\n\n\n";
//
//  {
//    ReadPool & rp= as.getReadPool();
//    assembly_parameters const & as_params= Pv[0].getAssemblyParams();
//
//    string filename;
//    filename= as_params.as_projectname_out+"_preprocessed_out.caf";
//    ofstream preout(filename.c_str(), ios::out | ios::trunc);
//    rp.dumpAs(preout,Read::AS_CAF,false);
//    preout.close();
//
//    cout << "Preprocessed reads are now in " << filename << '\n';
//  }
//
//  cout << "\n\nEnd of preprocessing, thank you for using MIRA." << endl;

  return;
}

// Entry point of the MIRA clipping program.
// Switched off at the moment: the function prints a notice and ends the
//  process with exit(0); argc and argv are not used and the trailing return
//  is never reached.
// The former implementation is kept below as commented-out code.
void miraClip(int argc, char ** argv)
{
  cout << "De-activated atm, sorry." << endl;
  exit(0);

//  cout << "This is MIRA clipping "MIRAVERSION".\n\n";
//
//  cout << "Please cite: Chevreux, B., Wetter, T. and Suhai, S. (1999), Genome Sequence\nAssembly Using Trace Signals and Additional Sequence Information.\nComputer Science and Biology: Proceedings of the German Conference on\nBioinformatics (GCB) 99, pp. 45-56.\n\n";
//
//  dumpStdMsg();
//
//  //cout << "MIRAClip must be reworked, sorry.\n";
//  //doAbort();
//
//  vector<MIRAParameters> Pv;
//  MIRAParameters::setupStdMIRAParameters(Pv);
//  {
//    MIRAParameters::parseQuickmode("--project=miraclip --noclipping","", Pv, false, nullptr);
//    MIRAParameters::parse(argc, argv, Pv);
//    cout << "\nParameters parsed without error, perfect.\n\n";
//
//    vector<int> indexesInPv;
//    indexesInPv.push_back(ReadGroupLib::SEQTYPE_SANGER);
//    indexesInPv.push_back(ReadGroupLib::SEQTYPE_454GS20);
//    indexesInPv.push_back(ReadGroupLib::SEQTYPE_SOLEXA);
//
//    cout << "\nMIRA settings specific for MIRAclip:\n";
//    MIRAParameters::dumpDataProcessingParams(Pv, indexesInPv, cout);
//    MIRAParameters::dumpClippingParams(Pv, indexesInPv, cout);
//
//  }
//
//
//
//  Assembly as(Pv, false);
//  {
//    string filename;
//    filename=as.buildFileName(-1,"","",
//			      Pv[0].getAssemblyParams().as_outfile_callparams,
//			      "",
//			      Pv[0].getDirectoryParams().dir_info);
//
//    ofstream fout(filename.c_str(), ios::out | ios::trunc);
//    fout << argv[0];
//    for(int32 i=1; i<argc; i++){
//      fout << " " << argv[i];
//    }
//    fout << endl;
//    fout.close();
//  }
//
//  as.loadSequenceData();
//
//  cout << "\n\n\nClipping process finished, saving final results.\n\n\n";
//
//  {
//    ReadPool & rp= as.getReadPool();
//
//    string filename;
//    filename= Pv[0].getAssemblyParams().as_outfile_CAF;
//    ofstream clipout(filename.c_str(), ios::out | ios::trunc);
//    rp.dumpAs(clipout,Read::AS_CAF,false);
//    clipout.close();
//    cout << "Clipped reads are now in: " << filename << '\n';
//  }
//
//  cout << "\n\nEnd of clipping, thank you for using MIRA." << endl;
//
  return;
}

// Ask a single-character question on cout and read the answer from cin.
//
//  question      : text of the prompt
//  possibilities : every character that is a valid answer; shown as
//                  "(a/b/)" behind the question if not empty
//  answer        : receives the accepted character
//  defchar       : answer to use if just an empty line is entered, shown
//                  as "[x]"; 0 means "no default"
//
// Only the first character of a line is looked at. The question is
//  repeated until a valid answer was given; the accepted answer is echoed
//  to cout.
//
// Throws std::runtime_error if cin cannot deliver any more input and no
//  valid answer could be derived from the default. Without that check the
//  loop would print the prompt forever once the input is exhausted.
void mme_askChar(const std::string & question, const std::string & possibilities, char & answer, const char defchar)
{
  for(;;){
    std::cout << question << ' ';
    if(!possibilities.empty()){
      std::cout <<"(";
      std::copy(possibilities.begin(),
		possibilities.end(),
		std::ostream_iterator<char> (std::cout, "/"));
      std::cout << ") ";
    }
    if(defchar!=0){
      std::cout << "[" << defchar << "] ";
    }

    std::string input;
    const bool gotline=static_cast<bool>(std::getline(std::cin,input));

    // empty input: fall back to the default if there is one
    if(input.empty() && defchar!=0) input=defchar;
    // guarantees that input[0] exists (it is '\0' if nothing was entered)
    input.resize(1);

    if(possibilities.find(input[0])!=std::string::npos){
      answer=input[0];
      break;
    }

    // asking again is pointless if there is nothing left to read
    if(!gotline){
      throw std::runtime_error("mme_askChar(): input ended before a valid answer was given");
    }
  }
  std::cout << answer << std::endl;
}

// Ask for a number on cout and read it from cin, without echoing the
// result ("NP" = no print).
//
//  question : text of the prompt
//  answer   : receives the parsed value
//  defd     : text to parse if just an empty line is entered, shown as
//             "[...]"; may be empty
//
// The number is parsed with strtod(). It may be followed (after optional
//  white space) by one of the modifiers k, m or g in either case, which
//  multiply the value by 1e3, 1e6 or 1e9. Any other character at that
//  place leads to a hint and the question being asked again.
//
// Throws std::runtime_error if the text is not acceptable and cin cannot
//  deliver any more input, as asking again would never end in that case.
void mme_askDoubleNP(const std::string & question, double & answer, const std::string & defd)
{
  bool doloop=true;
  while(doloop){
    std::cout << question << ' ';
    if(!defd.empty()){
      std::cout << "[" << defd << "] ";
    }

    std::string input;
    const bool gotline=static_cast<bool>(std::getline(std::cin,input));

    // empty input: fall back to the default if there is one
    if(input.empty() && !defd.empty()) {
      input=defd;
    }

    char * pend=nullptr;
    answer=std::strtod(input.c_str(),&pend);
    doloop=false;

    // try to parse kilo, mega, giga
    // the <cctype> functions need a value representable as unsigned char,
    //  a plain (possibly negative) char is undefined behaviour
    while(*pend != 0 && std::isspace(static_cast<unsigned char>(*pend))) ++pend;
    switch(std::toupper(static_cast<unsigned char>(*pend))){
    case 0 : break;
    case 'K' :{
      answer*=1000;
      break;
    }
    case 'M' :{
      answer*=1000000;
      break;
    }
    case 'G' :{
      answer*=1000000000;
      break;
    }
    default : {
      std::cout << "Please only use k, m, g as modifiers.\n";
      if(!gotline){
	throw std::runtime_error("mme_askDoubleNP(): input ended before a valid number was given");
      }
      doloop=true;
    }
    }
  }
}

// Like mme_askDoubleNP(), but the value that was read is echoed to cout
// on a line of its own afterwards.
void mme_askDouble(const string & question, double & answer, const string & defd)
{
  // ask first ...
  mme_askDoubleNP(question, answer, defd);

  // ... then show what was understood
  cout << answer;
  cout << endl;
}

void mme_askInt(const string & question, int64 & answer, const string & defint)
{
  double tmp;
  mme_askDoubleNP(question, tmp, defint);
  answer=static_cast<int64>(tmp);
  cout << answer << endl;
}


// Interactive memory estimator ("miraMEM").
//
// Prints the banner, then asks on the terminal for project type, genome
//  size, assembly type and the number (partly also the average length) of
//  reads per sequencing technology. From the answers it derives a rough
//  estimate of the RAM an assembly will need and prints it, split into
//  "reads+contigs", "large tables" and optional extras.
//
// All numbers in here are rules of thumb.
//
// The process is ended with exit(10) if a genome size below 100 is given.
void miraMemEstimate()
{
  cout << "This is MIRA " MIRAVERSION ".\n\n";

  cout << "Please cite: Chevreux, B., Wetter, T. and Suhai, S. (1999), Genome Sequence\nAssembly Using Trace Signals and Additional Sequence Information.\nComputer Science and Biology: Proceedings of the German Conference on\nBioinformatics (GCB) 99, pp. 45-56.\n\n";
  dumpStdMsg();

  cout << "\n\nmiraMEM helps you to estimate the memory needed to assemble a project.\n"
    "Please answer the questions below.\n\n"
    "Defaults are give in square brackets and chosen if you just press return.\n"
    "Hint: you can add k/m/g modifiers to your numbers to say kilo, mega or giga.\n\n";

  char yesno='n';
  char ptype=' ';
  // the denovo/mapping question is asked for genome projects only, but the
  //  answer is looked at for every project type further down: transcript
  //  projects are therefore treated as de-novo instead of reading an
  //  uninitialised variable
  char denovomapping='d';
  int64 seqsize=0;

  int64 numsanreads=0;
  int64 num454gs20reads=0;
  int64 num454flxreads=0;
  int64 num454titaniumreads=0;
  int64 numsxareads=0;
  int64 avgsxalen=0;
  int64 numpbsreads=0;
  int64 avgpbslen=0;
  int64 largestcontigexpected=0;

  // computed
  int64 totalexpectedbases=0;
  int64 totalreads=0;
  int64 readsinlargestcontig=0;
  int64 readbasesinlargestcontig=0;

  mme_askChar("Is it a genome or transcript (EST/tag/etc.) project?",
	      "ge",
	      ptype,
	      'g');
  if(ptype=='g'){
    mme_askInt("Size of genome?",
	       seqsize,
	       "4.5m");
    if(seqsize<100) {
      cout << "A sequence size of less than 100 bases is pretty improbable.\n"
	   << "Did you forget a modifier (k, m or g) to the number you gave?\n";
      exit(10);
    }
    largestcontigexpected=seqsize;
    if(largestcontigexpected>30*1000*1000){
      cout << "Looks like a larger eukaryote, guessing largest chromosome size: 30m\nChange if needed!\n";
      largestcontigexpected=30*1000*1000;
    }

    {
      // offer the value guessed so far as default for the next question
      string tmplc;
      ostringstream ostr;
      ostr << largestcontigexpected;
      tmplc=ostr.str();
      mme_askInt("Size of largest chromosome?",
		 largestcontigexpected,
		 tmplc);
    }

    mme_askChar("Is it a denovo or mapping assembly?",
		"dm",
		denovomapping,
		'd');
  }


  mme_askInt("Number of Sanger reads?",
	     numsanreads,
	     "0");
  mme_askChar("Are there 454 reads?",
	      "yn",
	      yesno,
	      'n');
  if(yesno=='y'){
    mme_askInt("Number of 454 GS20 reads?",
	       num454gs20reads,
	       "0");
    mme_askInt("Number of 454 FLX reads?",
	       num454flxreads,
	       "0");
    mme_askInt("Number of 454 Titanium reads?",
	       num454titaniumreads,
	       "0");
  }
  mme_askChar("Are there PacBio reads?",
	      "yn",
	      yesno,
	      'n');
  if(yesno=='y'){
    mme_askInt("Number of PacBio reads?",
	       numpbsreads,
	       "0");
    mme_askInt("Average PacBio length?",
	       avgpbslen,
	       "1100");
  }
  mme_askChar("Are there Solexa reads?",
	      "yn",
	      yesno,
	      'n');
  if(yesno=='y'){
    mme_askInt("Number of Solexa reads?",
	       numsxareads,
	       "0");
    mme_askInt("Average Solexe length?",
	       avgsxalen,
	       "75");
  }

  // fixed average read lengths are assumed for Sanger and 454 reads
  totalexpectedbases=numsanreads*1000;
  totalexpectedbases+=num454gs20reads*120;
  totalexpectedbases+=num454flxreads*260;
  totalexpectedbases+=num454titaniumreads*460;
  totalexpectedbases+=numsxareads*avgsxalen;
  totalexpectedbases+=numpbsreads*avgpbslen;

  totalreads=numsanreads;
  totalreads+=num454gs20reads;
  totalreads+=num454flxreads;
  totalreads+=num454titaniumreads;
  totalreads+=numsxareads;
  totalreads+=numpbsreads;

  if(ptype=='g'){
    if(denovomapping=='d'){
      readsinlargestcontig=totalreads/2;
      readbasesinlargestcontig=totalexpectedbases/2;
    }else{
      // mapping: the size given for the largest chromosome is replaced by
      //  the full sequence size
      largestcontigexpected=seqsize;
      readsinlargestcontig=totalreads;
      readbasesinlargestcontig=totalexpectedbases;

      // if solexa is mapped, there are less reads due to
      //  coverage equivalent mapping and virtual long reads
      // be conservative, reduce only by 50%
      if(numsxareads>0){
	readsinlargestcontig-=numsxareads/2;
      }
    }
  }else{
    // transcript project: no sequence size was asked for, use fixed values
    seqsize=50000;
    largestcontigexpected=seqsize;
    readsinlargestcontig=50000;
    readbasesinlargestcontig=readsinlargestcontig*1000; // number of reads times sanger length
  }

  // account for gaps with 454 reads
  if(num454flxreads>0 || num454gs20reads>0){
    largestcontigexpected+=largestcontigexpected/10;
    readbasesinlargestcontig+=readbasesinlargestcontig/10;
  }

  //cout << "totalreads: " << totalreads
  //     << "\nreadsinlargestcontig: " << readsinlargestcontig
  //     << "\ntotalexpectedbases: " << totalexpectedbases
  //     << "\nreadbasesinlargestcontig: " << readbasesinlargestcontig
  //     << endl;

  int64 livereads=totalreads+readsinlargestcontig;
  int64 livebases=totalexpectedbases+readbasesinlargestcontig;

  //cout << "livereads: " << livereads
  //     << "\nlivebases: " << livebases << endl;

  // divide as floating point: an integer division would chop off the
  //  fractional part of the coverage before it gets converted
  // (seqsize is never 0 here: at least 100 for genomes, 50000 otherwise)
  double avgcov=static_cast<double>(totalexpectedbases)/static_cast<double>(seqsize);
  avgcov-=avgcov/8; // in general we have 12% loss of usable data

  int64 numskimhits=static_cast<int64>(avgcov*850000);  // estimate skim hits, very rough

  int64 memneeded=0;

  // what do the reads need?
  memneeded=
    livereads*sizeof(Read)       // class size
    +livereads*200               // additional strings etc.
    +livereads*4*sizeof(tag_t)   // on average 4 tags per read
    +livebases*8;                // sequences, qualities, adjustments, base flags

  // new: solexa reads don't have adjustments
  // yeah, but estimate is already small enough, keep it
  //memneeded-=(numsxareads*avgsxalen) * 2;


  // what does a contig need?
  //  (note: the needs for the reads themselves are already
  //   accounted for in the section above)
  memneeded+=
    //readsinlargestcontig*sizeof(Contig::contigread_t)
    //+readsinlargestcontig*sizeof(Contig::out_order_t)

    readsinlargestcontig*40       // 40 == rough guesstimate for PlacedContigReads
    +totalreads*9                              /* templates, mapping
						 allowedrefids */
    +largestcontigexpected*sizeof(Contig::consensus_counts_t)
    +largestcontigexpected*10;              // adjustments and some reserve

  int64 memforlargetables=0;
  // some more overhead by the assembly class
  memforlargetables+= totalreads*20;

  // get the skim edges accounted
  int64 skimhitsmem=numskimhits*2*sizeof(skimedges_t);
  // since 2.9.40 there's the possibility to cap that memory
  // use default value
  // NOTE(review): the test is against 1 GiB while the value set is 2 GiB
  //  (4 GiB with Solexa reads) -- confirm that this is intended
  if(skimhitsmem>1024L*1024*1024){
    skimhitsmem=2LL*1024*1024*1024;
    if(numsxareads>0) skimhitsmem*=2;
  }

  // mem needed for temporary skim need
  int64 tmpskim=500*1000*1000;

  memforlargetables+=max(skimhitsmem,tmpskim);

  // possible vector leftover clip
  int64 memforpvlc=0;
  {
    // AS_readhitmiss & AS_readhmcovered
    memforpvlc=totalexpectedbases*8;
    // overhead of the structures above
    memforpvlc+=sizeof(vector<uint32>)*totalreads*2;

    // AS_count_rhm
    memforpvlc+=totalreads*4;
  }

  // ok, 1MB of additional small things
  int64 memneededfordata=memneeded+(1024*1024);

  // experience shows that not all has been accounted for
  //  and internal mem caching of memory allocators add another
  //  layer of RAM needs
  //
  // add 40% to estimates
  //  but not if we have mapping with Solexas
  if(denovomapping!='m' && numsxareads==0){
    memneededfordata+=memneededfordata/100*40;
    memforlargetables+=memforlargetables/100*40;
  }

  cout.setf(ios::fixed, ios::floatfield);
  //cout.setf(ios::showpoint);
  cout.precision(1);

  cout << "\n\n************************* Estimates *************************\n\n";
  // last, if it's an EST assembly, there is no seqsize

  if(ptype=='e'){
    cout << "EST assembly, cannot give coverage estimate. Also, estimates"
      "\nmay be way off for pathological cases.\n";

  }else{
    cout << "The contigs will have an average coverage of ~ " << avgcov
	 << " (+/- 10%)"
      "\nEstimates may be way off for pathological cases.\n";
  }

  cout << "\nRAM estimates:"
    "\n" << setw(40) << "reads+contigs (unavoidable): ";
  byteToHumanReadableSize(memneededfordata,cout);
  cout << "\n" << setw(40) << "large tables (tunable): ";
  byteToHumanReadableSize(memforlargetables,cout);
  cout << "\n" << setw(40) << "" << "---------"
    "\n" << setw(40) << "total (peak): ";
  byteToHumanReadableSize(memforlargetables+memneededfordata,
			  cout);
  cout << "\n\n" << setw(40) << "add if using -CL:pvlc=yes : ";
  byteToHumanReadableSize(memforpvlc,cout);
  if(denovomapping=='m' && numsxareads>0){
    // the 40% that were not added above
    int64 notusingmerge=memneededfordata/100*40;
    cout << "\n" << setw(40) << "add if setting -CO:msr=no : ";
    byteToHumanReadableSize(notusingmerge,cout);
  }
  cout << "\n\n"
    "Note that some algorithms might try to grab more memory if"
    "\nthe need arises and the system has enough RAM. The options"
    "\nfor automatic memory management control this:"
    "\n  -AS:amm, -AS:kpmf, -AS:mps"
    "\nFurther switches that might reduce RAM (at cost of run time"
    "\nor accuracy):"
    "\n  -SK:mhim, -SK:mchr (both runtime); -SK:mhpr (accuracy)\n"
    "*************************************************************\n";

}


// The usual Linux stack size of 8 MiB leads to segfaults in very long
//  alignments (>15-18k) in align.C.
// Therefore, request a larger stack: currently 64 MiB.

#include <sys/resource.h>
/*
 * Tries to raise the soft stack size limit (RLIMIT_STACK) of the running
 * process to 64 MiB if it is currently lower than that.
 *
 * The request is clamped to the hard limit: an unprivileged process may
 * not raise the soft limit above the hard limit, and setrlimit() would
 * reject the whole request instead of granting what is possible. With the
 * clamp, a system whose hard limit is below 64 MiB still gets the largest
 * stack it allows.
 *
 * Failures are reported on cout but are not fatal; the process simply
 * continues with the stack size it already has.
 */
void getMoreStack()
{
  const rlim_t wantstacksize=64L*1024L*1024L;

  struct rlimit rl;
  if(getrlimit(RLIMIT_STACK, &rl) != 0){
    std::cout << "could not query stack size?\n";
    return;
  }

  // never ask for more than the hard limit permits
  rlim_t newstacksize=wantstacksize;
  if(rl.rlim_max!=RLIM_INFINITY && rl.rlim_max<newstacksize){
    newstacksize=rl.rlim_max;
  }

  // an unlimited soft limit is already more than we want: leave it alone
  if(rl.rlim_cur!=RLIM_INFINITY && rl.rlim_cur<newstacksize){
    rl.rlim_cur=newstacksize;
    if(setrlimit(RLIMIT_STACK, &rl) != 0){
      std::cout << "could not increase stack size?\n";
    }
  }
}

int main(int argc, char ** argv)
{
#ifdef MIRAMEMORC
  MemORC::setChecking(true);
#endif

  fixQuirks();
  getMoreStack();

#ifdef TIMERESTRICTED
  {
    struct stat st;
    int rc=stat(dir_params.dir_log.c_str(),&st);
    struct tm *mytm;
    mytm=localtime(&st.st_mtime);

    if(mytm->tm_year > TR_MAXYEAR
       || (mytm->tm_year == TR_MAXYEAR
	   && mytm->tm_mon >= TR_OUT_MAXMONTH)){
      cerr << "\n\nThis version of MIRA is definitively old, please get a newer version of the assembler.\n";
      exit(0);
    }
  }
#endif

  //cpu_set_t mask;
  //cout << "####" << sizeof(cpu_set_t) << endl;
  //sched_getaffinity(0,sizeof(cpu_set_t),&mask);
  //
  //cout << "Affinity: " << (uint8 *) &mask[0] << endl;


  string path;
  string miraprog;

  splitFullPathAndFileName(argv[0],path,miraprog);
  boost::to_lower(miraprog);

  try{
    if(miraprog=="mira"){
      miraMain(argc,argv, false);
    } else if(miraprog=="miraresume"){
      miraMain(argc,argv, true);
    } else if(miraprog=="mirasearchestsnps"){
      miraEST(argc,argv);
    } else if(miraprog=="miraclip"){
      miraClip(argc,argv);
    } else if(miraprog=="mirapre"){
      miraPre(argc,argv);
    } else if(miraprog=="miramem"){
      miraMemEstimate();
    } else {
      cout << miraprog << " is a non-recognised program name of MIRA.\n"
	"The programs SHOULD be named either\n"
	"\"mira\", \"miraResume\", \"miraSearchESTSNPs\", \"miraCLIP\", \"miraPRE\" or \"miraMEM\""
	   "\nAssuming 'mira'\n" << endl;

      miraMain(argc,argv, false);
    }
  }
  catch(Notify n){
    n.handleError("main");
  }
  catch(Flow f){
    cout << "INTERNAL ERROR: Unexpected exception: Flow()\n";
    doAbort();
  }
  catch(const std::bad_alloc & e){
    cout << "Out of memory detected, exception message is: ";
    cout << e.what() << endl;

    if(sizeof(size_t) == sizeof(int32)){
      cout << "\nYou are running a 32 bit executable. Please note that the maximum"
	"\ntheoretical memory a 32 bit programm can use (be it in Linux, Windows or"
	"\nother) is 4 GiB, in practice less: between 2.7 and 3.3 GiB. This is valid"
	"\neven if your machine has hundreds of GiB."
	"\nShould your machine have more that 4 GiB, use a 64 bit OS and a 64 bit"
	"\nversion of MIRA.";
    }

    cout << "\n\nIf you have questions on why this happened, please send the last 1000"
      "\nlines of the output log (or better: the complete file) to the author"
      "\ntogether with a short summary of your assembly project.\n\n";

    doAbort();
  }
  catch(const ios_base::failure & e){
    cout << "Failure in IO stream detected, exception message is: "
	 << e.what() << endl
	 << "\nWe perhaps ran out of disk space or hit a disk quota?\n";
    doAbort();
  }
  catch (exception& e)
  {
    cout << "A 'standard' exception occured (that's NOT normal):\n" << e.what() << "\n\nIf the cause is not immediatly obvious, please contact: bach@chevreux.org\n\n";
    doAbort();
  }
  catch(...){
    cout << "Unknown exception caught, aborting the process.\n\nPlease contact: bach@chevreux.org\n\n";
    doAbort();
  }

#ifndef PUBLICQUIET
  Read::dumpStringContainerStats(cout);
#endif

  return 0;
}
