/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "BowtieWorker.h"
#include "BowtiePlugin.h"

#include <workflow/IntegralBusModel.h>
#include <workflow/WorkflowEnv.h>
#include <workflow/WorkflowRegistry.h>
#include <workflow_support/CoreDataTypes.h>
#include <workflow_library/BioDatatypes.h>
#include <workflow_library/BioActorLibrary.h>
#include <workflow_support/DelegateEditors.h>
#include <workflow_support/CoreLibConstants.h>
#include <core_api/Log.h>
#include <util_gui/DialogUtils.h>

/* TRANSLATOR GB2::LocalWorkflow::BowtieWorker */

namespace GB2 {
namespace LocalWorkflow {

// Identifiers of the two input ports of the Bowtie aligner actor.
static const QString READS_PORT("reads");
static const QString EBWT_PORT("ebwt");

// Identifiers of the three actor prototypes described in this file.
const QString BowtieWorkerFactory::ACTOR_ID("bowtie.assembly");
const QString BowtieBuildWorkerFactory::ACTOR_ID("bowtie.indexer");
const QString BowtieIndexReaderWorkerFactory::ACTOR_ID("bowtie.index.reader");

// Identifiers of actor attributes. REFSEQ_URL_ATTR and EBWT_URL_ATTR are used by the
// index builder / index reader actors below; USE_PREBUILT_INDEX_ATTR is referenced only
// from commented-out code in this file; the remaining ones configure the aligner actor.
const QString REFSEQ_URL_ATTR("ref-seq-url");
const QString EBWT_URL_ATTR("ebwt");
const QString USE_PREBUILT_INDEX_ATTR("use-prebuilt-index");
const QString N_MODE_MISMATCHES_ATTR("n-mismatches");
const QString V_MODE_MISMATCHES_ATTR("v-mismatches");
const QString MAQERR_ATTR("maqerr");
const QString SEEDLEN_ATTR("seedlen");
const QString NOMAQROUND_ATTR("nomaqround");
const QString NOFW_ATTR("nofw");
const QString NORC_ATTR("norc");
const QString MAXBTS_ATTR("maxbts");
const QString TRYHARD_ATTR("tryhard");
const QString CHUNKMBS_ATTR("chunkmbs");
const QString SEED_ATTR("seed");

// Log category shared by all workers in this file.
static LogCategory log(ULOG_CAT_WD);

// Describes the "Bowtie aligner" actor and registers it:
//  - declares two input ports (short reads, EBWT index) and one output port (alignment);
//  - declares the aligner attributes together with their spin-box editors;
//  - registers the prototype in the assembly category and a BowtieWorkerFactory
//    instance in the local workflow domain.
void BowtieWorkerFactory::init() {
	QList<PortDescriptor*> p; QList<Attribute*> a;
	Descriptor readsd(READS_PORT, BowtieWorker::tr("Short read sequences"), BowtieWorker::tr("Short reads to be aligned."));
	Descriptor ebwtd(EBWT_PORT, BowtieWorker::tr("EBWT index"), BowtieWorker::tr("EBWT index of reference sequence."));
	Descriptor oud(CoreLibConstants::OUT_PORT_ID, BowtieWorker::tr("Short reads alignment"), BowtieWorker::tr("Result of alignment."));
	
	// Ports: reads and index come in, the multiple alignment goes out.
	p << new PortDescriptor(readsd, BioDataTypes::DNA_SEQUENCE_TYPE(), true /*input*/);
	p << new PortDescriptor(ebwtd, BowtiePlugin::EBWT_INDEX_TYPE(), true /*input*/, true /*multi*/, BusPort::BLIND_INPUT);
	p << new PortDescriptor(oud, BioDataTypes::MULTIPLE_ALIGNMENT_TYPE(), false /*input*/, true /*multi*/);
	
	// Display names and help texts of the actor and of each of its attributes.
	// NOTE(review): the help texts split with a trailing backslash keep the leading
	// whitespace of the continuation line inside the string literal.
	//Descriptor refseq(REFSEQ_URL_ATTR, BowtieWorker::tr("Reference"), 
	//	BowtieWorker::tr("Reference sequence url. The short reads will be aligned to this reference genome."));
	Descriptor desc(ACTOR_ID, BowtieWorker::tr("Bowtie aligner"), 
		BowtieWorker::tr("An ultrafast memory-efficient short read aligner, http://bowtie-bio.sourceforge.net"));
	//Descriptor use_prebuilt_index(USE_PREBUILT_INDEX_ATTR, BowtieWorker::tr("prebuilt index"), 
	//	BowtieWorker::tr("Using prebuilt ebwt index instead of reference sequence"));
	Descriptor n_mismatches(N_MODE_MISMATCHES_ATTR, BowtieWorker::tr("-n alignment mode"), 
		BowtieWorker::tr("Alignments may have no more than N mismatches (where N is a number 0-3, set with -n) in the first L bases \
						 (where L is a number 5 or greater, set with -l) on the high-quality (left) end of the read. The first L bases are called the \"seed\"."));
	Descriptor v_mismatches(V_MODE_MISMATCHES_ATTR, BowtieWorker::tr("-v alignment mode"), 
		BowtieWorker::tr("\"-1\" - use default value. Report alignments with at most <int> mismatches. -e and -l options are ignored and quality values have no effect on what alignments are valid. -v is mutually exclusive with -n."));
	Descriptor maqerr(MAQERR_ATTR, BowtieWorker::tr("Maximum permitted total of quality values"), 
		BowtieWorker::tr("bowtie \"-e/--maqerr\" option. Maximum permitted total of quality values at all mismatched read positions throughout the entire alignment, not just in the \"seed\". The default is 70."));
	Descriptor seedlen(SEEDLEN_ATTR, BowtieWorker::tr("Seed length"), 
		BowtieWorker::tr("bowtie \"-l/--seedlen\" option. The \"seed length\"; i.e., the number of bases on the high-quality end of the read to which the -n ceiling applies. The lowest permitted setting is 5 and the default is 28. bowtie is faster for larger values of -l."));
	Descriptor nomaqround(NOMAQROUND_ATTR, BowtieWorker::tr("Skip Maq quality rounding"), 
		BowtieWorker::tr("bowtie \"--nomaqround\" option. Maq accepts quality values in the Phred quality scale, but internally rounds values to the nearest 10, with a maximum of 30. By default, bowtie also rounds this way. --nomaqround prevents this rounding in bowtie."));
	Descriptor nofw(NOFW_ATTR, BowtieWorker::tr("Do not align against the forward reference strand"), 
		BowtieWorker::tr("bowtie \"--nofw\" option. If --nofw is specified, bowtie will not attempt to align against the forward reference strand."));
	Descriptor norc(NORC_ATTR, BowtieWorker::tr("Do not align against the reverse-complement reference strand"), 
		BowtieWorker::tr("bowtie \"--norc\" option. If --norc is specified, bowtie will not attempt to align against the reverse-complement reference strand."));
	Descriptor maxbts(MAXBTS_ATTR, BowtieWorker::tr("Maximum number of backtracks permitted"), 
		BowtieWorker::tr("bowtie \"--maxbts\" option. The maximum number of backtracks permitted when aligning a read in -n 2 or -n 3 mode (default: 125 without --best, 800 with --best). \
						 A \"backtrack\" is the introduction of a speculative substitution into the alignment. Without this limit, the default parameters will \
						 sometimes require that bowtie try 100s or 1,000s of backtracks to align a read, especially if the read has many low-quality bases and/or \
						 has no valid alignments, slowing bowtie down significantly. However, this limit may cause some valid alignments to be missed. Higher limits \
						 yield greater sensitivity at the expensive of longer running times."));
	Descriptor tryhard(TRYHARD_ATTR, BowtieWorker::tr("Tryhard"), 
		BowtieWorker::tr("bowtie \"-y/--tryhard\" option.Try as hard as possible to find valid alignments when they exist, including paired-end alignments."));
	Descriptor chunkmbs(CHUNKMBS_ATTR, BowtieWorker::tr("Chunk Mbs"), 
		BowtieWorker::tr("bowtie \"--chunkmbs\" option. The number of megabytes of memory a given thread is given to store path descriptors in --best mode."));
	Descriptor seed(SEED_ATTR, BowtieWorker::tr("Pseudo random seed number"), 
		BowtieWorker::tr("bowtie \"--seed\" option. Use <int> as the seed for pseudo-random number generator. \"-1\" - use pseudo random"));

	//a << new Attribute(refseq, CoreDataTypes::STRING_TYPE(), true /*required*/, QString());
	//a << new Attribute(use_prebuilt_index, CoreDataTypes::BOOL_TYPE(), true /*required*/, false);

	// All attributes are optional. The last constructor argument is presumably the
	// default value (70 and 28 match the defaults quoted in the help texts above).
	a << new Attribute(n_mismatches, CoreDataTypes::NUM_TYPE(), false /*not required*/, 2);
	a << new Attribute(v_mismatches, CoreDataTypes::NUM_TYPE(), false /*not required*/, -1);
	a << new Attribute(maqerr, CoreDataTypes::NUM_TYPE(), false /*not required*/, 70);
	a << new Attribute(seedlen, CoreDataTypes::NUM_TYPE(), false /*not required*/, 28);
	a << new Attribute(nomaqround, CoreDataTypes::BOOL_TYPE(), false /*not required*/, false);
	a << new Attribute(nofw, CoreDataTypes::BOOL_TYPE(), false /*not required*/, false);
	a << new Attribute(norc, CoreDataTypes::BOOL_TYPE(), false /*not required*/, false);
	a << new Attribute(maxbts, CoreDataTypes::NUM_TYPE(), false /*not required*/, -1);
	a << new Attribute(tryhard, CoreDataTypes::BOOL_TYPE(), false /*not required*/, false);
	a << new Attribute(chunkmbs, CoreDataTypes::NUM_TYPE(), false /*not required*/, 64);
	a << new Attribute(seed, CoreDataTypes::NUM_TYPE(), false /*not required*/, -1);

	ActorPrototype* proto = new BusActorPrototype(desc, p, a);

	QMap<QString, PropertyDelegate*> delegates;    

	// Spin-box editors for the numeric attributes; each map carries the bounds
	// ("minimum" / "maximum") handed to the corresponding SpinBoxDelegate.
	//delegates[REFSEQ_URL_ATTR] = new URLDelegate(DialogUtils::prepareDocumentsFileFilter(true), QString(), true);
	{
		QVariantMap _n; _n["minimum"] = 0; _n["maximum"] = 3;
		delegates[N_MODE_MISMATCHES_ATTR] = new SpinBoxDelegate(_n);
		QVariantMap _v; _v["minimum"] = -1; _v["maximum"] = 3;
		delegates[V_MODE_MISMATCHES_ATTR] = new SpinBoxDelegate(_v);
		QVariantMap _l; _l["minimum"] = 5;
		delegates[SEEDLEN_ATTR] = new SpinBoxDelegate(_l);
		QVariantMap _e; _e["minimum"] = 1;
		delegates[MAQERR_ATTR] = new SpinBoxDelegate(_e);
		QVariantMap _maxbts; _maxbts["minimum"] = -1;
		delegates[MAXBTS_ATTR] = new SpinBoxDelegate(_maxbts);
		QVariantMap _chunkmbs; _chunkmbs["minimum"] = 1;
		delegates[CHUNKMBS_ATTR] = new SpinBoxDelegate(_chunkmbs);
		QVariantMap _seed; _seed["minimum"] = -1;
		delegates[SEED_ATTR] = new SpinBoxDelegate(_seed);
	}
	proto->setEditor(new DelegateEditor(delegates));
	proto->setPrompter(new BowtiePrompter());
	proto->setIconPath(":core/images/align.png");
	WorkflowEnv::getProtoRegistry()->registerProto(BioActorLibrary::CATEGORY_ASSEMBLY(), proto);

	// Make the worker available in the local workflow domain.
	DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
	localDomain->registerEntry(new BowtieWorkerFactory());
}

// Composes the rich-text summary of the aligner actor. The labels of the actors
// producing the reads and the EBWT index are inserted (underlined) when such
// producers exist; otherwise the corresponding placeholder stays empty.
QString BowtiePrompter::composeRichDoc() {
	BusPort* readsPort = qobject_cast<BusPort*>(target->getPort(READS_PORT));
	Actor* readsSource = readsPort->getProducer(READS_PORT);
	BusPort* indexPort = qobject_cast<BusPort*>(target->getPort(EBWT_PORT));
	Actor* indexSource = indexPort->getProducer(EBWT_PORT);

	QString readsPart = "";
	if (readsSource) {
		readsPart = tr(" from <u>%1</u>").arg(readsSource->getLabel());
	}

	QString indexPart = "";
	if (indexSource) {
		indexPart = tr(" from <u>%1</u>").arg(indexSource->getLabel());
	}

	return tr("Align short reads %1 to the reference genome %2 and send it to output.")
		.arg(readsPart).arg(indexPart);
}

void BowtieWorker::init() {
	reads = ports.value(READS_PORT);
	ebwt = ports.value(EBWT_PORT);
	output = ports.value(CoreLibConstants::OUT_PORT_ID);
	//settings.refSeqUrl = actor->getParameter(REFSEQ_URL_ATTR)->getAttributeValue<QString>();
	//settings.setCustomValue(BowtieTask::OPTION_PREBUILT_INDEX, actor->getParameter(USE_PREBUILT_INDEX_ATTR)->getAttributeValue<bool>());
	settings.setCustomValue(BowtieTask::OPTION_PREBUILT_INDEX, true);
	settings.setCustomValue(BowtieTask::OPTION_N_MISMATCHES, actor->getParameter(N_MODE_MISMATCHES_ATTR)->getAttributeValue<int>());
	settings.setCustomValue(BowtieTask::OPTION_V_MISMATCHES, actor->getParameter(V_MODE_MISMATCHES_ATTR)->getAttributeValue<int>());
	settings.setCustomValue(BowtieTask::OPTION_MAQERR, actor->getParameter(MAQERR_ATTR)->getAttributeValue<int>());
	settings.setCustomValue(BowtieTask::OPTION_SEED_LEN, actor->getParameter(SEEDLEN_ATTR)->getAttributeValue<int>());
	settings.setCustomValue(BowtieTask::OPTION_NOMAQROUND, actor->getParameter(NOMAQROUND_ATTR)->getAttributeValue<bool>());
	settings.setCustomValue(BowtieTask::OPTION_NOFW, actor->getParameter(NOFW_ATTR)->getAttributeValue<bool>());
	settings.setCustomValue(BowtieTask::OPTION_NORC, actor->getParameter(NORC_ATTR)->getAttributeValue<bool>());
	settings.setCustomValue(BowtieTask::OPTION_MAXBTS, actor->getParameter(MAXBTS_ATTR)->getAttributeValue<int>());
	settings.setCustomValue(BowtieTask::OPTION_TRYHARD, actor->getParameter(TRYHARD_ATTR)->getAttributeValue<bool>());
	settings.setCustomValue(BowtieTask::OPTION_CHUNKMBS, actor->getParameter(CHUNKMBS_ATTR)->getAttributeValue<int>());
	settings.setCustomValue(BowtieTask::OPTION_SEED, actor->getParameter(SEED_ATTR)->getAttributeValue<int>());
}

// The worker is ready when both input channels exist and each holds a message.
bool BowtieWorker::isReady() {
	if (!reads || !reads->hasMessage()) {
		return false;
	}
	if (!ebwt) {
		return false;
	}
	return ebwt->hasMessage();
}

// Collects all short reads from the reads channel, takes the index URL from the
// ebwt channel and creates the alignment task. Returns NULL when there are no
// reads or the index URL is empty; otherwise returns a new BowtieTask whose state
// changes are delivered to sl_taskFinished().
// NOTE(review): the loop keeps pulling messages until the channel reports that it
// has ended and never checks hasMessage() - confirm that get() is safe when the
// channel is temporarily empty.
Task* BowtieWorker::tick() {
	while (!reads->isEnded()) {
		const QVariant payload = reads->get().getData();
		DNASequence shortRead = payload.value<DNASequence>();
		log.trace(BowtieWorker::tr("Loaded short read %1").arg(shortRead.getName()));
		settings.shortReads.append(shortRead);
	}

	const QString indexPath = ebwt->get().getData().value<QString>();
	settings.refSeqUrl = GUrl(indexPath);

	if (settings.shortReads.isEmpty()) {
		log.error(BowtieWorker::tr("Short reads list is empty."));
		return NULL;
	}

	if (settings.refSeqUrl.isEmpty()) {
		log.trace(BowtieWorker::tr("Reference sequence URL is empty"));
		return NULL;
	}

	Task* alignTask = new BowtieTask(settings);
	connect(alignTask, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
	return alignTask;
}

void BowtieWorker::sl_taskFinished() {
	BowtieTask* t = qobject_cast<BowtieTask*>(sender());
	if (t->getState() != Task::State_Finished) {
		return;
	}

	QVariant v = qVariantFromValue<MAlignment>(t->getResult());
	output->put(Message(BioDataTypes::MULTIPLE_ALIGNMENT_TYPE(), v));
	if (reads->isEnded() && ebwt->isEnded()) {
		output->setEnded();
	}
	log.trace(tr("Bowtie alignment finished. Result name is %1").arg(t->getResult().getName()));
}

// The worker is done when it has no reads channel or that channel has ended.
bool BowtieWorker::isDone() {
	if (reads) {
		return reads->isEnded();
	}
	return true;
}

/************************************************************************/
/* Bowtie build worker                                                  */
/************************************************************************/

void BowtieBuildWorkerFactory::init() {
	QList<PortDescriptor*> p; QList<Attribute*> a;
	Descriptor oud(CoreLibConstants::OUT_PORT_ID, BowtieBuildWorker::tr("EBWT index"), BowtieBuildWorker::tr("Result ebwt index."));
	p << new PortDescriptor(oud, BowtiePlugin::EBWT_INDEX_TYPE(), false /*input*/, true /*multi*/);
	Descriptor refseq(REFSEQ_URL_ATTR, BowtieBuildWorker::tr("Reference"), 
		BowtieBuildWorker::tr("Reference sequence url. The short reads will be aligned to this reference genome."));
	Descriptor desc(ACTOR_ID, BowtieBuildWorker::tr("Bowtie-build indexer"), 
		BowtieWorker::tr("Bowtie-build builds a Bowtie index from a set of DNA sequences. bowtie-build outputs a set of 6 files with suffixes .1.ebwt, .2.ebwt, .3.ebwt, .4.ebwt, .rev.1.ebwt, and .rev.2.ebwt. These files together constitute the index: they are all that is needed to align reads to that reference. The original sequence files are no longer used by Bowtie once the index is built."));
	Descriptor ebwt(EBWT_URL_ATTR, BowtieBuildWorker::tr("EBWT"), 
		BowtieBuildWorker::tr("Output index url."));

	a << new Attribute(refseq, CoreDataTypes::STRING_TYPE(), true /*required*/, QString());
	a << new Attribute(ebwt, CoreDataTypes::STRING_TYPE(), true /*required*/, QString());

	ActorPrototype* proto = new BusActorPrototype(desc, p, a);

	QMap<QString, PropertyDelegate*> delegates;    

	delegates[REFSEQ_URL_ATTR] = new URLDelegate(DialogUtils::prepareDocumentsFileFilter(true), QString(), true);
	delegates[EBWT_URL_ATTR] = new URLDelegate(DialogUtils::prepareDocumentsFileFilter(true), QString(), true);

	proto->setEditor(new DelegateEditor(delegates));
	proto->setPrompter(new BowtieBuildPrompter());
	proto->setIconPath(":core/images/align.png");
	WorkflowEnv::getProtoRegistry()->registerProto(BioActorLibrary::CATEGORY_ASSEMBLY(), proto);

	DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
	localDomain->registerEntry(new BowtieBuildWorkerFactory());
}

// Composes the rich-text summary of the indexer actor; the file name of the
// reference sequence URL is inserted (underlined) when the URL is set.
QString BowtieBuildPrompter::composeRichDoc() {
	const QString url = getParameter(REFSEQ_URL_ATTR).toString();

	QString shownName = "";
	if (!url.isEmpty()) {
		shownName = QString("<u>%1</u>").arg(GUrl(url).fileName());
	}

	return tr("Build ebwt index from %1 and send it url to output.").arg(shownName);
}

void BowtieBuildWorker::init() {
	output = ports.value(CoreLibConstants::OUT_PORT_ID);
	refSeqUrl = actor->getParameter(REFSEQ_URL_ATTR)->getAttributeValue<QString>();
	ebwtUrl = actor->getParameter(EBWT_URL_ATTR)->getAttributeValue<QString>();
}

// The indexer has no inputs: it is ready for as long as it is not done.
bool BowtieBuildWorker::isReady() {
	if (isDone()) {
		return false;
	}
	return true;
}

// Creates the index building task. Returns NULL (after writing a trace message)
// when either the reference sequence URL or the index URL is empty; otherwise
// returns a new BowtieBuildTask whose state changes are delivered to
// sl_taskFinished().
Task* BowtieBuildWorker::tick() {

	if( refSeqUrl.isEmpty()) {
		log.trace(BowtieBuildWorker::tr("Reference sequence URL is empty")); 
		return NULL;
	}
	if( ebwtUrl.isEmpty()) {
		// Name the parameter that is actually missing; this branch used to repeat
		// the reference sequence message.
		log.trace(BowtieBuildWorker::tr("EBWT index URL is empty")); 
		return NULL;
	}

	Task* t = new BowtieBuildTask(refSeqUrl.getURLString(), ebwtUrl.getURLString());
	connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
	return t;
}

void BowtieBuildWorker::sl_taskFinished() {
	BowtieBuildTask* t = qobject_cast<BowtieBuildTask*>(sender());
	if (t->getState() != Task::State_Finished) {
		return;
	}

	done = true;

	QVariant v = qVariantFromValue<QString>(t->getEbwtPath());
	output->put(Message(BowtiePlugin::EBWT_INDEX_TYPE(), v));
	output->setEnded();
	log.trace(tr("Bowtie index building finished. Result name is %1").arg(t->getEbwtPath()));
}

// Returns the done flag, which sl_taskFinished() sets once the build task has finished.
bool BowtieBuildWorker::isDone() {
	return done;
}

/************************************************************************/
/* Bowtie index reader                                                  */
/************************************************************************/

// Describes the "Bowtie index reader" actor (one EBWT index output port and one
// required string attribute holding the index URL), registers its prototype in the
// assembly category and registers a BowtieIndexReaderWorkerFactory instance in the
// local workflow domain.
void BowtieIndexReaderWorkerFactory::init() {
	QList<PortDescriptor*> p; QList<Attribute*> a;
	// The port carries an EBWT index, not an alignment: describe it the same way as
	// the output port of the index builder.
	Descriptor oud(CoreLibConstants::OUT_PORT_ID, BowtieIndexReaderWorker::tr("EBWT index"), BowtieIndexReaderWorker::tr("Result ebwt index."));
	p << new PortDescriptor(oud, BowtiePlugin::EBWT_INDEX_TYPE(), false /*input*/, true /*multi*/);
	Descriptor desc(ACTOR_ID, BowtieIndexReaderWorker::tr("Bowtie index reader"), 
		BowtieWorker::tr("Read a set of 6 files with suffixes .1.ebwt, .2.ebwt, .3.ebwt, .4.ebwt, .rev.1.ebwt, and .rev.2.ebwt. These files together constitute the index: they are all that is needed to align reads to that reference."));
	// For the reader this URL is where the index is taken from, so it is an input.
	Descriptor ebwt(EBWT_URL_ATTR, BowtieIndexReaderWorker::tr("EBWT"), 
		BowtieIndexReaderWorker::tr("Input index url."));

	a << new Attribute(ebwt, CoreDataTypes::STRING_TYPE(), true /*required*/, QString());

	ActorPrototype* proto = new BusActorPrototype(desc, p, a);

	QMap<QString, PropertyDelegate*> delegates;    

	// The index URL is edited through a URL delegate.
	delegates[EBWT_URL_ATTR] = new URLDelegate(DialogUtils::prepareDocumentsFileFilter(true), QString(), true);

	proto->setEditor(new DelegateEditor(delegates));
	proto->setPrompter(new BowtieIndexReaderPrompter());
	proto->setIconPath(":core/images/align.png");
	WorkflowEnv::getProtoRegistry()->registerProto(BioActorLibrary::CATEGORY_ASSEMBLY(), proto);

	// Make the worker available in the local workflow domain.
	DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
	localDomain->registerEntry(new BowtieIndexReaderWorkerFactory());
}

// Composes the rich-text summary of the index reader actor; the file name of the
// index URL is inserted (underlined) when the URL is set.
QString BowtieIndexReaderPrompter::composeRichDoc() {
	const QString url = getParameter(EBWT_URL_ATTR).toString();

	QString shownName = "";
	if (!url.isEmpty()) {
		shownName = QString("<u>%1</u>").arg(GUrl(url).fileName());
	}

	return tr("Read ebwt index from %1 and send it url to output.").arg(shownName);
}

void BowtieIndexReaderWorker::init() {
	output = ports.value(CoreLibConstants::OUT_PORT_ID);
	ebwtUrl = actor->getParameter(EBWT_URL_ATTR)->getAttributeValue<QString>();
}

// The reader has no inputs: it is ready for as long as it is not done.
bool BowtieIndexReaderWorker::isReady() {
	if (isDone()) {
		return false;
	}
	return true;
}

// Returns NULL (after writing a trace message) when the index URL is empty.
// Otherwise creates a plain Task that carries no work of its own; its state
// changes are delivered to sl_taskFinished(), which publishes the index URL.
Task* BowtieIndexReaderWorker::tick() {

	if( ebwtUrl.isEmpty()) {
		// The checked value is the index URL; this branch used to report the
		// reference sequence URL instead.
		log.trace(BowtieIndexReaderWorker::tr("EBWT index URL is empty")); 
		return NULL;
	}
	Task* t = new Task("Bowtie index reader", TaskFlags_NR_FOSCOE);
	connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
	return t;
}

void BowtieIndexReaderWorker::sl_taskFinished() {
	QVariant v = qVariantFromValue<QString>(ebwtUrl.getURLString());
	output->put(Message(BowtiePlugin::EBWT_INDEX_TYPE(), v));
	output->setEnded();
	done = true;
	log.trace(tr("Reading bowtie index finished. Result name is %1").arg(ebwtUrl.getURLString()));
}

// Returns the done flag, which sl_taskFinished() sets after the index URL has been published.
bool BowtieIndexReaderWorker::isDone() {
	return done;
}

} //namespace LocalWorkflow
} //namespace GB2
