/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "CollocationWorker.h"

#include <workflow/IntegralBusModel.h>
#include <workflow/WorkflowEnv.h>
#include <workflow/WorkflowRegistry.h>
#include <workflow_support/CoreDataTypes.h>
#include <workflow_support/QVariantUtils.h>
#include <workflow_library/BioDatatypes.h>
#include <workflow_library/BioActorLibrary.h>
#include <workflow_support/DelegateEditors.h>
#include <workflow_support/CoreLibConstants.h>

#include <datatype/AnnotationData.h>
#include <datatype/DNASequence.h>
#include <core_api/DNATranslation.h>
#include <core_api/DNAAlphabet.h>
#include <core_api/AppContext.h>
#include <core_api/Log.h>

#include "CollocationsSearchAlgorithm.h"
#include "CollocationsDialogController.h"

/* TRANSLATOR GB2::LocalWorkflow::CollocationWorker */

namespace GB2 {
namespace LocalWorkflow {

// Ids of the actor attributes registered in CollocationWorkerFactory::init().
// NOTE(review): the "a_".."d_" prefixes presumably control attribute ordering — TODO confirm.
// Name given to the result annotations ("Result annotation").
static const QString NAME_ATTR("a_name");
// List of annotation names that must appear together ("Group of annotations").
static const QString ANN_ATTR("b_annotations");
// Region size ("Region size"); note the id says "minlen" while the attribute
// description talks about the maximum allowed distance.
static const QString LEN_ATTR("c_minlen");
// Whether annotations must fit entirely into the region ("Must fit into region").
static const QString FIT_ATTR("d_mustfit");

// Keys used to look up the sequence and the annotation list in the input message map.
// NOTE(review): initialized from statics of another class at load time — confirm
// initialization order is guaranteed.
const QString SEQ_SLOT = BioActorLibrary::SEQ_SLOT_ID;
const QString FEATURE_TABLE_SLOT = BioActorLibrary::FEATURE_TABLE_SLOT_ID;


// Id under which the collocation actor prototype is described.
const QString CollocationWorkerFactory::ACTOR_ID("annotator.collocation");

// Log category used by this worker to report the number of found collocations.
static LogCategory log(ULOG_CAT_WD);


class CollocationValidator : public ConfigurationValidator {
public:
    virtual bool validate(const Configuration* cfg, QStringList& output) const {
        QString annotations = cfg->getParameter(ANN_ATTR)->getAttributeValue<QString>();
        QSet<QString> names = QSet<QString>::fromList(annotations.split(QRegExp("\\W+"), QString::SkipEmptyParts));
        if (names.size() < 2) {
            output.append(CollocationWorker::tr("At least 2 annotations are required for collocation search."));
            return false;
        }
        return true;
    }
};

void CollocationWorkerFactory::init() {

    QMap<Descriptor, DataTypePtr> m;
    {
        //Descriptor sd(SEQ_SLOT, CollocationWorker::tr("sequence-slot"), CollocationWorker::tr("seq-slot-doc"));
        //Descriptor fd(FEATURE_TABLE_SLOT, CollocationWorker::tr("feature-table"), CollocationWorker::tr("feature-table-doc"));
        m[BioActorLibrary::SEQ_SLOT()] = BioDataTypes::DNA_SEQUENCE_TYPE();
        m[BioActorLibrary::FEATURE_TABLE_SLOT()] = BioDataTypes::ANNOTATION_TABLE_LIST_TYPE();
    }
    DataTypePtr inSet(new MapDataType(Descriptor("regioned.sequence"), m));
    DataTypeRegistry* dr = WorkflowEnv::getDataTypeRegistry();
    assert(dr);
    dr->registerEntry(inSet);

    QList<PortDescriptor*> p; QList<Attribute*> a;
    p << new PortDescriptor(Descriptor(CoreLibConstants::IN_PORT_ID, CollocationWorker::tr("Input data"), CollocationWorker::tr("An input sequence and a set of annotations to search in.")), 
        inSet, true /*input*/);
    p << new PortDescriptor(Descriptor(CoreLibConstants::OUT_PORT_ID, CollocationWorker::tr("Group annotations"), CollocationWorker::tr("Annotated regions containing found collocations.")),
        BioDataTypes::ANNOTATION_TABLE_TYPE(), false /*input*/, true/*multi*/);
    
    {
        //LRegion             searchRegion;
        Descriptor nd(NAME_ATTR, CollocationWorker::tr("Result annotation"), CollocationWorker::tr("Name of the result annotations to mark found collocations"));
        Descriptor ad(ANN_ATTR, CollocationWorker::tr("Group of annotations"), CollocationWorker::tr("A list of annotation names to search. Found regions will contain all the named annotations."));
        Descriptor ld(LEN_ATTR, CollocationWorker::tr("Region size"), CollocationWorker::tr("Effectively this is the maximum allowed distance between the interesting annotations in a group"));
        Descriptor fd(FIT_ATTR, CollocationWorker::tr("Must fit into region"), CollocationWorker::tr("Whether the interesting annotations should entirely fit into the specified region to form a group"));
        a << new Attribute(nd, CoreDataTypes::STRING_TYPE(), true, QVariant("misc_feature"));
        a << new Attribute(ad, CoreDataTypes::STRING_TYPE(), true);
        a << new Attribute(ld, CoreDataTypes::NUM_TYPE(), false, QVariant(1000));
        a << new Attribute(fd, CoreDataTypes::BOOL_TYPE(), false, QVariant(false));
    }

    Descriptor desc(ACTOR_ID, CollocationWorker::tr("Collocation search"), 
        CollocationWorker::tr("Finds groups of specified annotations in each supplied set of annotations, stores found regions as annotations."));
    ActorPrototype* proto = new BusActorPrototype(desc, p, a);
    QMap<QString, PropertyDelegate*> delegates;    
    
    QVariantMap lenMap; lenMap["minimum"] = QVariant(0); lenMap["maximum"] = QVariant(INT_MAX);
    delegates[LEN_ATTR] = new SpinBoxDelegate(lenMap);
       
    proto->setEditor(new DelegateEditor(delegates));
    proto->setValidator(new CollocationValidator());
    proto->setIconPath(":annotator/images/regions.png");
    proto->setPrompter(new CollocationPrompter());
    WorkflowEnv::getProtoRegistry()->registerProto(BioActorLibrary::CATEGORY_BASIC(), proto);

    DomainFactory* localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
    localDomain->registerEntry(new CollocationWorkerFactory());
}

/**
 * Composes the rich-text description of the configured collocation actor:
 * where the sequence and annotations come from, which annotation names are
 * searched, the region length, the fit mode and the result annotation name.
 *
 * @return HTML-formatted, translated description string.
 */
QString CollocationPrompter::composeRichDoc() {
    // qobject_cast returns NULL when the port is absent or is not a BusPort,
    // so the producer lookup is only attempted on a valid port.
    BusPort* input = qobject_cast<BusPort*>(target->getPort(CoreLibConstants::IN_PORT_ID));
    Actor* seqProducer = NULL;
    if (input != NULL) {
        seqProducer = input->getProducer(SEQ_SLOT);
    }
    QString seqName = seqProducer ? tr(" sequence from <u>%1</u>").arg(seqProducer->getLabel()) : "";
    QString annName = getProducers(CoreLibConstants::IN_PORT_ID, FEATURE_TABLE_SLOT);
    if (!annName.isEmpty()) {
        annName = tr(" set of annotations from <u>%1</u>").arg(annName);
    }

    // Leading "For each ..." clause; left empty when neither source is known
    // (a red "unset" marker used to be returned for that case and is disabled).
    QString data;
    if (seqName.isEmpty() && annName.isEmpty()) {
        //return "<font color='red'>"+tr("unset")+"</font>";
    } else if (!seqName.isEmpty() && !annName.isEmpty()) {
        data = tr("For each %1 and %2,").arg(seqName).arg(annName);
    } else {
        // Exactly one of the two is set, so the placeholders simply concatenate.
        data = tr("For each %1%2,").arg(seqName).arg(annName);
    }

    // Show the distinct annotation names; fall back to the "required parameter"
    // rendering when none were entered.
    QString annotations = getParameter(ANN_ATTR).toString();
    QStringList names = annotations.split(QRegExp("\\W+"), QString::SkipEmptyParts).toSet().toList();
    annotations = names.join(", ");
    if (annotations.isEmpty()) {
        annotations = getRequiredParam(ANN_ATTR);
    }

    int distance = getParameter(LEN_ATTR).toInt();
    bool mode = getParameter(FIT_ATTR).toBool();

    // Extra sentence shown only when annotations must fit into the region.
    QString extra;
    if (mode) {
        extra = tr(" Annotations themselves may not span beyond the region.");
    }

    QString resultName = getRequiredParam(NAME_ATTR);
    QString doc = tr("%1 look if <u>%2</u> annotations appear collocated within same region of length <u>%3</u>.%4"
        "<br>Output the list of found regions annotated as <u>%5</u>.")
        .arg(data) //sequence from Read Fasta 1
        .arg(annotations)
        .arg(distance)
        .arg(extra)
        .arg(resultName);

    return doc;
}

/** Looks up the worker's input and output ports by their well-known ids. */
void CollocationWorker::init() {
    output = ports.value(CoreLibConstants::OUT_PORT_ID);
    input = ports.value(CoreLibConstants::IN_PORT_ID);
}

/** The worker is ready when it has an input port with a pending message. */
bool CollocationWorker::isReady() {
    if (!input) {
        return false;
    }
    return input->hasMessage();
}

/**
 * Processes one input message.
 * Refreshes the search settings from the actor parameters, then either creates
 * a collocation search task for the received sequence and annotations, or, when
 * the sequence is null or there are no annotations, puts an empty message on
 * the output right away.
 *
 * @return the search task to run, or NULL when nothing has to be searched.
 */
Task* CollocationWorker::tick() {
    Message inputMessage = getMessageAndSetupScriptValues(input);

    // Refresh the cached settings from the actor parameters.
    cfg.distance = actor->getParameter(LEN_ATTR)->getAttributeValue<int>();
    if (actor->getParameter(FIT_ATTR)->getAttributeValue<bool>()) {
        cfg.st = CollocationsAlgorithm::NormalSearch;
    } else {
        cfg.st = CollocationsAlgorithm::PartialSearch;
    }
    resultName = actor->getParameter(NAME_ATTR)->getAttributeValue<QString>();
    QString annotationNames = actor->getParameter(ANN_ATTR)->getAttributeValue<QString>();
    names = QSet<QString>::fromList(annotationNames.split(QRegExp("\\W+"), QString::SkipEmptyParts));

    // Unpack the sequence and the annotation list from the message map.
    QVariantMap payload = inputMessage.getData().toMap();
    DNASequence seq = payload.value(SEQ_SLOT).value<DNASequence>();
    QList<SharedAnnotationData> annotationList = QVariantUtils::var2ftl(payload.value(FEATURE_TABLE_SLOT).toList());

    if (seq.isNull() || annotationList.isEmpty()) {
        // Nothing to search in: emit an empty result and propagate end-of-input.
        output->put(Message(BioDataTypes::ANNOTATION_TABLE_TYPE(), QVariant()));
        if (input->isEnded()) {
            output->setEnded();
        }
        return NULL;
    }

    // The search region length is set to the sequence length.
    cfg.searchRegion.len = seq.length();
    Task* searchTask = new CollocationSearchTask(annotationList, names, cfg);
    connect(searchTask, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
    return searchTask;
}

void CollocationWorker::sl_taskFinished() {
    CollocationSearchTask* t = qobject_cast<CollocationSearchTask*>(sender());
    if (t->getState() != Task::State_Finished) return;
    QList<LRegion> res = t->popResults();
    if (output) {
        QList<SharedAnnotationData> list;
        foreach(LRegion r, res) {
            SharedAnnotationData data; data = new AnnotationData();
            data->location.append(r);
            data->complement = false;
            data->aminoStrand = TriState_No;
            data->name = resultName;
            list.append(data);
        }

        QVariant v = qVariantFromValue<QList<SharedAnnotationData> >(list);
        output->put(Message(BioDataTypes::ANNOTATION_TABLE_TYPE(), v));
        if (input->isEnded()) {
            output->setEnded();
        }
        log.info(tr("Found %1 collocations").arg(res.size()));
    }
}

/** The worker is done when it has no input port or its input has ended. */
bool CollocationWorker::isDone() {
    if (input) {
        return input->isEnded();
    }
    return true;
}

} //namespace LocalWorkflow
} //namespace GB2
