/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "EnzymesTests.h"

#include "EnzymesIO.h"
#include "FindEnzymesTask.h"

#include <gobjects/DNASequenceObject.h>
#include <gobjects/AnnotationTableObject.h>

namespace GB2 {

/**
 * Reads the test configuration from the XML element 'el'.
 *
 * Attributes consumed:
 *   - "sequence": name of the sequence object context (required);
 *   - "url":      enzymes file path, resolved against the COMMON_DATA_DIR
 *                 environment variable (required);
 *   - "region":   optional "start..end" range; stored as a zero-based start
 *                 position plus a length;
 *   - "enzymes":  comma-separated list of enzyme ids to search for (required);
 *   - "result":   expected hits as ';'-separated "id:start..end,..." tokens
 *                 (required).
 *
 * On the first invalid attribute an error is stored in stateInfo and the
 * method returns without processing the remaining attributes.
 */
void GTest_FindEnzymes::init(XMLTestFormat *tf, const QDomElement& el) {
    Q_UNUSED(tf);
    loadTask = NULL;
    aObj = new AnnotationTableObject("annotations");
    aObj->setParent(this);

    seqObjCtx = el.attribute("sequence");
    if (seqObjCtx.isEmpty()) {
        stateInfo.setError(  "Sequence object context not specified" );
        return;
    }

    // read url of a file with enzymes
    enzymesUrl = el.attribute("url");
    // validate the URL itself (not the sequence context) before prefixing it
    // with the data dir, otherwise a missing "url" would go unnoticed
    if (enzymesUrl.isEmpty()) {
        stateInfo.setError(  "Enzymes database URL not specified" );
        return;
    }
    enzymesUrl=env->getVar("COMMON_DATA_DIR")+"/" + enzymesUrl;

    // get region to analyze
    QString regionStr = el.attribute("region");
    if (!regionStr.isEmpty()) {
        // NOTE(review): "(..)" matches any two characters, not only a literal ".."
        QRegExp rx("(\\d+)(..)(\\d+)");
        if (rx.indexIn(regionStr, 0)!=-1) {
            // "start..end" -> zero-based start, length = end - start + 1
            region.startPos = rx.cap(1).toInt() - 1;
            region.len = rx.cap(3).toInt() - region.startPos;
        }
        if (region.isEmpty() || region.len < 0) {
            stateInfo.setError(  QString("Invalid region: '%1'").arg(regionStr) );
            return;
        }
    }

    // get enzymes names to find
    QString enzymesStr = el.attribute("enzymes");
    enzymeNames = enzymesStr.split(",", QString::SkipEmptyParts);
    if (enzymeNames.isEmpty()) {
        stateInfo.setError(  QString("Invalid enzyme names: '%1'").arg(enzymesStr) );
        return;
    }
    
    // read expected results
    QString resultsStr = el.attribute("result");
    if (resultsStr.isEmpty()) {
        stateInfo.setError(  "'result' value not set" );
        return;
    }
    QStringList perEnzymeResults = resultsStr.split(";", QString::SkipEmptyParts);
    foreach(const QString& enzymeResult, perEnzymeResults) {
        // each token must look like "<id>:<regions>" with both parts non-empty
        int nameIdx = enzymeResult.indexOf(':');
        if (nameIdx <=0 || nameIdx+1 == enzymeResult.size()) {
            stateInfo.setError(  QString("Error parsing results token %1").arg(enzymeResult) );
            return;
        }
        QString enzymeId = enzymeResult.left(nameIdx);
        QString regions = enzymeResult.mid(nameIdx+1);

        if (!enzymeNames.contains(enzymeId)) {
            stateInfo.setError(  QString("Result enzyme not in the search list %1").arg(enzymeId) );
            return;
        }

        // collect every "start..end" occurrence found in the regions part
        QRegExp rx2("(\\d+)(..)(\\d+)");
        int pos = 0;
        while ((pos = rx2.indexIn(regions, pos)) != -1) {
            int start=rx2.cap(1).toInt();
            int end=rx2.cap(3).toInt();
            resultsPerEnzyme.insert(enzymeId, LRegion(start-1, end - start + 1));
            pos += rx2.matchedLength();
        }
        if (!resultsPerEnzyme.contains(enzymeId)) {
            stateInfo.setError(  QString("Can't parse regions in results token: %1").arg(enzymeResult) );
            return;
        }
    }
}

void GTest_FindEnzymes::prepare() {
    if (hasErrors() || isCanceled()) {
        return;
    }
    
    //get sequence object
    seqObj = getContext<DNASequenceObject>(this, seqObjCtx);
    if (seqObj == NULL) {
        stateInfo.setError(  QString("Sequence context not found %1").arg(seqObjCtx) );
        return;
    }
    if (region.isEmpty()) {
        region = seqObj->getSequenceRange();
    }

    loadTask = new LoadEnzymeFileTask(enzymesUrl);
    addSubTask(loadTask);
}


/**
 * Called when a subtask finishes. Once the enzymes loading subtask is done,
 * looks up every requested enzyme in the loaded list and returns a single
 * search task that writes its hits into aObj. Returns an empty list for any
 * other subtask, when no enzymes were loaded, or when the test has errors
 * or was canceled.
 */
QList<Task*> GTest_FindEnzymes::onSubTaskFinished(Task* subTask) {
    QList<Task*> followUps;
    if (hasErrors() || isCanceled()) {
        return followUps;
    }
    // only the loader subtask triggers the search
    if (subTask != loadTask) {
        return followUps;
    }
    if (loadTask->enzymes.isEmpty()) {
        return followUps;
    }

    // resolve each requested id to its enzyme data
    QList<SEnzymeData> selected;
    foreach(const QString& name, enzymeNames) {
        SEnzymeData found = EnzymesIO::findEnzymeById(name, loadTask->enzymes);
        if (found.constData() == NULL) {
            stateInfo.setError(  QString("Enzyme not found: %1").arg(name) );
            return followUps;
        }
        selected.append(found);
    }

    followUps.append(new FindEnzymesToAnnotationsTask(aObj, "", seqObj->getDNASequence(), region, selected));
    return followUps;
}

/**
 * Compares the annotations produced by the search with the expected results.
 *
 * For every enzyme id in resultsPerEnzyme the annotation group with the same
 * name must exist under the root group of aObj, contain as many annotations
 * as there are expected regions, and the first location region of each
 * annotation must be one of the expected regions.
 *
 * Verification stops at the first mismatch, so the error stored in stateInfo
 * always describes the first problem found.
 *
 * @return always Task::ReportResult_Finished
 */
Task::ReportResult GTest_FindEnzymes::report() {
    if (hasErrors() || isCanceled()) {
        return Task::ReportResult_Finished;
    }
    //for each enzyme from resultsPerEnzyme check that all annotations are present
    // NOTE(review): keys() may repeat an id if several regions are stored under it;
    // a repeated check is redundant but gives the same outcome
    foreach(const QString& enzymeId, resultsPerEnzyme.keys()) {
        QList<LRegion> regions = resultsPerEnzyme.values(enzymeId);
        AnnotationGroup* ag = aObj->getRootGroup()->getSubgroup(enzymeId, false);
        if (ag == NULL) {
            stateInfo.setError(  QString("Group not found %1").arg(enzymeId) );
            return Task::ReportResult_Finished;
        }
        const QList<Annotation*> anns = ag->getAnnotations();
        if (anns.size() != regions.size()) {
            stateInfo.setError( QString("Number of results not matched for :%1, results: %2, expected %3")
                .arg(enzymeId).arg(anns.size()).arg(regions.size()) );    
            return Task::ReportResult_Finished;
        }
        foreach(const Annotation* a, anns) {
            LRegion r = a->getLocation().first();
            if (!regions.contains(r)) {
                stateInfo.setError( QString("Illegal region! Enzyme :%1, region %2..%3")
                    .arg(enzymeId).arg(r.startPos + 1).arg(r.endPos()) );    
                // return (not break): a break would only leave the inner loop and
                // let a later enzyme overwrite this error
                return Task::ReportResult_Finished;
            }
        }
    }

    return Task::ReportResult_Finished;
}


void GTest_FindEnzymes::cleanup() {
    delete aObj;
}

//////////////////////////////////////////////////////////////////////////

/**
 * Builds the list of XML test factories provided by this file; it contains
 * a single factory, the one for GTest_FindEnzymes.
 */
QList<XMLTestFactory*> EnzymeTests::createTestFactories() {
    QList<XMLTestFactory*> factories;
    factories << GTest_FindEnzymes::createFactory();
    return factories;
}

}//namespace
