/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "FastqFormat.h"
#include "DocumentFormatUtils.h"

#include <core_api/Task.h>
#include <core_api/IOAdapter.h>
#include <core_api/DNAAlphabet.h>

#include <gobjects/DNASequenceObject.h>
#include <gobjects/AnnotationTableObject.h>
#include <gobjects/GObjectTypes.h>
#include <util_text/TextUtils.h>
#include <util_gui/GUIUtils.h>

#include <memory>

/* TRANSLATOR GB2::FastqFormat */

namespace GB2 {

// Quick sniffing test on a raw data prefix: the buffer must be non-empty,
// must begin with '@' (the character that opens a FASTQ header line) and
// must not be reported by TextUtils::contains() for the TextUtils::BINARY set.
bool FastqFormat::isDataFormatSupported(const char* data, int size) const {
    const bool startsWithHeaderMark = size > 0 && data[0] == '@';
    return startsWithHeaderMark && !TextUtils::contains(TextUtils::BINARY, data, size);
}

// Object operations are accepted only for DNA sequence objects; neither the
// document nor the kind of operation is consulted.
bool FastqFormat::isObjectOpSupported(const Document*, DocumentFormat::DocObjectOp, GObjectType t) const {
    const bool isSequenceType = (t == GObjectTypes::DNA_SEQUENCE);
    return isSequenceType;
}


// Tells whether this format can satisfy the given constraints:
//  - a write requirement is always rejected;
//  - every requested object type must be DNA_SEQUENCE;
//  - when raw data checking is requested, the raw data must pass
//    isDataFormatSupported().
bool FastqFormat::checkConstraints(const DocumentFormatConstraints& c) const {
    if (c.mustSupportWrite) {
        return false;
    }
    foreach (GObjectType requested, c.supportedObjectTypes) {
        if (!(GObjectTypes::DNA_SEQUENCE == requested)) {
            return false;
        }
    }
    if (!c.checkRawData) {
        return true;
    }
    return isDataFormatSupported(c.rawData.constData(), c.rawData.size());
}

// Size of one read request issued by readLine().
#define BUFF_SIZE  4096

/**
 * Appends one line read from 'io' to 'target' and then consumes the single
 * line-break character that follows it.
 *
 * The line is read in chunks of at most BUFF_SIZE bytes until readUntil()
 * reports that a terminator was found, the task is cancelled, or a chunk comes
 * back shorter than BUFF_SIZE (treated as end of input). 'ti.progress' is
 * refreshed after every chunk.
 *
 * @param target  buffer the line is appended to; its size is changed only
 *                when a complete line was read
 * @param io      adapter to read from
 * @param ti      receives progress and, on failure, the error text
 * @param last    when true, failing to read a complete line is not reported
 *                as an error (for callers where end of input is acceptable)
 * @return true if a complete line and its line-break character were read
 */
static bool readLine(QByteArray& target, IOAdapter* io, TaskStateInfo& ti, bool last = false) {
    bool lineOK = false;
    qint64 len, total = target.size();
    do
    {
        // Make sure a full chunk fits behind the bytes gathered so far.
        // NOTE(review): chunks are written into reserved capacity past size();
        // this relies on reserve() keeping those bytes when it reallocates -
        // confirm for the Qt version in use.
        if (target.capacity() - total < BUFF_SIZE) {
            target.reserve(target.capacity() + BUFF_SIZE);
        }
        char* buff = target.data() + total;
        len = io->readUntil(buff, BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Exclude, &lineOK);
        ti.progress = io->getProgress();
        total += len;
    } while (!ti.cancelFlag && !lineOK && len == BUFF_SIZE);
    if (lineOK) {
        // Publish the bytes that were written past the old size.
        target.resize(total);
        // Eat the trailing line break: the code expects the terminator to still
        // be the next character in the stream. Index through unsigned char so
        // the table lookup can never use a negative index.
        char ch;
        bool get_white = io->getChar(&ch) && TextUtils::LINE_BREAKS[static_cast<uchar>(ch)];
        assert(get_white);
        lineOK = get_white;
    } else if (!last && !ti.cancelFlag) {
        // A cancelled read is not an end-of-file condition, so do not record
        // a misleading error for it; callers test cancelFlag themselves.
        ti.setError(GB2::FastqFormat::tr("Unexpected end of file"));
    }
    return lineOK;
}

/**
 * Reads FASTQ records from 'io' and appends the resulting objects to 'objects'.
 * FASTQ format specification: http://maq.sourceforge.net/fastq.shtml
 *
 * Each record is consumed as: an '@' header line, one sequence line, a '+' line
 * (either bare or repeating the header text) and a quality string, which is
 * skipped by length (same length as the sequence) rather than parsed.
 *
 * @param io               adapter to read from
 * @param docUrl           used in error messages and passed on when the merged
 *                         sequence object is created
 * @param objects          list that receives the created objects
 * @param ti               cancel flag, progress and error reporting
 * @param gapSize          -1: one sequence object per record; otherwise all
 *                         records are concatenated into one sequence, separated
 *                         by 'gapSize' zero bytes
 * @param predictedSize    capacity reserved up front for the sequence buffer
 * @param writeLockReason  cleared on entry; set to MERGED_SEQ_LOCK when more
 *                         than one record was merged
 * @param onlyOne          stop after the first successfully read record
 */
static void load(IOAdapter* io, const QString& docUrl, QList<GObject*>& objects, TaskStateInfo& ti,
                 int gapSize, int predictedSize, QString& writeLockReason, bool onlyOne ) {
    writeLockReason.clear();
    QByteArray readBuff, secondBuff;

    bool merge = gapSize!=-1;
    QByteArray sequence;
    QStringList headers;
    QSet<QString> names;
    QList<LRegion> mergedMapping;
    QByteArray gapSequence((merge ? gapSize : 0), 0);

    sequence.reserve(predictedSize);

    int sequenceStart = 0;
    while (!ti.cancelFlag) {
        // read header; once at least one record has been read, running out of
        // input here is the normal way to finish and is not an error
        readBuff.clear();
        if (!readLine(readBuff, io, ti, (merge && !headers.isEmpty()) || !names.isEmpty())) {
            break;
        }
        // startsWith() also rejects an empty line without indexing into it
        if (!readBuff.startsWith('@')) {
            ti.setError(GB2::FastqFormat::tr("Not a valid FASTQ file: %1").arg(docUrl));
            break;
        }

        // read sequence: a fresh buffer per record, or appended after a gap
        // when records are merged
        if (!merge) {
            sequence.clear();
        } else if (sequence.size() > 0) {
            sequence.append(gapSequence);
        }
        sequenceStart = sequence.size();
        if (!readLine(sequence, io, ti)) {
            break;
        }
        int seqLen = sequence.size() - sequenceStart;

        // read +<seqname>
        secondBuff.clear();
        secondBuff.reserve(readBuff.size());
        if (!readLine(secondBuff, io, ti)) {
            break;
        }
        // The line must start with '+' and be either exactly "+" or as long as
        // the header line with identical text after the first character.
        const bool startsWithPlus = secondBuff.startsWith('+');
        const bool lengthOk = secondBuff.size() == 1 || secondBuff.size() == readBuff.size();
        const bool nameDiffers = readBuff.size() == secondBuff.size()
            && strncmp(readBuff.constData() + 1, secondBuff.constData() + 1, readBuff.size() - 1) != 0;
        if (!startsWithPlus || !lengthOk || nameDiffers) {
            ti.setError(GB2::FastqFormat::tr("Not a valid FASTQ file: %1").arg(docUrl));
            break;
        }

        // skip qualities: exactly seqLen bytes followed by one line break.
        // The byte after the qualities comes straight from the file, so index
        // the table through unsigned char to avoid a negative index for bytes
        // >= 0x80 where char is signed.
        char ch;
        if (!io->skip(seqLen) || !io->getChar(&ch) || !TextUtils::LINE_BREAKS[static_cast<uchar>(ch)]) {
            ti.setError(GB2::FastqFormat::tr("Not a valid FASTQ file: %1").arg(docUrl));
            break;
        }

        QString headerLine = QString::fromLatin1(readBuff.data()+1, readBuff.length()-1);
        if (merge) {
            // remember where this record lives inside the merged sequence
            headers.append(headerLine);
            mergedMapping.append(LRegion(sequenceStart, seqLen));
        } else {
            // object names are derived from the header and tracked in 'names'
            QString objName = TextUtils::variate(headerLine, "_", names);
            names.insert(objName);
            DNASequence seq( headerLine, sequence );
            seq.info.insert(DNAInfo::ID, headerLine);
            DocumentFormatUtils::addSequenceObject(objects, objName, seq);
        }

        if( onlyOne ) {
            break;
        }
    }

    assert(headers.size() == mergedMapping.size());

    // the merged object is created only after a clean, uncancelled run
    if (!ti.hasErrors() && !ti.cancelFlag && merge && !headers.isEmpty()) {
        DocumentFormatUtils::addMergedSequenceObject(objects, docUrl, headers, sequence, mergedMapping);
    }

    if (merge && headers.size() > 1) {
        writeLockReason = DocumentFormat::MERGED_SEQ_LOCK;
    }
}

/**
 * Opens 'url' with an adapter created by 'iof', reads all FASTQ records from it
 * and wraps the resulting objects in a new Document.
 *
 * Settings read from '_fs':
 *  - MERGE_MULTI_DOC_GAP_SIZE_SETTINGS: gap size, clamped to [-1, 1000000];
 *    the default -1 is the value load() treats as "do not merge";
 *  - MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS: expected sequence size, at least
 *    100000; when merging, the default is the adapter's remaining byte count.
 *
 * @return the new document, or NULL if the file could not be opened, loading
 *         reported an error, or the task was cancelled
 */
Document* FastqFormat::loadExistingDocument(IOAdapterFactory* iof, const QString& url, TaskStateInfo& ti, const QVariantMap& _fs) {
    //read file
    std::auto_ptr<IOAdapter> io(iof->createIOAdapter());
    if (!io->open(url, IOAdapterMode_Read)) {
        ti.setError(Translations::errorOpeningFileRead(url));
        return NULL;
    }

    QVariantMap fs = _fs;
    QList<GObject*> objects;

    //get settings
    int gapSize = qBound(-1, DocumentFormatUtils::getIntSettings(fs, MERGE_MULTI_DOC_GAP_SIZE_SETTINGS, -1), 1000*1000);
    int predictedSize = qMax(100*1000,
        DocumentFormatUtils::getIntSettings(fs, MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS, gapSize==-1 ? 0 : io->left()));

    QString lockReason;
    load(io.get(), url, objects, ti, gapSize, predictedSize, lockReason, false );
    io->close();

    if (ti.hasErrors() || ti.cancelFlag) {
        // Objects created before the failure are never handed to a Document on
        // this path, so release them here instead of leaking them.
        // NOTE(review): assumes 'objects' holds the only owning pointers.
        qDeleteAll(objects);
        return NULL;
    }

    DocumentFormatUtils::updateFormatSettings(objects, fs);
    Document* doc = new Document(this, iof, url, objects, fs, lockReason);
    return doc;

}

/**
 * Reads a single FASTQ record from an already opened adapter (load() is called
 * with onlyOne == true) and wraps the result in a new Document whose URL and
 * adapter factory are taken from 'io'. The adapter is neither closed nor
 * deleted here.
 *
 * The merge settings in '_fs' are interpreted the same way as in the
 * factory-based overload: gap size clamped to [-1, 1000000] with default -1,
 * predicted size at least 100000.
 *
 * @return the new document, or NULL if 'io' is NULL or not open, loading
 *         reported an error, or the task was cancelled
 */
Document* FastqFormat::loadExistingDocument( IOAdapter* io, TaskStateInfo& ti, const QVariantMap& _fs ) {
    if( NULL == io || !io->isOpen() ) {
        ti.setError(Translations::badArgument("IO adapter"));
        return NULL;
    }
    QVariantMap fs = _fs;
    QList<GObject*> objects;

    int gapSize = qBound(-1, DocumentFormatUtils::getIntSettings(fs, MERGE_MULTI_DOC_GAP_SIZE_SETTINGS, -1), 1000*1000);
    int predictedSize = qMax(100*1000,
        DocumentFormatUtils::getIntSettings(fs, MERGE_MULTI_DOC_SEQUENCE_SIZE_SETTINGS, gapSize==-1 ? 0 : io->left()));

    QString lockReason;
    load( io, io->getUrl(), objects, ti, gapSize, predictedSize, lockReason, true );

    if (ti.hasErrors() || ti.cancelFlag) {
        // Anything load() created before failing is not handed to a Document
        // on this path, so release it here instead of leaking it.
        // NOTE(review): assumes 'objects' holds the only owning pointers.
        qDeleteAll(objects);
        return NULL;
    }

    DocumentFormatUtils::updateFormatSettings(objects, fs);
    Document* doc = new Document( this, io->getFactory(), io->getUrl(), objects, fs, lockReason );
    return doc;
}

}//namespace
