/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "ClustalWAlnFormat.h"

#include "DocumentFormatUtils.h"

#include <core_api/Task.h>
#include <core_api/IOAdapter.h>
#include <gobjects/GObjectTypes.h>
#include <gobjects/MAlignmentObject.h>
#include <util_text/TextUtils.h>
#include <util_algorithm/MSAUtils.h>

#include <memory>

namespace GB2 {

/* TRANSLATOR GB2::ClustalWAlnFormat */    
/* TRANSLATOR GB2::IOAdapter */    

/** Constructs the format object and sets formatName to the translated "CLUSTALW" string. */
ClustalWAlnFormat::ClustalWAlnFormat(QObject* p) : DocumentFormat(p) {
    formatName = tr("CLUSTALW");
}

/** Returns the file extensions handled by this format: only "aln". */
QStringList ClustalWAlnFormat::getSupportedDocumentFileExtensions() {
    return QStringList() << "aln";
}


//todo: code duplication with MAlignment::checkModel
static bool checkModel(const MAlignment& al, TaskStateInfo& ti) {
    //check that all sequences are of equal size
    int size = 0;
    for (int i=0, n = al.getNumSequences(); i<n; i++) {
        const MAlignmentItem& item = al.alignedSeqs[i];
        if (i == 0) {
            size = item.sequence.size();
        } else {
            int itemSize = item.sequence.size();
            if (size!=itemSize) {
                ti.error = ClustalWAlnFormat::tr("found_sequences_of_different_size");
                return false;
            }
        }
    }
    if (size == 0) {
        ti.error = ClustalWAlnFormat::tr("model_of_0_size");
        return false;
    } 
    return true;
}

/**
 * Parses ClustalW ALN data from the adapter into a single multiple alignment object.
 *
 * The first chunk read must start with "CLUSTAL W" and be at least 40 bytes long
 * (terminators included). Every following line is split into a name column and a
 * value column; the offset of the value column is detected on the first data line
 * and reused for all later lines. Lines of the first block define the sequences,
 * lines of later blocks are appended to the sequences in the same order and must
 * carry the same names. The line that closes a block is accumulated in a separate
 * consensus item that is not part of the resulting alignment.
 *
 * @param io       adapter to read from
 * @param objects  receives one new MAlignmentObject on success; left untouched on failure
 * @param ti       ti.error is set on failure, ti.progress is updated after each line,
 *                 ti.cancelFlag stops reading
 */
void ClustalWAlnFormat::load(IOAdapter* io, QList<GObject*>& objects, TaskStateInfo& ti) {
    static int READ_BUFF_SIZE = 1024;
    QByteArray readBuffer(READ_BUFF_SIZE, '\0');
    char* buff  = readBuffer.data();

    MAlignmentItem consensus;

    const QBitArray& LINE_BREAKS = TextUtils::LINE_BREAKS;
    const QBitArray& WHITES = TextUtils::WHITES;
    MAlignment al;
    bool lineOk = false;
    bool firstBlock = true;
    int sequenceIdx = 0;
    int valStartPos = 0;
    int valEndPos = 0;

    //1 skip first line
    int len = io->readUntil(buff, READ_BUFF_SIZE, LINE_BREAKS, IOAdapter::Term_Include, &lineOk);
    if (!lineOk || len < 40 || !readBuffer.startsWith("CLUSTAL W")) {
        ti.error = ClustalWAlnFormat::tr("illegal_header_line");
        // not a ClustalW file: stop here instead of parsing the rest and
        // possibly replacing this error with a less specific one
        return;
    }

    //read data
    while (!ti.cancelFlag && (len = io->readUntil(buff, READ_BUFF_SIZE, LINE_BREAKS, IOAdapter::Term_Include, &lineOk)) > 0) {
        // strip trailing line-break characters, counting '\n' so that a line
        // followed by an empty line (2 or more '\n') can be recognized below
        int numNs = 0;
        while(len > 0 && LINE_BREAKS[(uchar)buff[len-1]]) {
            if ( buff[len-1] =='\n') {
                numNs++;
            }
            len--;
        }
        if (len == 0) {
            if (al.getNumSequences() == 0) {
                continue;//initial empty lines
            }
            ti.error = ClustalWAlnFormat::tr("error_parsing_file");
            break;
        }

        // no copy: 'line' is a view over the read buffer
        QByteArray line = QByteArray::fromRawData(buff, len);
        if (valStartPos == 0) {
            // first data line: the value column starts at the first non-white
            // character after the first space
            int spaceIdx = line.indexOf(' ');
            int valIdx = spaceIdx + 1;
            while (valIdx < len && WHITES[(uchar)buff[valIdx]]) {
                valIdx++;
            }
            if (valIdx <= 0 || valIdx >= len-1) {
                ti.error = ClustalWAlnFormat::tr("invalid_alignment_format");
                break;
            }
            valStartPos = valIdx;
        }

        valEndPos = valStartPos + 1; //not inclusive
        while (valEndPos < len && !WHITES[(uchar)buff[valEndPos]]) {
            valEndPos++;
        }
        if (valEndPos!=len) { //there were numbers trimmed -> trim spaces now
            while (valEndPos > valStartPos && buff[valEndPos] == ' ') {
                valEndPos--;
            }
            valEndPos++; //leave non-inclusive
        }

        QByteArray name = line.left(valStartPos).trimmed();
        QByteArray value = line.mid(valStartPos, valEndPos - valStartPos);
                
        // a line closes the current block when all known sequences were already seen,
        // when it is followed by an empty line, when it has no name, or when its value
        // contains characters that only occur in a consensus line
        int seqsInModel = al.getNumSequences();
        bool lastBlockLine = (!firstBlock && sequenceIdx == seqsInModel) 
            || numNs >=2
            || name.isEmpty()
            || value.contains(' ') || value.contains(':') || value.contains('.');

        if (firstBlock) {
            if (lastBlockLine) {
                consensus.name = name;
                consensus.sequence = value;
            } else {
                assert(al.alignedSeqs.size() == sequenceIdx);
                al.alignedSeqs.append(MAlignmentItem(name, value));
            }
        } else {
            MAlignmentItem* item = NULL;
            if (sequenceIdx < seqsInModel) { 
                item = &al.alignedSeqs[sequenceIdx];
            } else if (sequenceIdx == seqsInModel) {
                assert(lastBlockLine);
                item = &consensus;
            } else {
                ti.error = ClustalWAlnFormat::tr("incorrect_num_seqs_in_block");
                break;
            } 
            if (item->name!=name) {
                ti.error = ClustalWAlnFormat::tr("seq_name_not_matched");
                break;
            }
            item->sequence.append(value);
        }
        if (lastBlockLine) {
            firstBlock = false;
            if (!checkModel(al, ti)) {
                break;
            }
            sequenceIdx = 0;
        } else {
            sequenceIdx++;
        }

        ti.progress = io->getProgress();
    }

    // run the final consistency check only if parsing itself did not fail, so that
    // a specific parse error is not overwritten by a generic model error
    if (!ti.hasErrors()) {
        checkModel(al, ti);
    }
    if (ti.hasErrors()) {
        return;
    }
    DocumentFormatUtils::assignAlphabet(al);
    if (al.alphabet == NULL) {
        ti.error = ClustalWAlnFormat::tr("alphabet_unknown");
        return;
    }

    MAlignmentObject* obj = new MAlignmentObject(al, MA_OBJECT_NAME);
    objects.append(obj);
}

/**
 * Opens the given url for reading, parses it with load() and wraps the parsed
 * alignment object into a new Document.
 *
 * @return the new Document, or NULL (with ti.error set) if the url cannot be
 *         opened or parsing fails
 */
Document* ClustalWAlnFormat::loadExistingDocument(IOAdapterFactory* iof, const QString& url, TaskStateInfo& ti, const QVariantMap& fs) {
    // the adapter is released automatically on every return path
    std::auto_ptr<IOAdapter> adapter(iof->createIOAdapter());

    if (!adapter->open(url, IOAdapterMode_Read)) {
        ti.error = IOAdapter::tr("error_opening_url_for_read '%1'").arg(url);
        return NULL;
    }

    QList<GObject*> parsedObjects;
    load(adapter.get(), parsedObjects, ti);
    adapter->close();

    if (!ti.hasErrors()) {
        // a successful load() produces exactly one alignment object
        assert(parsedObjects.size() == 1);
        return new Document(this, iof, url, parsedObjects, fs);
    }
    return NULL;
}

// Layout limits used by save() when writing alignment blocks.
// Upper bound for a written sequence line, not counting the trailing '\n' (asserted in save()).
#define MAX_LINE_LEN    80
// Sequence names longer than this are truncated on output.
#define MAX_NAME_LEN    39
// The first and last sequence columns of a line are rounded to multiples of this value.
#define SEQ_ALIGNMENT    5

/**
 * Writes the single alignment object of the document in ClustalW ALN format.
 *
 * The output is the header line followed by blocks. Each block contains one line
 * per sequence (name, space padding, a slice of the sequence, a space and the number
 * of alignment columns written so far), then a consensus line and an empty line.
 *
 * @param io  adapter to write to
 * @param d   document that must contain exactly one MAlignmentObject (asserted)
 * @param ti  ti.error is set if a block could not be written completely
 */
void ClustalWAlnFormat::save(IOAdapter* io, Document* d, TaskStateInfo& ti) {
    assert(d->getObjects().size() == 1);
    const MAlignmentObject* obj = qobject_cast<const MAlignmentObject*>(d->getObjects().first());
    assert(obj!=NULL);
    const MAlignment& ma = obj->getMAlignment();
    assert(ma.isNormalized());

    //write header
    QByteArray header("CLUSTAL W 2.0 multiple sequence alignment\n\n");
    int len = io->writeBlock(header);
    if (len != header.length()) {
        ti.error = IOAdapter::tr("file_write_error");
        return;
    }

    //precalculate seq writing params
    int maxNameLength = 0;
    foreach(const MAlignmentItem& item, ma.alignedSeqs) {
        maxNameLength = qMax(maxNameLength, item.name.length());
    }
    maxNameLength = qMin(maxNameLength, MAX_NAME_LEN);

    int aliLen = ma.getLength();
    QByteArray consensus(aliLen, MAlignment_GapChar);
    MSAUtils::updateConsensus(ma, consensus, MSAConsensusType_ClustalW);

    // number of decimal digits of the largest position number; counted on the
    // textual form so that a zero-length alignment does not end up in log10(0)
    int maxNumLength  = QByteArray::number(aliLen).length();

    int seqStart = maxNameLength + 2; //+1 for space separator
    if (seqStart % SEQ_ALIGNMENT != 0) {
        // round up to the next multiple of SEQ_ALIGNMENT
        seqStart = seqStart + SEQ_ALIGNMENT - (seqStart % SEQ_ALIGNMENT);
    }
    int seqEnd = MAX_LINE_LEN - maxNumLength - 1;
    if (seqEnd % SEQ_ALIGNMENT != 0) {
        // round down to the previous multiple of SEQ_ALIGNMENT
        seqEnd = seqEnd - (seqEnd % SEQ_ALIGNMENT);
    }
    assert(seqStart % SEQ_ALIGNMENT == 0 && seqEnd % SEQ_ALIGNMENT == 0 && seqEnd > seqStart);

    int seqPerPage = seqEnd - seqStart;
    const char* spaces = TextUtils::SPACE_LINE.constData();

    //write sequence
    for(int i = 0; i < aliLen; i+=seqPerPage) {
        // the last block may be shorter than a full page
        int partLen = i + seqPerPage > aliLen ? aliLen - i : seqPerPage;
        foreach(const MAlignmentItem& item, ma.alignedSeqs) {
            QByteArray line = item.name.toAscii();
            if (line.length() > MAX_NAME_LEN) {
                line = line.left(MAX_NAME_LEN);
            }
            // whitespace inside a name would break the name/value column split on reading
            TextUtils::replace(line.data(), line.length(), TextUtils::WHITES, '_');
            line.append(QByteArray::fromRawData(spaces, seqStart - line.length()));
            line.append(item.sequence.mid(i, partLen));
            line.append(' ');
            line.append(QByteArray::number(qMin(i+seqPerPage, aliLen)));
            assert(line.length() <= MAX_LINE_LEN);
            line.append('\n');

            len = io->writeBlock(line);
            if (len != line.length()) {
                ti.error = IOAdapter::tr("file_write_error");
                return;
            }
        }
        //write consensus
        QByteArray line = QByteArray::fromRawData(spaces, seqStart);
        line.append(consensus.mid(i, partLen));
        line.append("\n\n");
        len = io->writeBlock(line);
        if (len != line.length()) {
            ti.error = IOAdapter::tr("file_write_error");
            return;
        }
    }
}

/**
 * Saves the document in ClustalW ALN format.
 *
 * @param doc        document to store
 * @param ti         ti.error is set if the target cannot be opened or writing fails
 * @param iof        adapter factory to use; when NULL the document's own factory is used
 * @param newDocURL  target url; when empty the document's own url is used
 */
void ClustalWAlnFormat::storeDocument(Document* doc, TaskStateInfo& ti, IOAdapterFactory* iof, const QString& newDocURL) {
    IOAdapterFactory* factory = (iof != NULL) ? iof : doc->getIOAdapterFactory();
    std::auto_ptr<IOAdapter> adapter(factory->createIOAdapter());

    QString targetUrl = newDocURL;
    if (targetUrl.isEmpty()) {
        targetUrl = doc->getURL();
    }

    if (!adapter->open(targetUrl, IOAdapterMode_Write)) {
        ti.error = IOAdapter::tr("error_opening_url_for_write '%1'").arg(targetUrl);
        return;
    }
    save(adapter.get(), doc, ti);
    adapter->close();
}


bool ClustalWAlnFormat::isDataFormatSupported(const char* data, int size) const {
    if (TextUtils::contains(TextUtils::BINARY, data, size)) {
        return false;
    }
    QByteArray str(data, size);
    if (!str.startsWith("CLUSTAL W")) {
        return false;
    }
    QTextStream s(str);
    QString line = s.readLine();
    if (!line.endsWith("multiple sequence alignment")) {
        return false;
    }
    return true;
}

/**
 * Tells whether the operation is allowed for an object of the given type.
 * The only supported case is adding a multiple alignment object to a document
 * that has no objects yet.
 */
bool ClustalWAlnFormat::isObjectOpSupported(const Document* d , DocumentFormat::DocObjectOp op, GObjectType t) const{
    if (t != GObjectTypes::MULTIPLE_ALIGNMENT) {
        return false;
    }
    if (op != DocumentFormat::DocObjectOp_Add) {
        return false;
    }
    return d->getObjects().isEmpty();
}

/**
 * Checks the format against the given constraints: every requested object type
 * must be the multiple alignment type and, if raw data checking is requested,
 * the raw data must be recognized by isDataFormatSupported().
 */
bool ClustalWAlnFormat::checkConstraints(const DocumentFormatConstraints& c) const {
    foreach (GObjectType requestedType, c.supportedObjectTypes) {
        if (requestedType != GObjectTypes::MULTIPLE_ALIGNMENT) {
            return false;
        }
    }

    // raw data is inspected only on request
    return !c.checkRawData || isDataFormatSupported(c.rawData.constData(), c.rawData.size());
}


}//namespace
