/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "NewickFormat.h"

#include <core_api/Task.h>
#include <core_api/IOAdapter.h>
#include <gobjects/PhyTreeObject.h>
#include <util_text/TextUtils.h>
#include <memory>

namespace GB2 {

/* TRANSLATOR GB2::IOAdapter */    

/**
 * Constructs the Newick document format object.
 * @param p parent QObject, forwarded to the DocumentFormat base class.
 */
NewickFormat::NewickFormat(QObject* p)
    : DocumentFormat(p)
{
    // Translatable display name of the format.
    formatName = tr("Newick Standard");
}

/**
 * Lists the file extensions (without the leading dot) associated with this format.
 * @return a list holding "nwk" and "newick", in that order.
 */
QStringList NewickFormat::getSupportedDocumentFileExtensions() {
    return QStringList() << "nwk" << "newick";
}


// Size, in bytes, of the temporary buffer used to read a document block by block.
#define BUFF_SIZE 1024

// Forward declaration of the Newick text parser defined further down in this file.
// Parse failures are reported through si.error.
static PhyTree parseTree(const QByteArray& text, TaskStateInfo& si);

/**
 * Reads the whole resource at 'url', parses it as a Newick tree and wraps the
 * result into a new Document that contains a single PhyTreeObject named "Tree".
 *
 * @param iof factory used to create the IO adapter; also stored in the Document.
 * @param url location of the data to read.
 * @param ti  receives the error message on failure and the read progress.
 * @param fs  settings map handed over to the Document constructor unchanged.
 * @return a newly allocated Document, or NULL if the url could not be opened
 *         or the text could not be parsed (ti.error is set in both cases).
 */
Document* NewickFormat::loadExistingDocument(IOAdapterFactory* iof, const QString& url, TaskStateInfo& ti, const QVariantMap& fs) {
    std::auto_ptr<IOAdapter> adapter(iof->createIOAdapter());

    const bool opened = adapter->open(url, IOAdapterMode_Read);
    if (!opened) {
        ti.error = IOAdapter::tr("error_opening_url_for_read '%1'").arg(url);
        return NULL;
    }

    // Pre-size the destination when the adapter reports how much data is pending.
    QByteArray content;
    int expected = adapter->left();
    if (expected > 0) {
        content.reserve(expected);
    }

    // Pull the data through a fixed-size chunk until the adapter returns nothing more.
    QByteArray chunk(BUFF_SIZE, '\0');
    for (;;) {
        int got = adapter->readBlock(chunk.data(), BUFF_SIZE);
        if (got <= 0) {
            break;
        }
        //todo: content.append can fail on realloc for large sizes
        content.append(QByteArray(chunk.data(), got));
        ti.progress = adapter->getProgress();
    }
    adapter->close();

    PhyTree tree = parseTree(content, ti);
    if (ti.hasErrors()) {
        return NULL;
    }

    QList<GObject*> objects;
    objects << new PhyTreeObject(tree, "Tree");
    return new Document(this, iof, url, objects, fs);
}

/**
 * Writes the document to 'newDocURL' (or to the document's own URL when
 * 'newDocURL' is empty).
 *
 * Serialization of the tree into Newick text is NOT implemented yet. Debug
 * builds trap on an assert; release builds report an error through ti.error
 * and leave the destination untouched (previously the destination was opened
 * for writing and nothing was written to it).
 *
 * @param d         document to store; must use this format and hold exactly one object.
 * @param ti        receives the error message on failure.
 * @param iof       IO adapter factory to use; when NULL the document's own factory is used.
 * @param newDocURL destination URL, or an empty string to reuse d->getURL().
 */
void NewickFormat::storeDocument(Document* d, TaskStateInfo& ti, IOAdapterFactory* iof, const QString& newDocURL) {
    assert(d->getDocumentFormat() == this);
    assert(d->getObjects().size() == 1);

    if (iof == NULL) {
        iof = d->getIOAdapterFactory();
    }
    assert(iof);

    GObject* obj = d->getObjects().first();
    assert(obj != NULL);
    Q_UNUSED(obj); // only referenced by the assert above until serialization is implemented

    QByteArray text;
    // TODO: serialize the tree held by 'obj' into 'text'.
    if (text.isEmpty()) {
        // Nothing to write: do not open the destination, so that an existing
        // file is not replaced by an empty one.
        assert(0); // serialization is not implemented
        ti.error = tr("newick_store_not_implemented");
        return;
    }

    std::auto_ptr<IOAdapter> io(iof->createIOAdapter());
    QString url = newDocURL.isEmpty() ? d->getURL() : newDocURL;
    if (!io->open(url, IOAdapterMode_Write)) {
        ti.error = IOAdapter::tr("error_opening_url_for_write '%1'").arg(url);
        return;
    }

    int nWritten = 0;
    const int nTotal = text.size();
    while (nWritten < nTotal) {
        int l = io->writeBlock(text.data() + nWritten, nTotal - nWritten);
        if (l <= 0) {
            // A failed write must end the loop: otherwise a zero result spins
            // forever and a negative one moves the write offset backwards.
            ti.error = IOAdapter::tr("error_writing_url '%1'").arg(url);
            break;
        }
        nWritten += l;
    }
    // Close explicitly, as loadExistingDocument() does after reading.
    io->close();
}
	

bool NewickFormat::isDataFormatSupported(const char* data, int size) const {
	bool containsBinary = TextUtils::contains(TextUtils::BINARY, data, size);
    if (containsBinary) {
        return false;
    }
    QByteArray str(data, size);
    int newLen = TextUtils::remove(data, size, str.data(), TextUtils::WHITES);
    str.resize(newLen);
    int bracket = str.indexOf('(');
    bool ok = false;
    if (bracket!=-1) {
        if (bracket == 0) {
            ok = true;
        } else {
            char c = str[bracket-1];
            if (c == ',') {
                ok = true;
            }
        }
    }
    if (!ok && bracket < 1) {
        //check if document contains only one node
        ok = data[size-1] == ';' && TextUtils::fits(TextUtils::ALPHAS, data, size-1);
    }
    //todo: add more heuristics here to make auto detection more strict
    return ok;
}

/**
 * Tells whether operation 'op' on an object of type 't' is allowed for document 'd'.
 * Only phylogenetic tree objects are handled: removal is always allowed, any
 * other operation is allowed only while the document holds no objects.
 */
bool NewickFormat::isObjectOpSupported(const Document* d, DocumentFormat::DocObjectOp op, GObjectType t) const {
    if (t != GObjectTypes::PHYLOGENETIC_TREE) {
        return false;
    }
    if (op == DocObjectOp_Remove) {
        return true;
    }
    return d->getObjects().isEmpty();
}

/**
 * Checks whether this format satisfies the given constraints.
 * Every requested object type must be a phylogenetic tree; when raw data
 * checking is requested, the raw data must additionally pass
 * isDataFormatSupported().
 */
bool NewickFormat::checkConstraints(const DocumentFormatConstraints& c) const {
    foreach (GObjectType requestedType, c.supportedObjectTypes) {
        if (requestedType != GObjectTypes::PHYLOGENETIC_TREE) {
            return false;
        }
    }
    if (!c.checkRawData) {
        return true;
    }
    return isDataFormatSupported(c.rawData.constData(), c.rawData.size());
}


/*
 Parses Newick text into a PhyTree.

 The text is scanned byte by byte. Whitespace is skipped everywhere (also
 inside labels). The characters '(' ')' ':' ',' ';' act as operators; all other
 characters are accumulated into the current token, which is interpreted as a
 node name (underscores are replaced by spaces) or, after ':', as a branch
 length. Parsing stops at the first ';'. A branch length given for the root
 node is ignored.

 On malformed input si.error is set and parsing stops at the offending
 character; the partially built tree is still returned, so callers must check
 si before using the result.

 TODO (not supported yet):
 Unquoted labels may not contain blanks, parentheses, square brackets, single_quotes, colons, semicolons, or commas.
 Single quote characters in a quoted label are represented by two single quotes.
 Blanks or tabs may appear anywhere except within unquoted labels or branch_lengths.
 Newlines may appear anywhere except within labels or branch_lengths.
 Comments are enclosed in square brackets and may appear anywhere newlines are permitted.
*/
static PhyTree parseTree(const QByteArray& text, TaskStateInfo& si) {
    PhyTree res(new PhyTreeData());
    PhyNode* rd = new PhyNode();
    res->rootNode = rd;

    // Characters that terminate the current token.
    QBitArray ops(256);
    ops['('] = ops[')'] = ops[':']  = ops[','] = ops[';'] = true;
    enum ReadState {RS_NAME, RS_WEIGHT};
    ReadState state = RS_NAME;
    const char* str = text.constData();
    QString lastStr;

    // nodeStack holds the path from the root to the node being read;
    // branchStack holds the branches along that path. While parsing succeeds,
    // nodeStack.size() == branchStack.size() + 1 and nodeStack is never empty.
    QStack<PhyNode*> nodeStack;
    QStack<PhyBranch*>  branchStack;
    nodeStack.push(rd);
    for (int i=0, n = text.size(); i < n ; i++) {
        unsigned char c = str[i];
        if (TextUtils::WHITES[(uchar)c]) {
            continue;
        }
        if (!ops[(uchar)c]) { //not ops -> cache
            lastStr.append(c);
            continue;
        }
        // use cached value
        if (state == RS_NAME) {
            nodeStack.top()->name = lastStr.replace('_', ' ');
        } else {
            assert(state == RS_WEIGHT);
            if (!branchStack.isEmpty()) { //ignore root node weight if present
                if (nodeStack.size() < 2) {
                    si.error = NewickFormat::tr("weight_unexpected_%1").arg(lastStr);
                    break;
                }
                bool ok = false;
                branchStack.top()->distance = lastStr.toDouble(&ok);
                if (!ok) {
                    si.error = NewickFormat::tr("weight_parse_error_%1").arg(lastStr);
                    break;
                }
            }
        }

        // advance in state
        if (c == '(') { //new child
            // Take the parent BEFORE pushing the child: otherwise top() is the
            // child itself and the branch would connect the node to itself.
            // NOTE(review): the argument order (new node first) is kept from the
            // original call; confirm against PhyNode::addBranch if it is directional.
            PhyNode* parent = nodeStack.top();
            PhyNode* pn = new PhyNode();
            PhyBranch* bd = PhyNode::addBranch(pn, parent, 0);
            nodeStack.push(pn);
            branchStack.push(bd);
            state = RS_NAME;
        } else if (c == ':') { //weight start
            if (state == RS_WEIGHT) {
                si.error = NewickFormat::tr("unexpected_weight_start_%1").arg(lastStr);
                break;
            }
            state = RS_WEIGHT;
        } else if ( c == ',') { //new sibling
            // A sibling needs an enclosing '(': a top-level comma would pop the
            // root and leave both stacks empty.
            if (nodeStack.size() < 2 || branchStack.isEmpty()) {
                si.error = NewickFormat::tr("unexpected_comma_%1").arg(lastStr);
                break;
            }
            nodeStack.pop();
            branchStack.pop();
            PhyNode* parent = nodeStack.top();
            PhyNode* pn = new PhyNode();
            PhyBranch* bd = PhyNode::addBranch(pn, parent, 0);
            nodeStack.push(pn);
            branchStack.push(bd);
            state = RS_NAME;
        } else if ( c == ')' ) { //end of the branch, go up
            // There must be an open '(' to close; never pop the root.
            if (nodeStack.size() < 2 || branchStack.isEmpty()) {
                si.error = NewickFormat::tr("unexpected_closing_bracket_%1").arg(lastStr);
                break;
            }
            nodeStack.pop();
            branchStack.pop();
            state = RS_NAME;
        } else if (c == ';') {
            break;
        }
        lastStr.clear();
    }
    // Report unbalanced brackets, but keep a more specific error reported above.
    if (!si.hasErrors() && (!branchStack.isEmpty() || nodeStack.size()!=1)) {
        si.error = NewickFormat::tr("unexpected_eof");
    }
    return res;
}

}//namespace
