/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "GenbankPlainTextFormat.h"
#include "GenbankLocationParser.h"
#include "DocumentFormatUtils.h"

#include <gobjects/AnnotationTableObject.h>
#include <gobjects/DNASequenceObject.h>
#include <gobjects/GObjectUtils.h>
#include <gobjects/GObjectRelationRoles.h>
#include <core_api/IOAdapter.h>
#include <core_api/Task.h>
#include <core_api/DNAAlphabet.h>

#include <util_text/TextUtils.h>

#include <memory>

namespace GB2 {

/* TRANSLATOR GB2::GenbankPlainTextFormat */    
/* TRANSLATOR GB2::EMBLGenbankAbstractDocument */    
/* TRANSLATOR GB2::IOAdapter */    

// Constructs the plain-text Genbank format descriptor: registers the file
// extensions it is associated with and sets the prefixes inherited from
// EMBLGenbankAbstractDocument ("ORIGIN" is the keyword written in front of
// the sequence block by this file's writer).
GenbankPlainTextFormat::GenbankPlainTextFormat(QObject* p) 
: EMBLGenbankAbstractDocument(BaseDocumentFormats::PLAIN_GENBANK, tr("Genbank"), 79, p) 
{
    // File name extensions, in the same order as before.
    extensions << "gb";
    extensions << "gbk";
    extensions << "gen";
    extensions << "genbank";

    sequenceStartPrefix = "ORIGIN";
    fPrefix = "  ";
}

// Quick sniffing of a raw data buffer.
// Returns true only when the buffer contains no characters from
// TextUtils::BINARY, is at least 100 bytes long and starts with "LOCUS ".
bool GenbankPlainTextFormat::isDataFormatSupported(const char* data, int size) const {
    //todo: improve handling
    const bool hasBinaryChars = TextUtils::contains(TextUtils::BINARY, data, size);
    if (hasBinaryChars) {
        return false;
    }
    if (size < 100) {
        // too little data to make a decision
        return false;
    }
    return TextUtils::equals("LOCUS ", data, 6);
}


// Parses a LOCUS line into a newly allocated ID-line object (name + sequence length).
//
// line - the LOCUS line; it is trimmed before parsing.
// si   - receives "error_parsing_locus" when the trimmed line is shorter than 40 chars.
//
// Returns the new object (the caller takes ownership) or NULL on error.
// The name is taken from column 12 up to the first space found at or after
// column 28; the next space-delimited token is converted to the sequence
// length (0 if it is missing or not a number, as QString::toInt() reports).
EMBLGenbankAbstractIDLine* GenbankPlainTextFormat::readIdLine(const QString& line, TaskStateInfo& si) {
    const QString locusStr = line.trimmed();
    const int len = locusStr.length();
    if (len < 40) {
        si.error = GenbankPlainTextFormat::tr("error_parsing_locus");
        return NULL;
    }

    //28 is the last char in locus 'name' by standard, we support some deviations->longer names
    int nameEnd = locusStr.indexOf(" ", 28);
    if (nameEnd == -1) {
        // No space after the name at all: the rest of the line is the name and
        // there is no length token. Without this guard the scans below would
        // start at index -1 and read outside of the string.
        nameEnd = len;
    }

    EMBLGenbankAbstractIDLine* locus = new EMBLGenbankAbstractIDLine();
    locus->name = locusStr.mid(12, nameEnd - 12).trimmed();

    // Skip the run of spaces between the name and the length token.
    // The QString itself is indexed (not a converted byte array), so positions
    // always refer to the same characters indexOf()/mid() work with.
    int lenStart = nameEnd;
    while (lenStart < len && locusStr.at(lenStart) == ' ') {
        lenStart++;
    }
    // Find the end of the length token.
    int lenEnd = lenStart;
    while (lenEnd < len && locusStr.at(lenEnd) != ' ') {
        lenEnd++;
    }
    locus->seqLen = locusStr.mid(lenStart, lenEnd - lenStart).trimmed().toInt();
    return locus;
}

// Reads the header of one Genbank entry: the LOCUS line followed by keyword tags.
//
// io - adapter to read lines from.
// si - receives parse errors; its cancelFlag is polled and progress is updated.
//
// Returns a newly allocated header (the caller takes ownership), or NULL when
// no LOCUS line was parsed. Reading stops at a line starting with "FEATURES",
// "ORIGIN" or "//"; that line is handed back to the adapter with skip(-len).
// Note that a non-NULL header may be returned together with an error in 'si'.
EMBLGenbankAbstractHeader* GenbankPlainTextFormat::readHeader(IOAdapter* io, TaskStateInfo& si) {
    static const int READ_BUFF_SIZE = 4096;

    EMBLGenbankAbstractHeader* hdr = new EMBLGenbankAbstractHeader();
    qint64 len;
    QByteArray readBuffer(READ_BUFF_SIZE, '\0');
    char* cbuff = readBuffer.data();
    QString lastTagName;
    int refNum = 1; // running index appended to every REFERENCE tag
    bool lineOk = true;
    while ( (len = io->readUntil(cbuff, READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk)) > 0 ) {
        if (si.cancelFlag) {
            break;
        }
        if (!lineOk) {
            si.error = GenbankPlainTextFormat::tr("line_is_too_long_or_unexpected_oef");
            break;
        }
        // The terminator is included in the data read: overwrite the last byte
        // to get a NUL-terminated C string.
        cbuff[len-1]='\0';
        QString line = QString(cbuff).trimmed();//todo: optimize and avoid qstring here
        if (line.isEmpty()) {
            continue;
        }
        if (hdr->idLine==NULL) {
            // The first non-empty line must be the LOCUS line.
            if (!line.startsWith("LOCUS")) {
                si.error = GenbankPlainTextFormat::tr("locus_not_first_line");
            } else {
                hdr->idLine = readIdLine(line, si);
            }
            assert(si.hasErrors() || hdr->idLine!= NULL);
            if (si.hasErrors()) {
                break;
            }
            continue;
        }

        //read simple tag;
        if (line.startsWith("FEATURES") || line.startsWith("ORIGIN") || line.startsWith("//") ) {
            io->skip(-len);
            break; // end of header
        }
        if (TextUtils::equals(cbuff, "BASE COUNT", 10)) { //this line is not parsed.
            continue;
        }
        // A line that does not start with whitespace opens a new tag,
        // anything else continues the value of the previous tag.
        bool newTag = cbuff[0]!=' ' && cbuff[0]!='\t';
        if (newTag) {
            int i = line.indexOf(' ', 0);
            lastTagName = (i!=-1)? line.left(i) : line;
            if (lastTagName == "REFERENCE") {
                // REFERENCE may occur several times: make the key unique
                lastTagName = "REFERENCE"+QString::number(refNum);
                refNum++;
            }
            QString val;
            if (i != -1) {
                while(i < line.length() && line[i]==' ')  {
                    i++;
                }
                val = line.mid(i);
            }
            hdr->tags[lastTagName] =  val;
        } else {
            // NOTE(review): continuation lines are appended without any separator,
            // so words on a line boundary get glued together - confirm this is intended.
            QString prevVal = hdr->tags[lastTagName];
            hdr->tags[lastTagName] =  prevVal + line;
        }
        si.progress = io->getProgress();
    }
    if (hdr->idLine== NULL) {
        delete hdr;
        return NULL;
    }
    return hdr;
}

// Reads the FEATURES table of one Genbank entry.
//
// io     - adapter positioned at the line that may start the feature table.
// si     - receives parse errors; its cancelFlag is polled and progress is updated.
// offset - passed through unchanged to readAnnotation().
//
// Returns the annotations read so far (possibly empty). If the first line is
// not "FEATURES" it is handed back to the adapter with skip(-len) and an empty
// list is returned. Reading stops at a line starting with "LOCUS", "ORIGIN"
// or "//", which is handed back to the adapter the same way.
QList<SharedAnnotationData> GenbankPlainTextFormat::readAnnotations(IOAdapter* io, TaskStateInfo& si, int offset) {
    static int READ_BUFF_SIZE = 8192;

    QList<SharedAnnotationData> list;
    //TODO: +1 here but no +1 in readHeader?
    QByteArray readBuffer(READ_BUFF_SIZE+1, '\0');
    char* cbuff = readBuffer.data();

    bool lineOk = true;
    int len = io->readUntil(cbuff, READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk);
    if (len <= 0) {
        return list;
    }
    if (!lineOk) {
        si.error = GenbankPlainTextFormat::tr("line_is_too_long");
        return list;
    }
    if (!TextUtils::equals(cbuff, "FEATURES", 8)) {
        // no feature table in this entry
        io->skip(-len);
        return list;
    }
    while ((len = io->readUntil(cbuff, READ_BUFF_SIZE, TextUtils::LINE_BREAKS, IOAdapter::Term_Include, &lineOk)) > 0) {
        if (si.cancelFlag) {
            break;
        }
        if (!lineOk) {
            // The message used to be built and thrown away, so this failure
            // was silently treated as a normal end of the table.
            si.error = EMBLGenbankAbstractDocument::tr("line_is_too_long_or_unexpected_oef");
            break;
        }
        if (TextUtils::equals(cbuff, "LOCUS", 5) || TextUtils::equals(cbuff, "ORIGIN", 6) || TextUtils::equals(cbuff, "//", 2)) {
            // end of feature table
            io->skip(-len);
            break;
        }
        if (TextUtils::equals(cbuff, "BASE COUNT", 10)) { //this line is not parsed
            continue;
        }
        if (len < 6 || !TextUtils::equals(cbuff, "     ", 5)) {//check line format: key starts on offset 6; (max len 15);
            io->skip(-len);
            si.error = EMBLGenbankAbstractDocument::tr("invalid_feature_format");
            break;
        }

        //parsing feature;
        SharedAnnotationData f = readAnnotation(io, cbuff, len, READ_BUFF_SIZE, si, offset);
        if (si.hasErrors()) {
            break;
        }
        list.push_back(f);
        si.progress = io->getProgress();
    }
    return list;
}



//////////////////////////////////////////////////////////////////////////
/// saving

// Saves 'doc' in Genbank format.
//
// doc       - the document to store.
// ti        - receives an error when the target cannot be opened or save() fails.
// iof       - factory for the IO adapter; when NULL the document's own factory is used.
// newDocURL - target URL; when empty the document's current URL is used.
void GenbankPlainTextFormat::storeDocument(Document* doc, TaskStateInfo& ti, IOAdapterFactory* iof, const QString& newDocURL) {
    assert(doc->getDocumentModLock(DocumentModLock_FORMAT_AS_INSTANCE) == NULL);

    IOAdapterFactory* factory = iof;
    if (factory == NULL) {
        factory = doc->getIOAdapterFactory();
    }
    // The adapter is owned by this function and released on every exit path.
    std::auto_ptr<IOAdapter> io(factory->createIOAdapter());

    QString url;
    if (newDocURL.isEmpty()) {
        url = doc->getURL();
    } else {
        url = newDocURL;
    }

    const bool opened = io->open(url, IOAdapterMode_Write);
    if (!opened) {
        ti.error = IOAdapter::tr("error_opening_url_for_write '%1'").arg(url);
        return;
    }

    save(io.get(), doc, ti);
    io->close();
}

// Forward declarations of the file-local helpers used by save(); they are defined below.
// Builds the text of the LOCUS line (without a trailing line break).
static QString genLocusString(AnnotationTableObject* ao, DNASequenceObject* so);
// Writes the FEATURES table for the annotations of 'ao'; errors are reported through 'si'.
static void writeAnnotations(IOAdapter* io, AnnotationTableObject* ao, TaskStateInfo& si);
// Writes the ORIGIN block for the given sequence object; errors are reported through 'si'.
static void writeSequence(IOAdapter* io, DNASequenceObject* ao, TaskStateInfo& si);
// Wraps 'lineToChange' in place; the default arguments (trailing line break, 79 columns) are declared here.
static void prepareMultiline(QString& lineToChange, int spacesOnLineStart, bool newLineAtTheEnd = true, int maxLineLen = 79);

// Writes every sequence and annotation table of 'doc' to 'io' as Genbank entries.
//
// io  - adapter already opened for writing.
// doc - the document whose DNA_SEQUENCE and ANNOTATION_TABLE objects are stored.
// si  - receives "error writing document" (or an error of a helper) on failure;
//       writing stops at the first error.
//
// Each entry consists of a LOCUS line, the UNIMARK line, an optional FEATURES
// table, an optional ORIGIN block and the closing "//" line. A sequence is
// paired with the first annotation table related to it by the SEQUENCE role;
// annotation tables left without a sequence get entries of their own.
void GenbankPlainTextFormat::save(IOAdapter* io, Document* doc, TaskStateInfo& si) {
    QList<GObject*> seqs = doc->findGObjectByType(GObjectTypes::DNA_SEQUENCE);
    QList<GObject*> anns = doc->findGObjectByType(GObjectTypes::ANNOTATION_TABLE);

    while (!seqs.isEmpty() || !anns.isEmpty()) {

        DNASequenceObject* so = seqs.isEmpty() ? NULL : static_cast<DNASequenceObject*>(seqs.takeFirst());
        AnnotationTableObject* ao = NULL;
        if (so) {
            if (!anns.isEmpty()) {
                QList<GObject*> relAnns = GObjectUtils::findObjectsRelatedToObjectByRole(so, GObjectTypes::ANNOTATION_TABLE, GObjectRelationRole::SEQUENCE, anns);
                if (relAnns.size() > 0) {
                    ao = qobject_cast<AnnotationTableObject*>(relAnns.first());
                    anns.removeOne(ao);
                }
            }
        } else {
            // no sequences left: the loop condition guarantees an annotation table
            assert(!anns.isEmpty());
            ao = static_cast<AnnotationTableObject*>(anns.takeFirst());
        }

        // write locus string
        // The number of bytes written is compared with the size of the encoded
        // byte array: the QString length is a character count and may differ.
        const QByteArray locusBytes = (genLocusString(ao, so) + "\n").toAscii();
        qint64 len = io->writeBlock(locusBytes);
        if (len != locusBytes.size()) {
            si.error = GenbankPlainTextFormat::tr("error writing document");
            return;
        }

        //write tool mark
        QString unimark = UNIMARK;
        if (ao!=NULL) {
            unimark += TextUtils::getLineOfSpaces(12 - UNIMARK.length()) + ao->getGObjectName();
        }
        unimark+="\n";

        // The object name may contain non-ASCII characters that take more than
        // one byte in the local 8-bit encoding.
        const QByteArray unimarkBytes = unimark.toLocal8Bit();
        len = io->writeBlock(unimarkBytes);
        if (len != unimarkBytes.size()) {
            si.error = GenbankPlainTextFormat::tr("error writing document");
            return;
        }

        // write annotations
        if (ao) {
            writeAnnotations(io, ao, si);
            if (si.hasErrors()) {
                return;
            }
        }

        if (so) {
            //todo: store sequence alphabet!
            writeSequence(io, so, si);
            if (si.hasErrors()) {
                return;
            }
        }

        // write last line marker
        QByteArray lastLine("//\n");
        len = io->writeBlock(lastLine);
        if (len!=lastLine.size()) {
            si.error = GenbankPlainTextFormat::tr("error writing document");
            return;
        }
    }
}

// Returns the current date formatted for the LOCUS line as dd-MMM-yyyy
// (two-digit day, upper-case English month abbreviation), e.g. 05-JAN-2008.
static QString getDate(){
    // Index 0 is a placeholder: QDate::month() is 1-based.
    const char* MonthsInEng[] = {" ", "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"};
    // Take a single snapshot of the date: querying the clock separately for the
    // day, month and year could mix two different dates around midnight.
    const QDate today = QDate::currentDate();
    return today.toString("dd-") + 
        MonthsInEng[today.month()] + 
        today.toString("-yyyy");
}

// Builds the LOCUS line (without a line break) for one entry.
//
// ao - annotation table of the entry or NULL; its name is preferred for the entry name.
// so - sequence of the entry or NULL; provides the name when 'ao' is NULL and
//      the sequence length (0 when NULL).
//
// Layout: "LOCUS" + 7 spaces, the name (at most 22 chars), the length
// right-aligned to column 40 followed by " bp ", the date right-aligned to
// column 79. The fields are always separated by at least one space.
static QString genLocusString(AnnotationTableObject* ao, DNASequenceObject* so) {
    QString name;
    if (ao != NULL) {
        name = ao->getGObjectName();
    } else if (so != NULL) {
        name = so->getGObjectName();
    } else {
        name = "unknown";
    }
    if (name.length() > 22) {
        // flagged in debug builds, truncated in release builds
        assert(0);
        name = name.left(22);
    }
    QString res = "LOCUS       "+name;

    const int seqLen = so == NULL ? 0 : so->getSequence().size();
    const QString len = QString::number(seqLen);

    // A long name combined with a long length leaves no room for padding.
    // Keep at least one space, otherwise the name and the length are glued
    // together and the line cannot be parsed back.
    int nspaces = qMax(1, 40 - len.length() - res.length());
    res+=TextUtils::getLineOfSpaces(nspaces);
    res+=len;
    res+=" bp ";

    const QString date = getDate();
    nspaces = qMax(1, 79 - res.length() - date.length());
    res+=TextUtils::getLineOfSpaces(nspaces);
    res+=date;
    return res;
}

static void writeQualifier(const QString& name, const QString& val, IOAdapter* io, TaskStateInfo& si, const char* spaceLine) {
    int len = io->writeBlock(spaceLine, 21);
    if (len != 21) {
        si.error = GenbankPlainTextFormat::tr("error writing document");
        return;
    }
    QString qstr = "/"+name+ "=\""+val+"\"";
    prepareMultiline(qstr, 21); 
    len = io->writeBlock(qstr.toLocal8Bit());
    if (len != qstr.length()) {
        si.error = GenbankPlainTextFormat::tr("error writing document");
    }
}

// Writes the FEATURES table for all annotations of 'ao', sorted with
// annotationLessThanByRegion.
//
// io - adapter to write to.
// ao - annotation table to store.
// si - receives "error writing document" on an incomplete write; writing
//      stops at the first error.
//
// Every feature is written as: 5 spaces, the feature key, padding up to the
// location column, the (wrapped) location string, then one line per qualifier
// and an extra qualifier for the amino strand when it is known.
static void writeAnnotations(IOAdapter* io, AnnotationTableObject* ao, TaskStateInfo& si) {
    QByteArray header("FEATURES             Location/Qualifiers\n");

    //write "FEATURES"
    qint64 len = io->writeBlock(header);
    if (len!=header.size()) {
        si.error = GenbankPlainTextFormat::tr("error writing document");
        return;
    }

    //write every feature
    const char* spaceLine = TextUtils::SPACE_LINE.data();
    QList<Annotation*> sortedAnnotations = ao->getAnnotations();
    qSort(sortedAnnotations.begin(), sortedAnnotations.end(), annotationLessThanByRegion);
    foreach(Annotation* a, sortedAnnotations) {
        //write name of the feature
        const QString& name = a->getAnnotationName();
        assert(name.length() <= 15);
        len = io->writeBlock(spaceLine, 5);
        if (len!=5) {
            si.error = GenbankPlainTextFormat::tr("error writing document");
            return;
        }
        const QByteArray nameBytes = name.toAscii();
        len = io->writeBlock(nameBytes);
        if (len != nameBytes.size()) {
            si.error = GenbankPlainTextFormat::tr("error writing document");
            return;
        }
        // Pad up to the location column. An over-long key (only asserted above)
        // would make the count zero or negative: always keep one separating
        // space so that the key is not glued to the location.
        const int nspaces = qMax(1, 22 - name.length() - 6);
        len = io->writeBlock(spaceLine, nspaces);
        if (len !=nspaces) {
            si.error = GenbankPlainTextFormat::tr("error writing document");
            return;
        }

        //write location
        QString multiLineLocation = Genbank::LocationParser::buildLocationString(a->data());
        prepareMultiline(multiLineLocation, 21);
        const QByteArray locationBytes = multiLineLocation.toAscii();
        len = io->writeBlock(locationBytes);
        if (len != locationBytes.size()) {
            si.error = GenbankPlainTextFormat::tr("error writing document");
            return;
        }

        //write qualifiers
        foreach (const Qualifier& q, a->getQualifiers()) {
            writeQualifier(q.getQualifierName(), q.getQualifierValue(), io, si, spaceLine);
            if (!si.error.isEmpty()) {
                return;
            }
        }
        //write strand info
        if (a->getAminoStrand() != TriState_Unknown) {
            const QString& val = a->getAminoStrand() == TriState_No ? 
                EMBLGenbankAbstractDocument::AMINO_STRAND_QVAL_NO
                : EMBLGenbankAbstractDocument::AMINO_STRAND_QVAL_YES;
            writeQualifier(EMBLGenbankAbstractDocument::AMINO_STRAND_QNAME, val, io, si, spaceLine);
            // stop here as well instead of going on with the next feature
            if (!si.error.isEmpty()) {
                return;
            }
        }
    }
}

// Writes the sequence block: an "ORIGIN" line followed by lines of up to
// 60 sequence characters. Every line starts with the 1-based position of its
// first character right-aligned in 9 columns, the characters follow in
// groups of 10, each group preceded by a single space.
//
// io - adapter to write to.
// ao - the sequence object to store.
// si - receives "file_write_error" as soon as a write is incomplete.
static void writeSequence(IOAdapter* io, DNASequenceObject* ao, TaskStateInfo& si) {
    static const int charsInLine = 60;

    const QByteArray& residues = ao->getSequence();
    const int total = residues.length();
    const char* data = residues.constData();
    const char* blanks = TextUtils::SPACE_LINE.constData();

    int written = io->writeBlock(QByteArray("ORIGIN\n"));
    if (written != 7) {
        si.error = IOAdapter::tr("file_write_error");
        return;
    }

    QByteArray posLabel;
    for (int lineStart = 0; lineStart < total; lineStart += charsInLine) {
        posLabel.setNum(lineStart + 1);

        // left padding of the position label
        const int padLen = 10 - posLabel.length() - 1;
        written = (int)io->writeBlock(QByteArray::fromRawData(blanks, padLen));
        if (written != padLen) {
            si.error = IOAdapter::tr("file_write_error");
            return;
        }

        // position of the first character of the line
        written = (int)io->writeBlock(posLabel);
        if (written != posLabel.length()) {
            si.error = IOAdapter::tr("file_write_error");
            return;
        }

        // the characters of the line in groups of 10
        const int lineEnd = qMin(lineStart + charsInLine, total);
        int groupStart = lineStart;
        while (groupStart < lineEnd) {
            written = (int)io->writeBlock(QByteArray::fromRawData(" ", 1));
            if (written != 1) {
                si.error = IOAdapter::tr("file_write_error");
                return;
            }
            const int groupLen = qMin(10, total - groupStart);
            written = io->writeBlock(QByteArray::fromRawData(data + groupStart, groupLen));
            if (written != groupLen) {
                si.error = IOAdapter::tr("file_write_error");
                return;
            }
            groupStart += 10;
        }

        // line end
        written = (int)io->writeBlock(QByteArray("\n", 1));
        if (written != 1) {
            si.error = IOAdapter::tr("file_write_error");
            return;
        }
    }
}


// Wraps 'line' in place so that it fits into 'maxLineLen' columns when every
// output line starts at column 'spacesOnLineStart'.
//
// line              - text to wrap; the caller has already written the indent of the first line.
// spacesOnLineStart - number of spaces put in front of every line except the first one.
// newLineAtTheEnd   - when true a '\n' is appended to the result.
// maxLineLen        - maximal length of an output line including the indent.
//
// A line is broken after the last whitespace or ',' that fits; when there is
// no such character the line is cut at the full width. Text that already fits
// is left untouched (apart from the optional trailing '\n').
static void prepareMultiline(QString& line, int spacesOnLineStart, bool newLineAtTheEnd, int maxLineLen) {
    Q_ASSERT(spacesOnLineStart < maxLineLen);
    const int len = line.length() ;
    const int charsInLine = maxLineLen - spacesOnLineStart;
    // charsInLine > 0: with a non-positive width the loop below would never
    // advance (the assert above is compiled out in release builds).
    if (charsInLine > 0 && len > charsInLine) {
        QByteArray spacesPrefix(spacesOnLineStart, ' ');
        QString newLine;
        int pos = 0;
        do {
            if (pos!=0) {
                newLine.append('\n');
                newLine.append(spacesPrefix);
            }
            int pos2 =  pos + charsInLine - 1; // last index that fits into this line
            if (pos2 < len - 1) { //not the last line: some text remains after pos2
                while (pos2 > pos && !line[pos2].isSpace() && (line[pos2]!= ',')) {
                    pos2--;
                }
                if (pos == pos2) { //we failed to find word end
                    pos2 = pos + charsInLine - 1; 
                }
                newLine.append(line.mid(pos, pos2 + 1 - pos));
            } else { //last line: the rest fits completely, no need to look for a break
                newLine.append(line.mid(pos, len-pos));
            }
            pos=pos2+1;
        } while (pos<len);
        line = newLine;
    }
    if (newLineAtTheEnd) {
        line+="\n";
    }
}

}//namespace
