/*****************************************************************
* Unipro UGENE - Integrated Bioinformatics Suite
* Copyright (C) 2008,2009 Unipro, Russia (http://ugene.unipro.ru)
* All Rights Reserved
* 
*     This source code is distributed under the terms of the
*     GNU General Public License. See the files COPYING and LICENSE
*     for details.
*****************************************************************/

#include "SCFFormat.h"
#include "IOLibUtils.h"
#include "DocumentFormatUtils.h"

#include <core_api/Task.h>
#include <core_api/IOAdapter.h>
#include <core_api/DNAAlphabet.h>
#include <core_api/GObjectReference.h>

#include <gobjects/DNASequenceObject.h>
#include <gobjects/GObjectTypes.h>
#include <gobjects/GObjectRelationRoles.h>
#include <gobjects/DNAChromatogramObject.h>

#include <util_gui/GUIUtils.h>
#include <util_text/TextUtils.h>

#include <memory>

/* TRANSLATOR GB2::SCFFormat */    

namespace GB2 {

/**
 * Constructs the SCF (Standard Chromatogram Format) document format.
 * The parent object p is forwarded to the DocumentFormat base class and
 * the translatable display name of the format is set to "SCF".
 */
SCFFormat::SCFFormat(QObject* p) : DocumentFormat(p)
{
	formatName = tr("SCF");
}

/** Returns the list of file extensions handled by this format: just "scf". */
QStringList SCFFormat::getSupportedDocumentFileExtensions() {
	QStringList extensions;
	extensions.append("scf");
	return extensions;
}

/**
 * Checks whether a raw data buffer looks like SCF data.
 *
 * The buffer must be longer than 4 bytes and start with the ".scf"
 * signature; the final verdict is whatever
 * TextUtils::contains(TextUtils::BINARY, ...) reports for the buffer.
 */
bool SCFFormat::isDataFormatSupported(const char* data, int size) const {
	static const char signature[4] = {'.', 's', 'c', 'f'};

	if (size <= 4) {
		return false;
	}
	for (int k = 0; k < 4; ++k) {
		if (data[k] != signature[k]) {
			return false;
		}
	}
	return TextUtils::contains(TextUtils::BINARY, data, size);
}

/**
 * Reports whether an object of type t is supported by this format.
 * Only DNA sequence and DNA chromatogram objects are accepted; the
 * document and the requested operation are not taken into account.
 */
bool SCFFormat::isObjectOpSupported(const Document* d , DocumentFormat::DocObjectOp op, GObjectType t) const 
{
    Q_UNUSED(d);
    Q_UNUSED(op);
    if (t == GObjectTypes::DNA_SEQUENCE) {
        return true;
    }
    return t == GObjectTypes::DNA_CHROMATOGRAM;
}


/**
 * Tells whether this format satisfies the given constraints.
 *
 * Returns false when write support or streaming read is required, or when
 * any requested object type is not a DNA sequence. If raw data checking
 * is requested, the result is that of isDataFormatSupported() on the
 * supplied raw data; otherwise the constraints are satisfied.
 */
bool SCFFormat::checkConstraints(const DocumentFormatConstraints& c) const {
	// Capability constraints are evaluated before the raw data shortcut:
	// recognizable SCF data must not make a write/streaming request pass.
	if (c.mustSupportWrite || c.supportsStreamingRead) {
		return false;
	}

	foreach (GObjectType t, c.supportedObjectTypes) {
		if (t != GObjectTypes::DNA_SEQUENCE) {
			return false;
		}
	}

	if (c.checkRawData) {
		return isDataFormatSupported(c.rawData.constData(), c.rawData.size());
	}

	return true;
}

/**
 * Loads an SCF document from url.
 *
 * The whole file is read into memory in BUFF_SIZE blocks; reading is
 * aborted with a "file too large" error once the accumulated size exceeds
 * CHECK_MB. The buffered data is then handed to parseSCF().
 *
 * Returns the new document, or NULL with an error set in ti when the file
 * cannot be opened, is too large, or is not valid SCF data.
 */
Document* SCFFormat::loadExistingDocument(IOAdapterFactory* iof, const QString& url, TaskStateInfo& ti, const QVariantMap& fs) {
    // read in the file
    std::auto_ptr<IOAdapter> io(iof->createIOAdapter());
    if (!io->open(url, IOAdapterMode_Read)) {
        ti.setError(Translations::errorOpeningFileRead(url));
        return NULL;
    }

    QByteArray readBuff;
    QByteArray block(BUFF_SIZE, 0);
    // Signed on purpose: a negative result from readBlock must terminate
    // the loop instead of being reinterpreted as a huge positive length.
    qint64 len = 0;
    while ((len = io->readBlock(block.data(), BUFF_SIZE)) > 0) {
        readBuff.append(QByteArray(block.data(), static_cast<int>(len)));
        if (readBuff.size() > CHECK_MB) {
            ti.setError(Translations::errorFileTooLarge(url)); 
            break;
        }
    }
    io->close();
    if (ti.hasErrors()) {
        return NULL;
    }

    // Expose the in-memory copy of the file to the parser.
    SeekableBuf sf;
    sf.head = readBuff.constData();
    sf.pos = 0;
    sf.size = readBuff.size();
    Document* doc = parseSCF(&sf, iof, url, fs);
    if (doc == NULL && !ti.hasErrors()) {
        ti.setError(tr("Not a valid SCF file: %1").arg(url));
    }
    return doc;
}

/*
* Copyright (c) Medical Research Council 1994. All rights reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation for any purpose is hereby granted without fee, provided that
* this copyright and notice appears in all copies.
*
* This file was written by James Bonfield, Simon Dear, Rodger Staden,
* as part of the Staden Package at the MRC Laboratory of Molecular
* Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
*
* MRC disclaims all warranties with regard to this software.
*/

/* 
Title:       read_scf.c

Purpose:	 read IO of Standard Chromatogram Format sequences
Last update:   August 18 1994

Change log:
4 Feb 1992,  Now draft proposal version 2
20 Feb 1992, Grab info from comment lines
19 Aug 1992, If SCF file has clip information, don't clip automatically
10 Nov 1992  SCF comments now stored in seq data structure
18 Aug 1994  Renamed from  ReadIOSCF.c; now purely SCF IO (no Seq structs)

*/

/*
 * The SCF magic number: the characters '.', 's', 'c', 'f' packed into one
 * integer with '.' in the most significant byte. read_scf_header() compares
 * it with the first 4-byte value read from the file.
 */
#define SCF_MAGIC ((int) ((((('.'<<8)+'s')<<8)+'c')<<8)+'f')

/* Older format levels; prior to SCF_VERSION_OLDEST was a different format */
#define SCF_VERSION_OLDEST 2.00
#define SCF_VERSION_OLD 2.02

/* The current SCF format level */
#define SCF_VERSION 3.00

/*
 *-----------------------------------------------------------------------------
 * Structures and typedefs
 *-----------------------------------------------------------------------------
 */

/*
 * Type definition for the Header structure.
 *
 * read_scf_header() fills the fields in declaration order: every uint
 * field is read with be_read_int_4() and `version` is copied as 4 raw
 * bytes, so `version` is not guaranteed to be NUL-terminated.
 */
typedef struct {
    uint magic_number;       /* SCF_MAGIC */
    uint samples;            /* Number of elements in Samples matrix */
    uint samples_offset;     /* Byte offset from start of file */
    uint bases;              /* Number of bases in Bases matrix */
    uint bases_left_clip;    /* OBSOLETE: No. bases in left clip (vector) */
    uint bases_right_clip;   /* OBSOLETE: No. bases in right clip (qual) */
    uint bases_offset;       /* Byte offset from start of file */
    uint comments_size;      /* Number of bytes in Comment section */
    uint comments_offset;    /* Byte offset from start of file */
    char   version[4];	       /* "version.revision" */
    uint sample_size;	       /* precision of samples (in bytes) */
    uint code_set;	       /* uncertainty codes used */
    uint private_size;       /* size of private data, 0 if none */
    uint private_offset;     /* Byte offset from start of file */
    uint spare[18];          /* Unused */
} Header;

/*
 * One trace sample point with one byte per channel.
 * Used when Header.sample_size == 1. read_scf_sample1() fills the four
 * channels in A, C, G, T order from 4 consecutive bytes.
 */
typedef struct {
    uchar sample_A;			/* Sample for A trace */
    uchar sample_C;			/* Sample for C trace */
    uchar sample_G;			/* Sample for G trace */
    uchar sample_T;			/* Sample for T trace */
} Samples1;

/*
 * One trace sample point with two bytes per channel.
 * Used when Header.sample_size == 2. read_scf_sample2() fills the four
 * channels in A, C, G, T order, each with be_read_int_2().
 */
typedef struct {
    ushort sample_A;			/* Sample for A trace */
    ushort sample_C;			/* Sample for C trace */
    ushort sample_G;			/* Sample for G trace */
    ushort sample_T;			/* Sample for T trace */
} Samples2;

/*
 * Type definition for the sequence data: one called base.
 *
 * read_scf_base() decodes a record from 12 consecutive bytes: a 4-byte
 * peak index (via be_int4), the four probability bytes, the base
 * character and 3 spare bytes.
 */
typedef struct {
    uint peak_index;        /* Index into Samples matrix for base position */
    uchar prob_A;            /* Probability of it being an A */
    uchar prob_C;            /* Probability of it being an C */
    uchar prob_G;            /* Probability of it being an G */
    uchar prob_T;            /* Probability of it being an T */
    char base;		      /* Base called */
    uchar spare[3];          /* Spare */
} Bases;


/*
 * Type definition for the comments section of an SCF file.
 * The section is plain character data holding text entries.
 */
typedef char Comments;      /* Zero terminated list of \n separated entries */

/*
* Reading SCF routines
* -----------------
* Return:
*    0 - success
*   -1 - failure
*/
/*
 * Reads the SCF header from fp into *h, field by field in file order.
 * The magic number is validated as soon as it has been read.
 * Returns 0 on success and -1 if any read fails or the magic number
 * does not equal SCF_MAGIC.
 */
int read_scf_header(SeekableBuf *fp, Header *h)
{
    if (be_read_int_4(fp, &h->magic_number) == 0 || h->magic_number != SCF_MAGIC) {
        return -1;
    }

    /* 4-byte fields stored between the magic number and the version string */
    uint* const beforeVersion[] = {
        &h->samples,          &h->samples_offset, &h->bases,
        &h->bases_left_clip,  &h->bases_right_clip,
        &h->bases_offset,     &h->comments_size,  &h->comments_offset
    };
    const int beforeCount = sizeof(beforeVersion) / sizeof(beforeVersion[0]);
    for (int k = 0; k < beforeCount; ++k) {
        if (be_read_int_4(fp, beforeVersion[k]) == 0) {
            return -1;
        }
    }

    /* the version is stored as raw characters, not as an integer */
    if (!fp->read(&h->version[0], sizeof(h->version))) {
        return -1;
    }

    /* 4-byte fields stored after the version string */
    uint* const afterVersion[] = {
        &h->sample_size, &h->code_set, &h->private_size, &h->private_offset
    };
    const int afterCount = sizeof(afterVersion) / sizeof(afterVersion[0]);
    for (int k = 0; k < afterCount; ++k) {
        if (be_read_int_4(fp, afterVersion[k]) == 0) {
            return -1;
        }
    }

    for (int k = 0; k < 18; ++k) {
        if (be_read_int_4(fp, &h->spare[k]) == 0) {
            return -1;
        }
    }

    return 0;
}


/*
 * Reads one 1-byte-per-channel sample point (A, C, G, T) from fp into *s.
 * Returns 0 on success, -1 if the 4 bytes could not be read.
 */
int read_scf_sample1(SeekableBuf *fp, Samples1 *s)
{
    uchar raw[4];

    if (fp->read((char*)raw, 4)) {
        s->sample_A = raw[0];
        s->sample_C = raw[1];
        s->sample_G = raw[2];
        s->sample_T = raw[3];
        return 0;
    }

    return -1;
}


/*
 * Reads one 2-byte-per-channel sample point from fp into *s; the channels
 * are read in A, C, G, T order with be_read_int_2().
 * Returns 0 on success, -1 as soon as one channel cannot be read.
 */
int read_scf_sample2(SeekableBuf *fp, Samples2 *s)
{
    ushort* const channels[4] = {
        &s->sample_A, &s->sample_C, &s->sample_G, &s->sample_T
    };

    for (int k = 0; k < 4; ++k) {
        if (be_read_int_2(fp, channels[k]) == 0) {
            return -1;
        }
    }

    return 0;
}

/*
 * Reads num_samples consecutive 1-byte sample points from fp into s[].
 * Returns 0 on success, -1 at the first sample point that fails to read.
 */
int read_scf_samples1(SeekableBuf *fp, Samples1 *s, size_t num_samples) 
{
    Samples1* const end = s + num_samples;

    for (Samples1* cur = s; cur != end; ++cur) {
        if (read_scf_sample1(fp, cur) == -1) {
            return -1;
        }
    }

    return 0;
}


/*
 * Reads num_samples consecutive 2-byte sample points from fp into s[].
 * Returns 0 on success, -1 at the first sample point that fails to read.
 */
int read_scf_samples2(SeekableBuf *fp, Samples2 *s, size_t num_samples) 
{
    Samples2* const end = s + num_samples;

    for (Samples2* cur = s; cur != end; ++cur) {
        if (read_scf_sample2(fp, cur) == -1) {
            return -1;
        }
    }

    return 0;
}

/*
 * Decodes, in place, a series of 1-byte "delta delta" values back into
 * sample points.
 *
 * Encoding replaced every sample by delta = current - previous and then
 * every delta by delta_delta = delta - previous_delta. Decoding is
 * therefore two running sums, performed here in a single pass.
 *
 * All sums are computed in unsigned 8-bit arithmetic so that wraparound
 * is modulo 256 regardless of whether plain char is signed; the stored
 * bytes are the same as with plain char arithmetic.
 *
 * samples     - values to decode; overwritten with the result
 * num_samples - number of elements in samples[]; nothing is done if <= 0
 */
void scf_delta_samples1 (char samples[], int num_samples) 
{
    unsigned char delta = 0;    /* running sum of the delta-delta values */
    unsigned char value = 0;    /* previously reconstructed sample       */

    for (int i = 0; i < num_samples; i++) {
        delta = static_cast<unsigned char>(delta + static_cast<unsigned char>(samples[i]));
        value = static_cast<unsigned char>(value + delta);
        samples[i] = static_cast<char>(value);
    }
}

/*
 * Decodes, in place, a series of 2-byte "delta delta" values back into
 * sample points.
 *
 * Encoding replaced every sample by delta = current - previous and then
 * every delta by delta_delta = delta - previous_delta. Decoding is
 * therefore two running sums, performed here in a single pass; the sums
 * wrap around modulo 65536.
 *
 * samples     - values to decode; overwritten with the result
 * num_samples - number of elements in samples[]; nothing is done if <= 0
 */
void scf_delta_samples2 (ushort samples[], int num_samples) 
{
    ushort delta = 0;    /* running sum of the delta-delta values */
    ushort value = 0;    /* previously reconstructed sample       */

    for (int i = 0; i < num_samples; i++) {
        delta = static_cast<ushort>(delta + samples[i]);
        value = static_cast<ushort>(value + delta);
        samples[i] = value;
    }
}


/*
 * Reads num_samples 2-byte sample points stored as delta-delta data.
 *
 * The data is laid out as four consecutive series (A, C, G, T), each
 * holding num_samples values. Every series is read with be_read_int_2(),
 * decoded with scf_delta_samples2() and then copied into the matching
 * channel of s[].
 *
 * Returns 0 on success, -1 as soon as a value cannot be read; channels
 * completed before the failure have already been stored in s[].
 */
int read_scf_samples32(SeekableBuf *fp, Samples2 *s, size_t num_samples) 
{
    /* destination channel for each of the four series, in file order */
    ushort Samples2::* const channels[4] = {
        &Samples2::sample_A, &Samples2::sample_C,
        &Samples2::sample_G, &Samples2::sample_T
    };

    QVarLengthArray<ushort> scratch(num_samples);
    ushort* const series = scratch.data();

    for (int c = 0; c < 4; ++c) {
        for (size_t k = 0; k < num_samples; ++k) {
            if (be_read_int_2(fp, series + k) == 0) {
                return -1;
            }
        }
        scf_delta_samples2(series, num_samples);
        for (size_t k = 0; k < num_samples; ++k) {
            s[k].*channels[c] = series[k];
        }
    }

    return 0;
}

/*
 * Reads num_samples 1-byte sample points stored as delta-delta data.
 *
 * The data is laid out as four consecutive series (A, C, G, T), each
 * holding num_samples bytes. Every series is read in one call, decoded
 * with scf_delta_samples1() and then copied into the matching channel
 * of s[].
 *
 * Returns 0 on success, -1 as soon as a series cannot be read; channels
 * completed before the failure have already been stored in s[].
 */
int read_scf_samples31(SeekableBuf *fp, Samples1 *s, size_t num_samples) 
{
    /* destination channel for each of the four series, in file order */
    uchar Samples1::* const channels[4] = {
        &Samples1::sample_A, &Samples1::sample_C,
        &Samples1::sample_G, &Samples1::sample_T
    };

    QVarLengthArray<char> scratch(num_samples);
    char* const series = scratch.data();

    for (int c = 0; c < 4; ++c) {
        /* fp->read() yields a true value on success, as at every other
           call site in this file, so failure is the negated result */
        if (!fp->read(series, num_samples)) {
            return -1;
        }
        scf_delta_samples1(series, num_samples);
        for (size_t k = 0; k < num_samples; ++k) {
            s[k].*channels[c] = series[k];
        }
    }

    return 0;
}

/*
 * Reads one 12-byte base record from fp into *b: a 4-byte peak index
 * (decoded with be_int4), the A/C/G/T probability bytes, the base
 * character and three spare bytes.
 * Returns 0 on success, -1 if the record could not be read.
 */
int read_scf_base(SeekableBuf *fp, Bases *b)
{
    uchar record[12];

    if (!fp->read((char*)record, 12)) {
        return -1;
    }

    b->peak_index = be_int4(record);

    /* the single-byte fields follow the 4-byte peak index */
    const uchar* const tail = record + 4;
    b->prob_A = tail[0];
    b->prob_C = tail[1];
    b->prob_G = tail[2];
    b->prob_T = tail[3];
    b->base   = tail[4];
    for (int k = 0; k < 3; ++k) {
        b->spare[k] = tail[5 + k];
    }

    return 0;
}

/*
 * Reads num_bases consecutive base records from fp into b[].
 * Returns 0 on success, -1 at the first record that fails to read.
 */
int read_scf_bases(SeekableBuf *fp, Bases *b, size_t num_bases) {
    Bases* const end = b + num_bases;

    for (Bases* cur = b; cur != end; ++cur) {
        if (read_scf_base(fp, cur) == -1) {
            return -1;
        }
    }

    return 0;
}

/*
 * Reads num_bases base records stored field by field rather than record
 * by record: first num_bases 4-byte peak indexes, then eight series of
 * num_bases bytes each (prob_A, prob_C, prob_G, prob_T, base, spare[0],
 * spare[1], spare[2]).
 * Returns 0 on success, -1 if any read fails.
 */
int read_scf_bases3(SeekableBuf *fp, Bases *b, size_t num_bases)
{
    QVarLengthArray<uchar> scratch(num_bases * 8);
    uchar* const raw = scratch.data();

    for (size_t k = 0; k < num_bases; ++k) {
        uint peak;
        if (be_read_int_4(fp, &peak) == 0) {
            return -1;
        }
        b[k].peak_index = peak;
    }

    if (!fp->read((char*)raw, 8 * num_bases)) {
        return -1;
    }

    /* start of each single-byte series inside the scratch buffer */
    const uchar* const probA  = raw;
    const uchar* const probC  = raw + num_bases;
    const uchar* const probG  = raw + 2 * num_bases;
    const uchar* const probT  = raw + 3 * num_bases;
    const uchar* const called = raw + 4 * num_bases;
    const uchar* const spare0 = raw + 5 * num_bases;
    const uchar* const spare1 = raw + 6 * num_bases;
    const uchar* const spare2 = raw + 7 * num_bases;

    for (size_t k = 0; k < num_bases; ++k) {
        Bases& rec = b[k];
        rec.prob_A   = probA[k];
        rec.prob_C   = probC[k];
        rec.prob_G   = probG[k];
        rec.prob_T   = probT[k];
        rec.base     = called[k];
        rec.spare[0] = spare0[k];
        rec.spare[1] = spare1[k];
        rec.spare[2] = spare2[k];
    }

    return 0;
}

/**
 * Parses SCF data held in fp and builds a document from it.
 *
 * Reads the header, the trace samples, the called bases and (best effort)
 * the comments. The resulting document holds a DNA sequence object and a
 * chromatogram object related to that sequence. The sample name is taken
 * from the "NAME=" comment entry if present, otherwise from the base name
 * of url.
 *
 * Returns the new document, or NULL if the header is invalid, a section
 * cannot be read, or the header announces more samples/bases than the
 * buffer could possibly hold.
 */
Document* SCFFormat::parseSCF(SeekableBuf* fp, IOAdapterFactory* iof, const QString& url, const QVariantMap& fs) {    
    Header h;
    int sections = READ_ALL;

    /* Read header */
    if (read_scf_header(fp, &h) == -1) {
        return NULL;
    }

    /* fake things for older style SCF -- SD */
    if (h.sample_size != 1 && h.sample_size != 2) h.sample_size = 1;

    /* h.version is a raw 4-byte field without a terminator: copy it into a
       NUL-terminated buffer before converting, and stop at an embedded NUL */
    const QByteArray versionBytes(h.version, sizeof(h.version));
    const float scf_version = QString::fromLatin1(versionBytes.constData()).toFloat();

    /* The counts come straight from the file. A sample point takes
       4 * sample_size bytes and a base takes 12 bytes, so larger counts than
       the buffer can hold could never be read; reject them before they are
       used as allocation sizes. */
    const uint bufSize = fp->size > 0 ? static_cast<uint>(fp->size) : 0;
    if (h.bases > bufSize / 12) {
        return NULL;
    }

    /* Allocate memory */
    DNAChromatogram cd;
    QByteArray sequence(h.bases, 0);

    if (sections & READ_SAMPLES) {
        if (h.samples > bufSize / (4 * h.sample_size)) {
            return NULL;
        }

        /* Read samples */
        if (SeekBuf(fp, h.samples_offset, 0 /* SEEK_SET */) != 0) {
            return NULL;
        }

        cd.A.resize(h.samples);
        cd.C.resize(h.samples);
        cd.G.resize(h.samples);
        cd.T.resize(h.samples);
        cd.traceLength = h.samples;
        int err;

        /* versions below 2.9 store whole sample points; later versions
           store one delta-delta encoded series per channel */
        if (h.sample_size == 1) {
            QVector<Samples1> samples(h.samples);
            if ( 2.9 > scf_version ) {
                err= read_scf_samples1(fp, samples.data(), h.samples);
            } else {
                err= read_scf_samples31(fp, samples.data(), h.samples);
            }
            if (-1 == err) {
                return NULL;
            }
            for (uint i = 0; i < h.samples; i++) {
                cd.A[i] = samples[i].sample_A;
                cd.C[i] = samples[i].sample_C;
                cd.G[i] = samples[i].sample_G;
                cd.T[i] = samples[i].sample_T;
            }
        }
        else {
            QVector<Samples2> samples(h.samples);
            if (2.9 > scf_version ) {
                err= read_scf_samples2(fp, samples.data(), h.samples);  
            } else {
                err= read_scf_samples32(fp, samples.data(), h.samples);
            }
            if (-1 == err) {
                return NULL;
            }
            for (uint i = 0; i < h.samples; i++) {
                cd.A[i] = samples[i].sample_A;
                cd.C[i] = samples[i].sample_C;
                cd.G[i] = samples[i].sample_G;
                cd.T[i] = samples[i].sample_T;
            }
        }
    }

    if (sections & READ_BASES) {

        /* Read bases */
        if (SeekBuf(fp, h.bases_offset, 0 /* SEEK_SET */) != 0) {
            return NULL;
        }

        QVector<Bases> bases(h.bases);

        if ( 2.9 > scf_version ) {
            if (-1 == read_scf_bases(fp, bases.data(), h.bases)) {
                return NULL;
            }
        }
        else {
            if (-1 == read_scf_bases3(fp, bases.data(), h.bases)) {
                return NULL;
            }
        }

        cd.seqLength = h.bases;
        cd.baseCalls.resize(h.bases);
        cd.prob_A.resize(h.bases);
        cd.prob_C.resize(h.bases);
        cd.prob_G.resize(h.bases);
        cd.prob_T.resize(h.bases);

        for (uint i = 0; i<h.bases; i++) {
            cd.prob_A[i] = bases[i].prob_A;
            cd.prob_C[i] = bases[i].prob_C;
            cd.prob_G[i] = bases[i].prob_G;
            cd.prob_T[i] = bases[i].prob_T;
            cd.baseCalls[i] = bases[i].peak_index;
            sequence[i] = bases[i].base;
        }
    }

    QString comments;
    if (sections & READ_COMMENTS) {
        /* Try reading comments; a failure here is not an error, the
           document is simply built without comments */
        if (h.comments_size <= bufSize && SeekBuf(fp,(h.comments_offset), 0) == 0) {
            QByteArray arr(h.comments_size, 0);
            if (fp->read(arr.data(), h.comments_size) ) {
                comments.append(arr);
            }
        }
    }

    cd.hasQV = true;

    DNASequence dna(sequence);
    QString sampleName;
    QStringList vals = comments.split("\n");
    // detect sample name per http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format
    foreach(const QString& val, vals) {
        if (val.startsWith("NAME=")) {
            sampleName = val.mid(5);
            break;
        }
    }
    if (sampleName.isEmpty()) {
        QFileInfo fi(url);
        sampleName = fi.baseName();
    }
    if (!sampleName.isEmpty()) {
        dna.info.insert(DNAInfo::ID, sampleName);
    }
    dna.info.insert(DNAInfo::COMMENT, vals);

    QList<GObject*> objects;
    // NOTE(review): seqObj is used below without a NULL check -- confirm
    // that addSequenceObject cannot fail for this input.
    DNASequenceObject* seqObj = DocumentFormatUtils::addSequenceObject(objects, sampleName + " sequence", dna);
    DNAChromatogramObject* chromObj = new DNAChromatogramObject(cd, sampleName + " chromatogram");
    objects.append(chromObj);
    Document* doc = new Document(this, iof, url, objects, fs);
    chromObj->addObjectRelation(GObjectRelation(GObjectReference(seqObj), GObjectRelationRole::SEQUENCE));
    return doc;
}

}//namespace
