/* This file is part of Strigi Desktop Search
 *
 * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
#define STRIGI_IMPORT_API //todo: could also define this in cmake...
#include <jstreamsconfig.h>
#include <strigi_plugins.h>

#include <Object.h>
#include <GlobalParams.h>
#include <PDFDoc.h>
#include <TextOutputDev.h>

using namespace jstreams;

class PdfEndAnalyzer : public jstreams::StreamEndAnalyzer {
private:
    static int objectCount;
    static void writeText(void *, char *text, int len);
public:
    PdfEndAnalyzer();
    ~PdfEndAnalyzer();
    bool checkHeader(const char* header, int32_t headersize) const;
    char analyze(std::string filename, jstreams::InputStream *in, int depth,
        jstreams::StreamIndexer *indexer, jstreams::Indexable*);
    static char staticAnalyze(std::string filename, jstreams::InputStream *in,
        int depth, jstreams::StreamIndexer *indexer, jstreams::Indexable*);
    const char* getName() const { return "PdfEndAnalyzer"; }
};

/*
class JStreamStream : public BaseStream {
private:
    Object o;
public:
    JStreamStream() : BaseStream(&o) {}
    StreamKind getKind();
    void reset();
    int getChar();
    int lookChar();
    int getPos();
    void setPos(Guint, int);
    Stream* makeSubStream(Guint, GBool, Guint, Object*);
    Guint getStart();
    void moveStart(int);
};

StreamKind
JStreamStream::getKind() {
}
void
JStreamStream::reset() {
}
int
JStreamStream::getChar() {
}
int
JStreamStream::lookChar() {
}
int
JStreamStream::getPos() {
}
void
JStreamStream::setPos(Guint, int) {
}
Stream*
JStreamStream::makeSubStream(Guint, GBool, Guint, Object*) {
}
Guint
JStreamStream::getStart() {
}
void
JStreamStream::moveStart(int) {
}*/

// Live-instance counter; the constructor and destructor use it to decide when
// the shared globalParams object is created and deleted.
int PdfEndAnalyzer::objectCount(0);

/**
 * Register a new analyzer instance.
 *
 * The first instance to be constructed allocates the global `globalParams`
 * object; later instances only bump the instance counter.
 */
PdfEndAnalyzer::PdfEndAnalyzer() {
    const bool isFirstInstance = (objectCount == 0);
    ++objectCount;
    if (isFirstInstance) {
        // presumably "" means "no configuration file" — TODO confirm
        globalParams = new GlobalParams("");
    }
}
/**
 * Unregister this analyzer instance.
 *
 * When the last live instance goes away, the global `globalParams` object
 * that the first constructor call allocated is deleted again.
 */
PdfEndAnalyzer::~PdfEndAnalyzer() {
    if (--objectCount == 0) {
        delete globalParams;
        // Reset the global so that no dangling pointer is left behind once
        // the object is gone; a later constructor call allocates a new one.
        globalParams = 0;
    }
}
/**
 * Text callback handed to TextOutputDev by analyze().
 *
 * A fragment is forwarded to the index only when every byte is alphanumeric
 * according to isalnum(); this filter is deliberately coarse. Accepted
 * fragments are added with one trailing space.
 *
 * @param id   the Indexable that analyze() passed to TextOutputDev
 * @param text fragment of extracted text, `len` bytes long (not necessarily
 *             NUL-terminated, since the length is given separately)
 * @param len  number of bytes in `text`
 */
void
PdfEndAnalyzer::writeText(void *id, char *text, int len) {
    // Nothing to index; this also keeps a negative length from being
    // converted into a huge size_t by the std::string constructor below.
    if (text == 0 || len <= 0) return;

    // check if the text is really text
    // this method is rather coarse at the moment, it uses isalnum()
    for (int i=0; i<len; ++i) {
        // isalnum() is only defined for values representable as unsigned
        // char (or EOF); a plain char may be negative for bytes >= 0x80.
        if (!isalnum(static_cast<unsigned char>(text[i]))) return;
    }
    Indexable* idx = static_cast<Indexable*>(id);
    std::string s(text, len);
    s += ' ';
    idx->addText(s.c_str(), s.length());
}
/**
 * Decide from the leading bytes of a stream whether it looks like a PDF.
 *
 * @param header     pointer to the first bytes of the stream
 * @param headersize number of bytes available at `header`
 * @return true when at least four bytes are available and they are "%PDF"
 */
bool
PdfEndAnalyzer::checkHeader(const char* header, int32_t headersize) const {
    const char* const pdfMagic = "%PDF";
    const int32_t pdfMagicLength = 4;

    // Too short to contain the signature at all.
    if (headersize < pdfMagicLength) {
        return false;
    }
    return strncmp(header, pdfMagic, pdfMagicLength) == 0;
}
char
PdfEndAnalyzer::analyze(std::string filename, InputStream *in,
        int depth, StreamIndexer *indexer, jstreams::Indexable* idx) {
    int32_t size = 20000;
    const char* b;
    int32_t nread = in->read(b, size, size);
    while (nread == size) {
        in->reset(0);
        size *= 2;
        nread = in->read(b, size, size);
    }
    if (nread <= 0) {
        error = "Error reading stream.";
        return -1;
    }
    Object obj;
    obj.initNull();
    MemStream* ms = new MemStream((char*)b, 0, nread, &obj);
    PDFDoc doc(ms);
    if (doc.isOk()) {
        printf("pdf analyzed %s\n", filename.c_str());
        // get page range
        int firstPage, lastPage;
        firstPage = 1;
        lastPage = doc.getNumPages();
    
        // get document info
        Object info;
        doc.getDocInfo(&info);
        if (info.isDict()) {
            // figure out how to get metadata
        }
        info.free();
    
        int dummy;
        TextOutputDev* textOut = new TextOutputDev(writeText, idx,
            gTrue, gTrue);
        if (textOut->isOk()) {
            doc.displayPages(textOut, firstPage, lastPage, 72, 72, 0,
    		      gFalse, gTrue, gFalse);
        }
        delete textOut;
    } else {
        error = "No valid PDF file.";
        return -1;
    }

    return 0;
}

// Plugin registration block for PdfEndAnalyzer.
// NOTE(review): these macros presumably come from strigi_plugins.h and expand
// to the boilerplate that exposes the analyzer to the plugin loader — confirm
// against that header.
STRIGI_END_PLUGINS_START
STRIGI_END_PLUGINS_REGISTER(PdfEndAnalyzer)
STRIGI_END_PLUGINS_END
