/*
     This file is part of doodle.
     (C) 2004 Christian Grothoff (and other contributing authors)

     doodle is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published
     by the Free Software Foundation; either version 2, or (at your
     option) any later version.

     doodle is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with doodle; see the file COPYING.  If not, write to the
     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     Boston, MA 02111-1307, USA.
*/

/**
 * @file index.c
 * @brief index file using libextractor
 * @author Christian Grothoff
 *
 * This file implements the bridge between libextractor
 * and libdoodle.
 */

#include "config.h"
#include "helper2.h"
#include "gettext.h"
#include "doodle.h"

/**
 * Extract the keywords of the given file with libextractor and
 * add every suffix of every non-binary keyword to the suffix tree.
 *
 * Keywords longer than MAX_LENGTH are not inserted as a whole.
 * Instead they are cut into overlapping windows of at most
 * MAX_LENGTH characters (a new window starts every MAX_LENGTH/2
 * characters) and every suffix of each window is inserted.
 *
 * @param elist extractor list handed to EXTRACTOR_getKeywords
 * @param filename name of the file to extract keywords from; it is
 *        also passed to DOODLE_tree_expand with every inserted string
 * @param tree suffix tree that is expanded
 * @param do_filenames if non-zero, every suffix of filename itself
 *        is inserted as well (after the keywords)
 * @return 1 on success, 0 as soon as DOODLE_tree_expand returns
 *         a non-zero value
 */
int buildIndex(EXTRACTOR_ExtractorList * elist,
               const char * filename,
               struct DOODLE_SuffixTree * tree,
               int do_filenames) {
  EXTRACTOR_KeywordList * head;
  EXTRACTOR_KeywordList * pos;

  head = EXTRACTOR_getKeywords(elist,
                               filename);
  head = EXTRACTOR_removeDuplicateKeywords
    (head,
     EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN);
  for (pos = head; pos != NULL; pos = pos->next) {
    const char * cpos;
    size_t slen;

    if (EXTRACTOR_isBinaryType(pos->keywordType))
      continue;
    cpos = pos->keyword;
    slen = strlen(cpos);
    if (slen > MAX_LENGTH) {
      char section[MAX_LENGTH+1];
      size_t j; /* size_t: must be able to index any keyword length */

      for (j=0;j<slen;j+=MAX_LENGTH/2) {
        const char * xpos;
        size_t chunk;

        /* copy at most MAX_LENGTH characters of what is left and
           terminate the window explicitly */
        chunk = slen - j;
        if (chunk > MAX_LENGTH)
          chunk = MAX_LENGTH;
        memcpy(section,
               &cpos[j],
               chunk);
        section[chunk] = '\0';
        for (xpos = section; xpos[0] != '\0'; xpos++) {
          if (0 != DOODLE_tree_expand(tree,
                                      xpos,
                                      filename)) {
            EXTRACTOR_freeKeywords(head);
            return 0;
          }
        }
      }
    } else {
      for (; cpos[0] != '\0'; cpos++) {
        if (0 != DOODLE_tree_expand(tree,
                                    cpos,
                                    filename)) {
          EXTRACTOR_freeKeywords(head);
          return 0;
        }
      }
    }
  }
  /* the keyword list is no longer needed for the filename pass */
  EXTRACTOR_freeKeywords(head);
  if (do_filenames) {
    const char * cpos;

    for (cpos = filename; cpos[0] != '\0'; cpos++) {
      if (0 != DOODLE_tree_expand(tree,
                                  cpos,
                                  filename))
        return 0;
    }
  }
  return 1;
}
