/*

    File: file_doc.c

    Copyright (C) 1998-2006 Christophe GRENIER <grenier@cgsecurity.org>
  
    This software is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
  
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
  
    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc., 51
    Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <stdio.h>
#include "types.h"
#include "common.h"
#include "photorec.h"
#include "ole.h"

/*
 * Forward declarations of the file-local helpers defined further down.
 * file_check_doc and header_check_doc are referenced by address from
 * file_hint_doc, so they must be declared before that initializer.
 */
static void file_check_doc(t_file_recovery *file_recovery);
static const char* header_check_doc(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only,  t_file_recovery *file_recovery);
static const unsigned char * find_in_mem(const unsigned char *haystack, const unsigned int haystack_size, const unsigned char *needle, const unsigned int needle_size);
static uint64_t test_OLE(FILE *file);

/*
 * File hint describing Microsoft Office / OLE2 compound documents.
 * It is the only non-static symbol of this file; the callbacks it points to
 * are the static functions defined below.
 */
const t_file_hint file_hint_doc= {
  .extension="doc",	/* also returned by header_check_doc when no more specific type is found */
  .description="Microsoft Office Document (doc/xls/ppt/vis/...)",
  .min_header_distance=0,
  .min_filesize=0,
  .max_filesize=PHOTOREC_MAX_FILE_SIZE,
  .recover=1,
  .header_check=&header_check_doc,	/* recognises the OLE2 signature and picks an extension */
  .data_check=NULL,			/* no per-block data check is provided */
  .file_check=&file_check_doc		/* adjusts file_size from the OLE FAT */
};

/*
 * Locate the first occurrence of a byte sequence inside a memory area.
 *
 * haystack       start of the area to search
 * haystack_size  number of valid bytes in haystack
 * needle         byte sequence to look for (may contain NUL bytes)
 * needle_size    number of bytes in needle
 *
 * Returns a pointer to the first match inside haystack, or NULL when the
 * needle does not occur or is longer than the haystack.  An empty needle
 * matches at the start of the haystack.
 */
static const unsigned char * find_in_mem(const unsigned char *haystack, const unsigned int haystack_size, const unsigned char *needle, const unsigned int needle_size)
{
  unsigned int i;
  unsigned int last_start;
  /* Both sizes are unsigned: without this test the subtraction below would
   * wrap around and memcmp would run far past the end of the haystack. */
  if(needle_size>haystack_size)
    return NULL;
  last_start=haystack_size-needle_size;
  /* last_start is itself a valid starting offset, hence "<=" */
  for(i=0;i<=last_start;i++)
    if(memcmp(&haystack[i],needle,needle_size)==0)
      return &haystack[i];
  return NULL;
}

/*
 * Post-recovery check: replace the recovered size by the size computed by
 * test_OLE() from the file's own FAT.
 *
 * The computed size is kept only when it is non-zero and does not exceed the
 * number of bytes already recovered; otherwise file_size is set to 0.
 */
static void file_check_doc(t_file_recovery *file_recovery)
{
  const uint64_t ole_size=test_OLE(file_recovery->handle);
  if(ole_size==0 || ole_size>file_recovery->file_size)
  {
    file_recovery->file_size=0;
  }
  else
  {
    file_recovery->file_size=ole_size;
  }
#ifdef DEBUG_OLE
  ecrit_rapport("size found : %llu\n",(long long unsigned)ole_size);
  ecrit_rapport("==> size : %llu\n",(long long unsigned)file_recovery->file_size);
#endif
}

/*
 * Recognise an OLE2 compound document at the start of a buffer and choose a
 * file extension for it.
 *
 * buffer            data to inspect; it is interpreted as a struct OLE_HDR,
 *                   so the caller is assumed to supply at least a full header
 *                   (TODO confirm against the callers)
 * buffer_size       number of valid bytes in buffer
 * safe_header_only  not used by this checker
 * file_recovery     not used by this checker
 *
 * Returns NULL when the buffer does not start with the OLE2 signature or when
 * the header fields fail the sanity checks.  Otherwise returns the extension
 * of the first entry of the signature table found anywhere in the buffer, or
 * file_hint_doc.extension when none of them is found.
 */
static const char* header_check_doc(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only,  t_file_recovery *file_recovery)
{
  static const unsigned char doc_header[]= { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};
  /* Byte sequences searched for in the buffer, in priority order: the first
   * one found decides the extension.  Lengths are explicit because one of
   * the needles (UTF-16 "Workbook") contains NUL bytes. */
  static const struct {
    const char *needle;
    unsigned int needle_size;
    const char *extension;
  } signatures[]= {
    { "WordDocument", 12, "doc" },
    { "StarDraw", 8, "sda" },
    { "StarCalc", 8, "sdc" },
    { "StarImpress", 11, "sdd" },
    { "Worksheet", 9, "xls" },
    { "Book", 4, "xls" },
    { "Workbook", 8, "xls" },
    { "W\0o\0r\0k\0b\0o\0o\0k\0", 16, "xls" },
    { "Calc", 4, "xls" },
    { "Power", 5, "ppt" },
    { "AccessObjSiteData", 17, "mdb" },
    { "Visio", 5, "vis" },
    { "Sfx", 3, "sdw" },
    { "CPicPage", 8, "fla" },	/* Flash */
    { "Microsoft Publisher", 19, "pub" }
  };
  const struct OLE_HDR *header;
  unsigned int i;
  (void)safe_header_only;
  (void)file_recovery;
  /* Never compare more bytes than the caller provided */
  if(buffer_size<sizeof(doc_header))
    return NULL;
  if(memcmp(buffer,doc_header,sizeof(doc_header))!=0)
    return NULL;
  header=(const struct OLE_HDR *)buffer;
  if(le16(header->reserved)!=0 || le32(header->reserved1)!=0 || le32(header->reserved2)!=0)
    return NULL;
  /* Only 512-byte sectors with 64-byte mini sectors are accepted */
  if(le16(header->uMiniSectorShift)!=6 || le16(header->uSectorShift)!=9)
    return NULL;
  if(le32(header->num_FAT_blocks)==0 || le32(header->num_FAT_blocks)>100)
    return NULL;
  /* Same bounds as the sanity check done by test_OLE() */
  if(le32(header->num_extra_FAT_blocks)>50 ||
      le32(header->num_FAT_blocks)>109+le32(header->num_extra_FAT_blocks)*((1<<le16(header->uSectorShift))-1))
    return NULL;
  /* TODO read the Root Directory */
  for(i=0;i<sizeof(signatures)/sizeof(signatures[0]);i++)
  {
    /* find_in_mem works on unsigned bytes; the cast makes the conversion
     * from the char string literals explicit. */
    if(find_in_mem(buffer,buffer_size,(const unsigned char *)signatures[i].needle,signatures[i].needle_size)!=NULL)
      return signatures[i].extension;
  }
  return file_hint_doc.extension;
}

static uint64_t test_OLE(FILE *IN)
{
  unsigned char buffer_header[512];
  uint64_t totalsize;
  uint32_t *dif;
  uint32_t *fat;
  unsigned int freesect_count=0;  
  struct OLE_HDR *header=(struct OLE_HDR*)&buffer_header;
  if(!IN)
    return 0;
  fseek(IN,SEEK_SET,0);
  fread(&buffer_header,sizeof(buffer_header),1,IN);	/*reads first sector including OLE header */
  /*
  ecrit_rapport("num_FAT_blocks       %u\n",le32(header->num_FAT_blocks));
  ecrit_rapport("num_extra_FAT_blocks %u\n",le32(header->num_extra_FAT_blocks));
  */
  /* Sanity check */
  if(le32(header->num_FAT_blocks)==0 ||
      le32(header->num_extra_FAT_blocks)>50 ||
      le32(header->num_FAT_blocks)>109+le32(header->num_extra_FAT_blocks)*((1<<le16(header->uSectorShift))-1))
    return 0;
  dif=(uint32_t*)MALLOC(109*4+(le32(header->num_extra_FAT_blocks)<<le16(header->uSectorShift)));
  memcpy(dif,(header+1),109*4);
  if(le32(header->num_extra_FAT_blocks)>0)
  { /* Load DIF*/
    uint32_t *dif_pos=dif+109;
    unsigned long int i;
    unsigned long int block=le32(header->FAT_next_block);
    for(i=0;i<le32(header->num_extra_FAT_blocks) && block!=0xFFFFFFFF && block!=0xFFFFFFFE;i++)
    {
//      ecrit_rapport("pointeur:0x%x\n",block);
      if(fseek(IN,512+(block<<le16(header->uSectorShift)),SEEK_SET)<0)
      {
	free(dif);
	return 0;
      }
      fread(dif_pos, (i<le32(header->num_extra_FAT_blocks)?128:(le32(header->num_FAT_blocks)-109)%127),4,IN);
      dif_pos+=(((1<<le16(header->uSectorShift))/4)-1);
      block=le32(dif[109+i*(((1<<le16(header->uSectorShift))/4)-1)+127]);
    }
  }
  fat=(uint32_t*)MALLOC(le32(header->num_FAT_blocks)<<le16(header->uSectorShift));
  { /* Load FAT */
    unsigned long int j;
    for(j=0;j<le32(header->num_FAT_blocks);j++)
    {
      if(fseek(IN,512+(le32(dif[j])<<le16(header->uSectorShift)),SEEK_SET)<0)
      {
	free(dif);
	free(fat);
	return 0;
      }
      fread(fat+((j<<le16(header->uSectorShift))/4),(1<<le16(header->uSectorShift)),1,IN);
    }
  }
  { /* Search how many entries are not used at the end of the FAT */
    unsigned long int i;
    for(i=(le32(header->num_FAT_blocks)<<le16(header->uSectorShift))/4-1;
	i>((le32(header->num_FAT_blocks)-1)<<le16(header->uSectorShift))/4 && le32(fat[i])==0xFFFFFFFF; i--)
      freesect_count++;
  }
  totalsize=512+((le32(header->num_FAT_blocks)*128-freesect_count)<<le16(header->uSectorShift));
  free(dif);
  free(fat);
  return totalsize;
}
