/***************************************************************************
 *   copyright           : (C) 2002 by Hendrik Sattler                     *
 *   mail                : post@hendrik-sattler.de                         *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "charsets.h"
#include "helpers.h"
#include "common.h"

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <iconv.h>

/* Count the characters of a zero-terminated UCS-4 string.
 *
 * input: start of the string; a null pointer is accepted.
 * Returns the number of characters before the terminating 0,
 * or 0 when input is a null pointer.
 */
size_t ucs4len (ucs4char_t* input) {
  ucs4char_t* cursor = input;

  if (input == 0) {
    return 0;
  }
  /* walk forward to the terminator, the distance is the length */
  while (*cursor != 0) {
    ++cursor;
  }
  return (size_t)(cursor - input);
}

#include "../config.h"
/* INTERNAL_CHARSET is the charset name handed to iconv_open() for the
 * internal ucs4char_t representation: the big-endian UCS-4 variant when
 * WORDS_BIGENDIAN is defined, the little-endian one otherwise.
 * NOTE(review): WORDS_BIGENDIAN is presumably provided by config.h
 * to describe the host byte order - confirm.
 */
#ifdef WORDS_BIGENDIAN
#  define INTERNAL_CHARSET "UCS-4BE"
#else
#  define INTERNAL_CHARSET "UCS-4LE"
#endif

/* Convert a byte string from the charset from_code to the internal
 * UCS-4 representation.
 *
 * from_code: iconv name of the source charset
 * input:     bytes to convert
 * insize:    number of bytes in input
 *
 * Returns a buffer of ucs4char_t obtained through mem_alloc/mem_realloc
 * and sized to the converted string plus one terminating element.
 * Any iconv failure is reported through errexit().
 * NOTE(review): the length is taken with ucs4len() after the conversion,
 * so mem_alloc(...,1) is presumably expected to zero the buffer - confirm.
 */
ucs4char_t* convert_to_internal (char* from_code,
				 char* input,
				 size_t insize)
{
  iconv_t converter;
  ucs4char_t* result;
  char* writepos;
  size_t space;
  size_t rc;

  converter = iconv_open(INTERNAL_CHARSET,from_code);
  if (converter == (iconv_t)-1) {
    errexit ("Error on text conversion: %s\n", strerror(errno));
  }

  /* one output character per input byte is the assumed worst case,
   * plus one extra element for the terminator
   */
  space    = insize*sizeof(ucs4char_t);
  result   = mem_alloc(space+sizeof(ucs4char_t),1);
  writepos = (char*)result;

  rc = iconv(converter,(ICONV_CAST)&input,&insize,&writepos,&space);
  if (rc == (size_t)-1) {
    errexit ("Error on text conversion to internal charset: %s\n", strerror(errno));
  }
  iconv_close(converter);

  /* give back the part of the buffer that was not needed */
  return mem_realloc(result,(ucs4len(result)+1)*sizeof(ucs4char_t));
}

size_t replace_char_escape (char **inbuf, size_t *inbytesleft,
			    char **outbuf, size_t *outbytesleft)
{
  if (inbuf == NULL || *inbuf == NULL ||
      outbuf == NULL || *outbuf == NULL ||
      *inbytesleft <= 0) {
    return 0;
  }
  if (*outbytesleft < 5) {
    return (size_t)-1;
  }
  sprintf(*outbuf,"\\%04X",(uint16_t)(*((ucs4char_t*)*inbuf) & UINT16_MAX));
  *inbuf += sizeof(ucs4char_t);
  *inbytesleft -= sizeof(ucs4char_t);
  *outbuf += 5;
  *outbytesleft -= 5;
  return 0;
}

/* Replacement handler: write a single '?' in place of one UCS-4
 * character.
 *
 * The parameters follow the iconv() calling convention:
 * inbuf/inbytesleft:   position and remaining byte count of the input,
 *                      advanced by one ucs4char_t on success
 * outbuf/outbytesleft: position and remaining byte count of the output,
 *                      advanced by 1 byte on success
 *
 * Returns 0 on success or when there is nothing to do (a null pointer
 * argument or less than one complete character of input), and
 * (size_t)-1 when no output byte is left.
 */
size_t replace_char_questionmark (char **inbuf, size_t *inbytesleft,
				  char **outbuf, size_t *outbytesleft)
{
  if (inbuf == NULL || *inbuf == NULL ||
      outbuf == NULL || *outbuf == NULL ||
      inbytesleft == NULL || outbytesleft == NULL ||
      *inbytesleft < sizeof(ucs4char_t)) {
    /* an incomplete character is treated like empty input, this avoids
     * wrapping *inbytesleft below zero
     */
    return 0;
  }
  /* only one byte is produced, so one free byte is all that is required */
  if (*outbytesleft < 1) {
    return (size_t)-1;
  }

  (*outbuf)[0] = '?';
  if (*outbytesleft > 1) {
    /* keep the output terminated when there is room; the terminator is
     * not counted as output and gets overwritten by the next character
     */
    (*outbuf)[1] = 0;
  }

  *inbuf += sizeof(ucs4char_t);
  *inbytesleft -= sizeof(ucs4char_t);
  *outbuf += 1;
  *outbytesleft -= 1;
  return 0;
}

/* Handle a character that could not be converted, according to
 * replacement_mode:
 *   REPMODE_ABORT:        report the character through errexit()
 *   REPMODE_ESCAPE_CHARS: delegate to replace_char_escape()
 *   REPMODE_QUESTIONMARK: delegate to replace_char_questionmark()
 *
 * inbuf/inbytesleft and outbuf/outbytesleft are passed on unchanged to
 * the selected handler.
 *
 * Returns the handler's return value; 0 for any other mode, in which
 * case none of the arguments is touched.
 */
size_t replace_char(enum repmode replacement_mode,
		    char **inbuf, size_t *inbytesleft,
		    char **outbuf, size_t *outbytesleft)
{
  size_t retval = 0;
  switch(replacement_mode) {
  case REPMODE_ABORT:
    /* the cast makes the argument match %lx independent of the
     * underlying type of ucs4char_t
     */
    errexit("Unicode character 0x%lx cannot be converted.\n",
	    (unsigned long)*((ucs4char_t*)*inbuf));
    break;
  case REPMODE_ESCAPE_CHARS:
    retval = replace_char_escape(inbuf,inbytesleft,outbuf,outbytesleft);
    break;
  case REPMODE_QUESTIONMARK:
    retval = replace_char_questionmark(inbuf,inbytesleft,outbuf,outbytesleft);
    break;
  default:
    /* unknown mode: nothing is replaced */
    break;
  }
  return retval;
}

/* Convert a zero-terminated internal UCS-4 string to the charset to_code.
 *
 * to_code:          iconv name of the target charset
 * input:            zero-terminated UCS-4 string (a null pointer yields
 *                   an empty result)
 * replacement_mode: what replace_char() shall do with characters that
 *                   iconv() did not convert cleanly
 *
 * The input is converted one character at a time so that a failing
 * character can be handed to replace_char() individually.
 *
 * Returns a buffer from mem_alloc() of 6 bytes per input character plus
 * 6 spare bytes holding the converted text.
 * NOTE(review): iconv() writes no terminator, so the result presumably
 * relies on mem_alloc(...,1) zeroing the buffer - confirm.
 * NOTE(review): the conversion state is never flushed with a null input
 * call to iconv(); stateful target charsets may miss a final shift
 * sequence - confirm whether such charsets are used.
 * Errors are reported through errexit().
 */
char* convert_from_internal (char* to_code,
			     ucs4char_t* input,
			     enum repmode replacement_mode)
{
  iconv_t cd;
  size_t status;
  int estatus;

  size_t insize;
  size_t insize_conv;
  char* inptr;
  char* inptr_conv;

  char* retval;
  char* outptr;
  char* outptr_conv;
  size_t outsize;
  size_t outsize_conv;

  cd = iconv_open(to_code,INTERNAL_CHARSET);
  if (cd == (iconv_t)-1) {
    errexit ("Error on text conversion: %s\n", strerror(errno));
  }

  /* each iconv() call below is offered exactly one character */
  insize = sizeof(ucs4char_t);
  inptr = (char*)input;
  inptr_conv = inptr;

  //this should be enough even for 6 Bytes of UTF-8
  outsize = ucs4len(input)*6;
  outsize_conv = outsize;
  retval = mem_alloc(outsize+6,1); //not to be modified later
  outptr = retval;
  outptr_conv = retval;

  /* Test only the current character for the terminator: measuring the
   * whole remaining string on every iteration made the loop quadratic.
   */
  while (inptr != NULL && *((ucs4char_t*)inptr) != 0) {
    insize_conv = insize;
    status = iconv(cd,
		   (ICONV_CAST)&inptr_conv,&insize_conv,
		   &outptr_conv,&outsize_conv);
    //errno must be saved before any other call can overwrite it
    estatus = errno;
    /* the character conversion may have failed
     * because the target charset has no such char;
     * any nonzero result (error or irreversible conversion)
     * is handled by replacing the character
     */
    if (status > (size_t)0) {
      insize_conv = insize;
      //set the vars back to before conversion try
      inptr_conv = inptr;
      outptr_conv = outptr;
      outsize_conv = outsize;
      status = replace_char(replacement_mode,
			    &inptr_conv,&insize_conv,
			    &outptr_conv,&outsize_conv);
      if (status == (size_t)-1) {
	//the replacement handlers only fail on missing output space
	estatus = E2BIG;
      }
    }
    /* the character conversion/replacement may be buggy
     */
    if (status == (size_t)-1) {
      switch (estatus) {
      case E2BIG: //we have to resize outbuf, should never happen
	errexit("Error: insufficient memory on unicode decoding. Please report as bug.\n");
	break;
      case EINVAL:
      case EILSEQ:
	errexit("Error with internal charset: %s\n",strerror(estatus));
	break;
      default:
	errexit("Error: %s\n",strerror(estatus));
	break;
      }
    }
    //we update the loop-external values, too
    inptr = inptr_conv;
    outptr = outptr_conv;
    outsize = outsize_conv;
  }
  iconv_close(cd);

  return retval;
}

#ifdef HAVE_LANGINFO_H
#  include <langinfo.h>
#endif

/* Name of the charset used by the system.
 *
 * Returns the fixed name "ANSI_X3.4-1968" when NO_NL_LANGINFO is defined,
 * otherwise whatever nl_langinfo(CODESET) reports. The returned string
 * is not allocated by this function.
 */
char* get_system_charset () {
  char* codeset;

#ifdef NO_NL_LANGINFO
  codeset = "ANSI_X3.4-1968";
#else
#  ifdef HAVE_LIBICONV
  /* The default implementation uses nl_langinfo(CODESET)
   * If this gives you problems with your mixture
   * of libc and libiconv, fix it here.
   */
#  endif
  codeset = nl_langinfo(CODESET);
#endif

  return codeset;
}

#include <ctype.h>

ucs4char_t* convert_from_system (char* input) {
  ucs4char_t* retval;
  size_t i = 0;
  size_t k = 0;
  int counter;
  char buffer[5]; //buffer for \XXXX
  size_t offset = 0;

  retval = convert_to_internal(get_system_charset(),input,strlen(input));

  /* Now we have to handle all direct \XXXX character inputs
   */
  while (retval[i] != 0) {
    switch (retval[i]) {
    case 0x5c: // '\'
      switch (retval[i+1]) {
      case 0x6e: // 'n'
	retval[k]=0x0a;
	offset = 1;
	break;
      case 0x5c: // '\'
	retval[k]=0x5c;
	offset = 1;
	break;		    
      default:
	for (counter=0;counter<4;counter++) {
	  if (retval[i+1+counter] > 0x00 &&
	      retval[i+1+counter] <= 0x7f) {
	    if (isxdigit((int)(retval[i+1+counter]&0x7f))) {
	      /* We can do this because Unicode is based on ASCII
	       * and we just tested for ASCII
	       */
	      buffer[counter] = (char)(retval[i+1+counter]&0x7f);
	    } else {
	      errexit ("Error on text conversion to internal charset: character %d (%c) is not a hexdigit.\n",
		       i+1+counter,retval[i+1+counter]);
	    }
	  } else {
	    errexit ("Error on text conversion to internal charset: character %d is not ACSII.\n",
		     i+1+counter);
	  }
	}
	buffer[4] = 0;
	retval[k] = (ucs4char_t)hexstr2int(buffer,sizeof(ucs4char_t));
	offset = 4;
	break;
      }
      i += offset;
      memmove(&retval[i+1-offset],&retval[i+1],(ucs4len(retval+i+1)+1)*sizeof(ucs4char_t));
      //no break
    default:
      ++k;
      ++i;
      break;
    }
  }
  retval[k] = 0;

  return mem_realloc(retval,(ucs4len(retval)+1)*sizeof(ucs4char_t));
}

/* Convert an internal UCS-4 string to the system charset.
 *
 * input:            string handed on to convert_from_internal()
 * replacement_mode: handed on to convert_from_internal()
 *
 * Returns the result of convert_from_internal() with the charset name
 * reported by get_system_charset() as target.
 */
char* convert_to_system (ucs4char_t* input,
			 enum repmode replacement_mode)
{
  char* system_charset = get_system_charset();

  return convert_from_internal(system_charset,input,replacement_mode);
}
