/*
 * Main part of code, written by:
 *
 * Copyright (C) 1999-2001  Hvard Kvlen <havardk@xmms.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 *
 */

#include <config.h>
#include <stdlib.h>
#include <glib.h>
#include <string.h>
#include <errno.h>
#include <glib/gi18n-lib.h>

#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif

#include "charset.h"
#include "setting.h"



/****************
 * Declarations *
 ****************/

/* Number of entries in charset_trans_array (only valid where the array
 * definition itself is visible, since it relies on sizeof). */
#define CHARSET_TRANS_ARRAY_LEN ( sizeof(charset_trans_array) / sizeof((charset_trans_array)[0]) )
/*
 * Table of the character sets offered to the user.
 * Each entry pairs a human readable title (marked for translation with N_()
 * and translated at use time with _()) with the charset name that is handed
 * to the conversion functions. The functions below access the two members
 * as 'charset_title' and 'charset_name'.
 */
const CharsetInfo charset_trans_array[] = { 
    {N_("Arabic (IBM-864)"),                  "IBM864"        },
    {N_("Arabic (ISO-8859-6)"),               "ISO-8859-6"    },
    {N_("Arabic (Windows-1256)"),             "windows-1256"  },
    {N_("Baltic (ISO-8859-13)"),              "ISO-8859-13"   },
    {N_("Baltic (ISO-8859-4)"),               "ISO-8859-4"    },
    {N_("Baltic (Windows-1257)"),             "windows-1257"  },
    {N_("Celtic (ISO-8859-14)"),              "ISO-8859-14"   },
    {N_("Central European (IBM-852)"),        "IBM852"        },
    {N_("Central European (ISO-8859-2)"),     "ISO-8859-2"    },
    {N_("Central European (Windows-1250)"),   "windows-1250"  },
    {N_("Chinese Simplified (GB18030)"),      "gb18030"       },
    {N_("Chinese Simplified (GB2312)"),       "GB2312"        },
    {N_("Chinese Traditional (Big5)"),        "Big5"          },
    {N_("Chinese Traditional (Big5-HKSCS)"),  "Big5-HKSCS"    },
    {N_("Cyrillic (IBM-855)"),                "IBM855"        },
    {N_("Cyrillic (ISO-8859-5)"),             "ISO-8859-5"    },
    {N_("Cyrillic (ISO-IR-111)"),             "ISO-IR-111"    },
    {N_("Cyrillic (KOI8-R)"),                 "KOI8-R"        },
    {N_("Cyrillic (Windows-1251)"),           "windows-1251"  },
    {N_("Cyrillic/Russian (CP-866)"),         "IBM866"        },
    {N_("Cyrillic/Ukrainian (KOI8-U)"),       "KOI8-U"        },
    {N_("English (US-ASCII)"),                "us-ascii"      },
    {N_("Greek (ISO-8859-7)"),                "ISO-8859-7"    },
    {N_("Greek (Windows-1253)"),              "windows-1253"  },
    {N_("Hebrew (IBM-862)"),                  "IBM862"        },
    {N_("Hebrew (Windows-1255)"),             "windows-1255"  },
    {N_("Japanese (EUC-JP)"),                 "EUC-JP"        },
    {N_("Japanese (ISO-2022-JP)"),            "ISO-2022-JP"   },
    {N_("Japanese (Shift_JIS)"),              "Shift_JIS"     },
    {N_("Korean (EUC-KR)"),                   "EUC-KR"        },
    {N_("Nordic (ISO-8859-10)"),              "ISO-8859-10"   },
    {N_("South European (ISO-8859-3)"),       "ISO-8859-3"    },
    {N_("Thai (TIS-620)"),                    "TIS-620"       },
    {N_("Turkish (IBM-857)"),                 "IBM857"        },
    {N_("Turkish (ISO-8859-9)"),              "ISO-8859-9"    },
    {N_("Turkish (Windows-1254)"),            "windows-1254"  },
    {N_("Unicode (UTF-7)"),                   "UTF-7"         },
    {N_("Unicode (UTF-8)"),                   "UTF-8"         },
    {N_("Unicode (UTF-16BE)"),                "UTF-16BE"      },
    {N_("Unicode (UTF-16LE)"),                "UTF-16LE"      },
    {N_("Unicode (UTF-32BE)"),                "UTF-32BE"      },
    {N_("Unicode (UTF-32LE)"),                "UTF-32LE"      },
    {N_("Vietnamese (VISCII)"),               "VISCII"        },
    {N_("Vietnamese (Windows-1258)"),         "windows-1258"  },
    {N_("Visual Hebrew (ISO-8859-8)"),        "ISO-8859-8"    },
    {N_("Western (IBM-850)"),                 "IBM850"        },
    {N_("Western (ISO-8859-1)"),              "ISO-8859-1"    },
    {N_("Western (ISO-8859-15)"),             "ISO-8859-15"   },
    {N_("Western (Windows-1252)"),            "windows-1252"  }

    /*
     * From this point on, the character sets are not supported by iconv,
     * so the entries are kept only as a disabled (commented out) list.
     */
/*    {N_("Arabic (IBM-864-I)"),                "IBM864i"              },
    {N_("Arabic (ISO-8859-6-E)"),             "ISO-8859-6-E"         },
    {N_("Arabic (ISO-8859-6-I)"),             "ISO-8859-6-I"         },
    {N_("Arabic (MacArabic)"),                "x-mac-arabic"         },
    {N_("Armenian (ARMSCII-8)"),              "armscii-8"            },
    {N_("Central European (MacCE)"),          "x-mac-ce"             },
    {N_("Chinese Simplified (GBK)"),          "x-gbk"                },
    {N_("Chinese Simplified (HZ)"),           "HZ-GB-2312"           },
    {N_("Chinese Traditional (EUC-TW)"),      "x-euc-tw"             },
    {N_("Croatian (MacCroatian)"),            "x-mac-croatian"       },
    {N_("Cyrillic (MacCyrillic)"),            "x-mac-cyrillic"       },
    {N_("Cyrillic/Ukrainian (MacUkrainian)"), "x-mac-ukrainian"      },
    {N_("Farsi (MacFarsi)"),                  "x-mac-farsi"},
    {N_("Greek (MacGreek)"),                  "x-mac-greek"          },
    {N_("Gujarati (MacGujarati)"),            "x-mac-gujarati"       },
    {N_("Gurmukhi (MacGurmukhi)"),            "x-mac-gurmukhi"       },
    {N_("Hebrew (ISO-8859-8-E)"),             "ISO-8859-8-E"         },
    {N_("Hebrew (ISO-8859-8-I)"),             "ISO-8859-8-I"         },
    {N_("Hebrew (MacHebrew)"),                "x-mac-hebrew"         },
    {N_("Hindi (MacDevanagari)"),             "x-mac-devanagari"     },
    {N_("Icelandic (MacIcelandic)"),          "x-mac-icelandic"      },
    {N_("Korean (JOHAB)"),                    "x-johab"              },
    {N_("Korean (UHC)"),                      "x-windows-949"        },
    {N_("Romanian (MacRomanian)"),            "x-mac-romanian"       },
    {N_("Turkish (MacTurkish)"),              "x-mac-turkish"        },
    {N_("User Defined"),                      "x-user-defined"       },
    {N_("Vietnamese (TCVN)"),                 "x-viet-tcvn5712"      },
    {N_("Vietnamese (VPS)"),                  "x-viet-vps"           },
    {N_("Western (MacRoman)"),                "x-mac-roman"          },
    // charsets without possibly translatable names
    {"T61.8bit",                              "T61.8bit"             },
    {"x-imap4-modified-utf7",                 "x-imap4-modified-utf7"},
    {"x-u-escaped",                           "x-u-escaped"          },
    {"windows-936",                           "windows-936"          }
*/
};

/*
 * Lookup table from a locale name (keys such as "C", "ja" or "zh_TW") to the
 * name of the legacy character set associated with it.
 * Created and filled by Charset_Insert_Locales_Init(), released by
 * Charset_Insert_Locales_Destroy() and queried by get_encoding_from_locale().
 * Keys and values are the string literals passed to insert_locales(); the
 * table is created without key/value destroy functions.
 */
static GHashTable *encodings;



/*************
 * Functions *
 *************/


/*
 * Borrowed from gnome-desktop-item.c.
 *
 * Tell whether a conversion from the charset named 'locale' to UTF-8 can be
 * opened with g_iconv_open().
 * Returns TRUE when the converter could be opened (it is closed again right
 * away), FALSE otherwise.
 */
static gboolean
check_locale (const char *locale)
{
    GIConv converter;
    gboolean supported;

    converter = g_iconv_open ("UTF-8", locale);

    /* g_iconv_open() signals failure with (GIConv)-1 */
    supported = (converter != (GIConv)-1);
    if (supported)
        g_iconv_close (converter);

    return supported;
}

/*
 * Borrowed from gnome-desktop-item.c.
 *
 * Register the charset 'enc' in the table 'encodings' for every locale name
 * given in the variable argument list.
 *  - encodings : hash table receiving the (locale name => charset) pairs
 *  - enc       : charset name stored as the value of each pair
 *  - ...       : locale names (char *), the list MUST be terminated by NULL
 * The pointers are stored as they are (no copy is made).
 */
static void
insert_locales (GHashTable *encodings, char *enc, ...)
{
    va_list ap;
    char *locale_name;

    va_start (ap, enc);
    /* Consume the arguments up to the NULL sentinel */
    while ((locale_name = va_arg (ap, char *)) != NULL)
        g_hash_table_insert (encodings, locale_name, enc);
    va_end (ap);
}

/*
 * Borrowed from gnome-desktop-item.c.
 *
 * Create the file-scope 'encodings' table and fill it with the standard
 * (locale name => legacy charset) conversion table from the desktop
 * standard spec.
 * For "zh_CN" and "vi" the charset name stored depends on which name
 * check_locale() reports as usable.
 */
void
Charset_Insert_Locales_Init (void)
{
    encodings = g_hash_table_new (g_str_hash, g_str_equal);

    /* "C" is plain ascii */
    insert_locales (encodings, "ASCII", "C", NULL);

    insert_locales (encodings, "ARMSCII-8", "by", NULL);
    insert_locales (encodings, "BIG5", "zh_TW", NULL);
    insert_locales (encodings, "CP1251", "be", "bg", NULL);

    /* Prefer "EUC-CN" when a converter exists for it, else use "GB2312" */
    insert_locales (encodings,
                    check_locale ("EUC-CN") ? "EUC-CN" : "GB2312",
                    "zh_CN", NULL);

    insert_locales (encodings, "EUC-JP", "ja", NULL);
    insert_locales (encodings, "UHC", "ko", NULL);
    /*insert_locales (encodings, "GEORGIAN-ACADEMY", NULL);*/
    insert_locales (encodings, "GEORGIAN-PS", "ka", NULL);
    insert_locales (encodings, "ISO-8859-1",
                    "br", "ca", "da", "de", "en", "es", "eu", "fi", "fr",
                    "gl", "it", "nl", "wa", "no", "pt", "pt", "sv",
                    NULL);
    insert_locales (encodings, "ISO-8859-2",
                    "cs", "hr", "hu", "pl", "ro", "sk", "sl", "sq", "sr",
                    NULL);
    insert_locales (encodings, "ISO-8859-3", "eo", NULL);
    insert_locales (encodings, "ISO-8859-5", "mk", "sp", NULL);
    insert_locales (encodings, "ISO-8859-7", "el", NULL);
    insert_locales (encodings, "ISO-8859-9", "tr", NULL);
    insert_locales (encodings, "ISO-8859-13", "lt", "lv", "mi", NULL);
    insert_locales (encodings, "ISO-8859-14", "ga", "cy", NULL);
    insert_locales (encodings, "ISO-8859-15", "et", NULL);
    insert_locales (encodings, "KOI8-R", "ru", NULL);
    insert_locales (encodings, "KOI8-U", "uk", NULL);

    /* Prefer "TCVN-5712" when a converter exists for it, else use "TCVN" */
    insert_locales (encodings,
                    check_locale ("TCVN-5712") ? "TCVN-5712" : "TCVN",
                    "vi", NULL);

    insert_locales (encodings, "TIS-620", "th", NULL);
    /*insert_locales (encodings, "VISCII", NULL);*/
}

/*
 * Release the 'encodings' table created by Charset_Insert_Locales_Init().
 * Calling it when the table doesn't exist (never initialised, or already
 * destroyed) does nothing. The pointer is reset to NULL afterwards so that
 * no dangling pointer to the freed table is left behind.
 */
void
Charset_Insert_Locales_Destroy (void)
{
    if (encodings == NULL)
        return;

    g_hash_table_destroy (encodings);
    encodings = NULL;
}

/*
 * Borrowed from gnome-desktop-item.c.
 *
 * Return the name of the legacy character set associated with a locale name
 * of the form "ll[_CC][.codeset][@modifier]" (typically the value of $LANG).
 *  - locale is NULL                  : returns NULL
 *  - locale has a codeset that does
 *    not start with "UTF-8"          : returns the text following the '.'
 *                                      (pointer into 'locale', not a copy;
 *                                      a trailing "@modifier" is NOT removed)
 *  - otherwise                       : looks up "ll_CC", then "ll", in the
 *                                      'encodings' table and returns the
 *                                      stored charset name, or NULL if the
 *                                      locale is unknown
 * The returned string must not be freed by the caller.
 */
static const char *
get_encoding_from_locale (const char *locale)
{
    char base[16];  /* far larger than any key stored in the table */
    char lang[3];
    const char *encoding;
    size_t base_len;
    size_t lang_len;

    if (locale == NULL)
        return NULL;

    /* if locale includes encoding (that isn't UTF-8), use it */
    encoding = strchr (locale, '.');
    if (encoding != NULL && strncmp (encoding, ".UTF-8", 6) != 0)
        return encoding + 1;

    /*
     * Only the "ll_CC" part can match a key of the table, so the codeset
     * (".UTF-8") and the modifier ("@euro") must be cut off before the
     * lookup, otherwise "zh_TW.UTF-8" would never match "zh_TW".
     */
    base_len = strcspn (locale, ".@");

    /* first try the entire locale (ll_CC); a name too long for the buffer
     * can't match any key, so the lookup is simply skipped */
    if (base_len < sizeof base) {
        memcpy (base, locale, base_len);
        base[base_len] = '\0';
        encoding = g_hash_table_lookup (encodings, base);
        if (encoding != NULL)
            return encoding;
    }

    /* Try just the language: at most the two first characters of the
     * stripped name (so that "C.UTF-8" gives "C" and not "C.") */
    lang_len = (base_len < 2) ? base_len : 2;
    memcpy (lang, locale, lang_len);
    lang[lang_len] = '\0';
    return g_hash_table_lookup (encodings, lang);
}

/*
char *
rb_unicodify (const char *str)
{
    char *ret = NULL;
    const char *char_encoding;

    // Try validating it as UTF-8 first
    if (g_utf8_validate (str, -1, NULL))
        return g_strdup (str);

    // Failing that, try the legacy encoding associated with the locale.
    char_encoding = get_encoding_from_locale (getenv ("LANG"));
    if (char_encoding == NULL)
        ret = NULL;
    else
        ret = g_convert (str, -1, "UTF-8", char_encoding,
                 NULL, NULL, NULL);
    // Failing that, try ISO-8859-1
    if (!ret)
        ret = g_convert (str, -1, "UTF-8", "ISO-8859-1",
                 NULL, NULL, NULL);

    return ret;
}*/





gchar *convert_string (const gchar *string, const gchar *from_codeset, const gchar *to_codeset)
{
    gchar *output;
    GError *error = NULL;

    if (!string)
        return NULL;
    
    output = g_convert(string, -1, to_codeset, from_codeset, NULL, NULL, &error);

    if (output == NULL)
    {
        gchar *escaped_str = g_strescape(string, NULL);
        g_warning("convert_string(): Failed conversion from charset '%s' to '%s'. "
                  "String '%s'. Errcode %d (%s).\n",
                  from_codeset, to_codeset, escaped_str, error->code, error->message);
        g_free(escaped_str);
        g_error_free(error);
        return g_strdup(string);
    }

    return output;
}


/*
 * Conversion with UTF-8 for Ogg Vorbis and FLAC tags (current_charset <===> UTF-8)
 */
/*
 * Convert a string from the charset of the current locale to UTF-8.
 *  - string is NULL : returns NULL
 *  - conversion OK  : returns the UTF-8 string (new allocated)
 *  - conversion KO  : displays warnings (including whether the input was
 *                     already valid UTF-8) and returns a copy of the
 *                     original string (new allocated)
 */
gchar *convert_to_utf8 (const gchar *string)
{
    GError *conv_error = NULL;
    const gchar *locale_charset;
    gchar *printable;
    gchar *converted;

    if (string == NULL)
        return NULL;

    converted = g_locale_to_utf8(string, -1, NULL, NULL, &conv_error);
    if (converted != NULL)
        return converted;

    /* Conversion failed: report it with an escaped form of the input */
    g_get_charset(&locale_charset);
    printable = g_strescape(string, NULL);
    g_warning("convert_to_utf8(): Failed conversion from charset '%s'. "
              "String '%s'. Errcode %d (%s).\n",
              locale_charset, printable, conv_error->code, conv_error->message);
    g_free(printable);

    if (!g_utf8_validate(string, -1, NULL))
        g_warning("convert_to_utf8(): String was INVALID UTF8.\n");
    else
        g_warning("convert_to_utf8(): String was valid UTF8.\n");

    g_error_free(conv_error);

    return g_strdup(string);
}

/*
 * Convert a string from UTF-8 to the charset of the current locale.
 *  - string is NULL : returns NULL
 *  - conversion OK  : returns the converted string (new allocated)
 *  - conversion KO  : displays warnings (including whether the input was
 *                     valid UTF-8) and returns a copy of the original
 *                     string (new allocated)
 */
gchar *convert_from_utf8 (const char *string)
{
    GError *conv_error = NULL;
    const gchar *locale_charset;
    gchar *printable;
    gchar *converted;

    if (string == NULL)
        return NULL;

    converted = g_locale_from_utf8(string, -1, NULL, NULL, &conv_error);
    if (converted != NULL)
        return converted;

    /* Conversion failed: report it with an escaped form of the input */
    g_get_charset(&locale_charset);
    printable = g_strescape(string, NULL);
    g_warning("convert_from_utf8(): Failed conversion to charset '%s'. "
              "String '%s'. Errcode %d (%s).\n",
              locale_charset, printable, conv_error->code, conv_error->message);
    g_free(printable);

    if (!g_utf8_validate(string, -1, NULL))
        g_warning("convert_from_utf8(): String was INVALID UTF8.\n");
    else
        g_warning("convert_from_utf8(): String was valid UTF8.\n");

    g_error_free(conv_error);

    return g_strdup(string);
}



/*
 * Conversion with ISO-8859-1 for ID3v2.3 tags (current_charset <===> ISO-8859-1)
 */
/***
char *convert_to_iso88591 (const char *string)
{
    const gchar *charset;
    g_get_charset(&charset);

    // No conversion needed
    if (strcmp(charset, "ANSI_X3.4-1968") == 0)
        return g_strdup(string);

    return convert_string(string, charset, "ISO-8859-1");
}

char *convert_from_iso88591 (const char *string)
{
    const gchar *charset;
    g_get_charset(&charset);

    // No conversion needed
    if (strcmp(charset, "ANSI_X3.4-1968") == 0)
        return g_strdup(string);

    return convert_string(string, "ISO-8859-1", charset);
}
***/



/*
 * Conversion with "this_charset" for ID3v2.3 tags (current_charset <===> this_charset)
 */
/***
// Convert from the locale charset to 'this_charset'
char *convert_to_this_charset (const char *string, char *this_charset)
{
    const gchar *charset;
    g_get_charset(&charset);

    return convert_string(string, charset, this_charset);
}

// Convert from 'this_charset' to the locale charset
char *convert_from_this_charset (const char *string, char *this_charset)
{
    const gchar *charset;
    g_get_charset(&charset);

    return convert_string(string, this_charset, charset);
}
***/


/*
 * Functions to translate filename to UTF-8
 * Based around the ideas under "File Name Encodings" at
 *    http://developer.gnome.org/doc/API/2.0/glib/glib-Character-Set-Conversion.html
 */
/*
 * Convert a string from the filename system encoding to UTF-8.
 *  - conversion OK : returns the UTF-8 string (new allocated)
 *  - conversion KO : tries others encodings else returns an 'escaped' string
 */
gchar *filename_to_display (const gchar *string)
{
    GError *error = NULL;
    gchar *temp;
    
    if (!string)
        return NULL;

    temp = g_filename_to_utf8(string, -1, NULL, NULL, &error);
    if (!temp)
    {
        // Conversion KO!
        const gchar *char_encoding;
        gchar *ret;
        
        // Try the legacy encoding associated with the locale.
        char_encoding = get_encoding_from_locale(getenv("LANG"));
        if (char_encoding)
        {
            ret = g_convert(string, -1, "UTF-8", char_encoding, NULL, NULL, NULL);
        }else
        {
            // Failing that, try ISO-8859-1
            ret = g_convert(string, -1, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
        }

        if (!ret)
        {
            gchar *escaped_str = g_strescape(string, NULL);
            g_warning(_("The filename '%s' couldn't be converted into UTF-8 (%s).\n"
                        "To fix : this file name musn't be in UTF-8 encoding. So "
                        "specify the right encoding for file name in the "
                        "G_FILENAME_ENCODING environment variable (for example : "
                        "export G_FILENAME_ENCODING=ISO-8859-1).\n"),
                        escaped_str, error->message ? error->message : _("Invalid UTF-8"));
            g_clear_error(&error);
    
            return escaped_str;
        }else
        {
            return ret;
        }
        
    }else
    {
        // Conversion OK
        return temp;
    }
}

/*
 * Convert a string from UTF-8 to the filename system encoding.
 *  - string is NULL : returns NULL
 *  - conversion OK  : returns the string in filename system encoding (new allocated)
 *  - conversion KO  : displays a warning and returns NULL, so callers have
 *                     to check the result
 */
gchar *filename_from_display (const gchar *string)
{
    gchar *fs_name;
    gchar *printable;

    if (string == NULL)
        return NULL;

    fs_name = g_filename_from_utf8(string, -1, NULL, NULL, NULL);
    if (fs_name != NULL)
        return fs_name;

    // Conversion KO!
    printable = g_strescape(string, NULL);
    g_warning(_("The UTF-8 string '%s' couldn't be converted into filename "
                "encoding\n"), printable);
    g_free(printable);

    return NULL;
}


/*
 * Append the translated title of every entry of charset_trans_array to the
 * combo box 'combo'.
 * If 'select_charset' is not NULL, each entry whose charset_name is equal to
 * it (case-sensitive comparison) is made the active item as it is appended.
 */
void Charset_Populate_Combobox (GtkComboBox *combo, gchar *select_charset)
{
    guint idx = 0;

    while (idx < CHARSET_TRANS_ARRAY_LEN)
    {
        gtk_combo_box_append_text(combo, _(charset_trans_array[idx].charset_title));

        if (select_charset != NULL
        &&  strcmp(charset_trans_array[idx].charset_name, select_charset) == 0)
        {
            gtk_combo_box_set_active(combo, idx);
        }

        idx++;
    }
}


/*
 * Return charset_name from charset_title
 *
 * The titles are compared in their translated form, ignoring the case.
 * Returns the charset_name of the first matching entry of
 * charset_trans_array, or an empty string when 'charset_title' is NULL or
 * doesn't match any entry. The result must not be freed.
 */
gchar *Charset_Get_Name_From_Title (const gchar *charset_title)
{
    guint idx;

    if (charset_title == NULL)
        return "";

    for (idx = 0; idx < CHARSET_TRANS_ARRAY_LEN; idx++)
    {
        if (strcasecmp(_(charset_title), _(charset_trans_array[idx].charset_title)) == 0)
        {
            return charset_trans_array[idx].charset_name;
        }
    }

    return "";
}


/*
 * Return charset_title from charset_name
 *
 * The names are compared ignoring the case.
 * Returns the translated charset_title of the first matching entry of
 * charset_trans_array, or an empty string when 'charset_name' is NULL or
 * doesn't match any entry. The result must not be freed.
 */
gchar *Charset_Get_Title_From_Name (gchar *charset_name)
{
    guint idx;

    if (charset_name == NULL)
        return "";

    for (idx = 0; idx < CHARSET_TRANS_ARRAY_LEN; idx++)
    {
        if (strcasecmp(charset_name, charset_trans_array[idx].charset_name) == 0)
        {
            return _(charset_trans_array[idx].charset_title);
        }
    }

    return "";
}



/*
 * Test if the conversion is supported between two character sets ('from' and 'to)
 */

gboolean test_conversion_charset (const gchar *from, const gchar *to)
{
    gchar *temp;
    GError *error = NULL;

    if (!from || !to)
        return FALSE;
    
    // Do a quick test conversion and examine error output
    temp = g_convert("a", -1, to, from, NULL, NULL, &error);

    if (!temp)
    {
        // Error in conversion
        if (error && error->code == G_CONVERT_ERROR_NO_CONVERSION)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_NO_CONVERSION)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_ILLEGAL_SEQUENCE)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_FAILED)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_FAILED)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_PARTIAL_INPUT)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_PARTIAL_INPUT)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_BAD_URI)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_BAD_URI)\n",from,to);
        } else if (error && error->code == G_CONVERT_ERROR_NOT_ABSOLUTE_PATH)
        {
            g_print("Conversion error from '%s' to '%s' (G_CONVERT_ERROR_NOT_ABSOLUTE_PATH)\n",from,to);
        } else
        {
            g_print("Conversion error from '%s' to '%s' (unknown : %d)\n",from,to,error->code);
        }
        
        if (error)
            g_error_free(error);
        return FALSE;
    } else
    {
        // No error
        if (error)
            g_error_free(error);
        g_free(temp);
        return TRUE;
    }
}
