/******************************************************************************
    AUTHOR:
    File written and Copyrighted by Zachary Dovel. All Rights Reserved.

    LICENSE:
    This file is part of gWaei.

    gWaei is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    gWaei is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with gWaei.  If not, see <http://www.gnu.org/licenses/>.
*******************************************************************************/

//! 
//! @file src/formatting.c
//!
//! @brief Adds general text formatting to strings
//!
//! Functions to add proper formatting to results/queries depending on the
//! context.
//!


#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include <libintl.h>

#include <glib.h>

#include <gwaei/definitions.h>
#include <gwaei/regex.h>
#include <gwaei/utilities.h>
#include <gwaei/dictionary-objects.h>
#include <gwaei/search-objects.h>
#include <gwaei/interface.h>
#include <gwaei/preferences.h>


//!
//! @brief Copies a string while adding some special formatting
//!
//! The formatting added will be to decide of the word will have have hiragara
//! and katakana variations of it searched and if four kanji woulds have the
//! two kanji pieces also searched.
//!
//! @param output Character array the formatting string is copied to
//! @param output Character array to format
//! @param item GwSearchItem to get misc data from
//!
gboolean gw_fmt_strcpy_with_query_preformatting (char* output, char* input, GwSearchItem *item)
{
    char buffer[MAX_QUERY];
    strncpy(buffer, input, MAX_QUERY);
    buffer[MAX_QUERY - 1] = '\0';

    //Load the preformatting preferences from pref
    gboolean hira_kata_conv_pref;
    hira_kata_conv_pref = gw_pref_get_boolean (GCKEY_GW_HIRA_KATA, TRUE);
      
    gboolean kata_hira_conv_pref;
    kata_hira_conv_pref = gw_pref_get_boolean (GCKEY_GW_KATA_HIRA, TRUE);

    int roman_kana_conv_pref;
    roman_kana_conv_pref = gw_pref_get_int (GCKEY_GW_ROMAN_KANA, 2);

    //Load the preformatting preferences from pref
    if (item->dictionary->type == GW_DICT_KANJI || item->dictionary->type == GW_DICT_RADICALS)
    {
      strcpy(output, buffer);
      return FALSE;
    }
  
    //Hiragana query preprocessing
    else if (hira_kata_conv_pref == TRUE && gw_util_is_hiragana_str(buffer))
    {
      char hira[MAX_QUERY], kata[MAX_QUERY];
      strcpy(hira, buffer);
      strcpy(kata, buffer);
      gw_util_str_shift_hira_to_kata(kata);

      int leftover;
      leftover = MAX_QUERY;
      strncpy(output, "(", leftover); 
      leftover -= 1;
      strncat(output, hira, leftover);
      leftover -= strlen(hira);
      strncat(output, ")|(", leftover);
      leftover -= 3;
      strncat(output, kata, leftover);
      leftover -= strlen(kata);
      strncat(output, ")", leftover); 

      return TRUE;
    }

    //Katakana query preprocessing
    else if (kata_hira_conv_pref == TRUE && gw_util_is_katakana_str(buffer))
    {
      char hira[MAX_QUERY], kata[MAX_QUERY];
      strcpy(hira, buffer);
      strcpy(kata, buffer);
      gw_util_str_shift_kata_to_hira(hira);

      int leftover;
      leftover = MAX_QUERY;
      strncpy(output, "(", leftover); 
      leftover -= 1;
      strncat(output, kata, leftover);
      leftover -= strlen(kata);
      strncat(output, ")|(", leftover);
      leftover -= 3;
      strncat(output, hira, leftover);
      leftover -= strlen(hira);
      strncat(output, ")", leftover); 

      return TRUE;
    }

    //Kanji 四字熟語 query preprocessing
    else if (gw_util_is_kanji_str(buffer) && g_utf8_strlen(buffer, -1) == 4)
    {
      char first_half[20];
      char second_half[20];

      char *middle = g_utf8_next_char(g_utf8_next_char(buffer));

      //Split the query into two equal halves
      char *src_ptr = buffer;
      char *dest_ptr = first_half;
      while(src_ptr != middle)
      {
        *dest_ptr = *src_ptr;
        src_ptr++;
        dest_ptr++;
      }
      *dest_ptr = '\0';
            
      src_ptr = middle;
      dest_ptr = second_half;
      while(*src_ptr != '\0')
      {
        *dest_ptr = *src_ptr;
        src_ptr++;
        dest_ptr++;
      }
      *dest_ptr = '\0';

      strcpy(output, "(");
      strcat(output, first_half);
      strcat(output, second_half);
      strcat(output, ")|(");
      strcat(output, first_half);
      strcat(output, ")|(");
      strcat(output, second_half);
      strcat(output, ")");

      return TRUE;
    }

    //Romanji/other query preprocessing
    else if (
             (roman_kana_conv_pref == 0                                 ) || 
             (roman_kana_conv_pref == 2 && gw_util_is_japanese_locale() == FALSE)
            )
    {
      char *input_ptr = buffer;
      char kana[MAX_QUERY];
      char *kana_ptr = kana;
      *kana_ptr = '\0';

      int leftover;
      leftover = MAX_QUERY;
      while (leftover-- > 0)
      {
        kana_ptr = gw_util_roma_to_hira (input_ptr, kana_ptr);
        if (kana_ptr == NULL || input_ptr == NULL)
          break;

        input_ptr = gw_util_next_hira_char_from_roma (input_ptr);
        if (kana_ptr == NULL || input_ptr == NULL)
          break;

        kana_ptr = &kana_ptr[strlen(kana_ptr)];
      }

      gboolean success;
      success = (input_ptr != NULL && strlen(input_ptr) == 0);

      //On success, copy the string to the end of the query
      if (success == TRUE)
      {
        leftover = MAX_QUERY;
        strncpy(output, "(", leftover);
        leftover -= 1;
        strncat(output, buffer, leftover);
        leftover -= strlen(buffer);
        strncat(output, ")|(", leftover);
        leftover -= 3;
        //Add a hiragana version
        strncat(output, kana, leftover);
        leftover -= strlen(kana);

        //Add a katakana version
        strncat(output, ")|(", leftover);
        leftover -= 3;

        gw_util_str_shift_hira_to_kata(kana);
        strncat(output, kana, leftover);
        leftover -= strlen(kana);
        strncat(output, ")",  leftover);

        return TRUE;
      }
    }

    //No conversions were necissary
    strcpy(output, input);
    return FALSE;
}


//!
//! @brief Copies a string while adding some special formatting
//!
//! This function parses a string, adding delimiters for search atoms and then
//! writes the edited string to the output.
//!
//! @param output Character array the formatting string is copied to
//! @param output Character array to format
//! @param item GwSearchItem to get misc data from
//!
void gw_fmt_strcpy_with_query_formatting (char* output, char* input, GwSearchItem *item)
{
    //Searching in the kanji sidebar only look for a matching first character
    if (item->target == GW_TARGET_KANJI)
    {
      strcpy(output, "^(");
      strcat(output, input);
      strcat(output, ")");
      strcat(output, DELIMITOR_STR);
    }

    //General Radical and kanji searches look for every single atom separated by
    //the delimitor
    else if (item->dictionary->type == GW_DICT_KANJI || item->dictionary->type == GW_DICT_RADICALS)
    {
      //Radical and kanji searches don't use regex
      //so the search should be cleaned before sending.

      char *output_ptr = &output[0];

      //s = start
      char *s = NULL;
      //e = end
      char *e = NULL;

      s = &input[0];

      //copy the Kanji characters
      while(*s != '\0')
      {
        if (g_utf8_get_char(s) > L'ン') { // 0x30A1 = 'ァ'
          e = g_utf8_next_char(s);
          while (s != e) {
            *output_ptr = *s;
            s++;
            output_ptr++;
          }
          *output_ptr = DELIMITOR_CHR;
          output_ptr++;
        }
        else {
          s = g_utf8_next_char(s);
        }
      }
      *output_ptr = '\0';

      //copy the Grade search atom
      if (gw_regex_locate_boundary_byte_pointers(input, "G[0-9]{1,2}", &s, &e))
      {
        *output_ptr = ' ';
        output_ptr++;
        while(s != e)
        {
          *output_ptr = *s;
          output_ptr++;
          s++;
        }
        *output_ptr = ' ';
        output_ptr++;
        *output_ptr = DELIMITOR_CHR;
        output_ptr++;
      }
      *output_ptr = '\0';

      //copy the Stroke search atom
      if (gw_regex_locate_boundary_byte_pointers(input, "S[0-9]{1,2}", &s, &e))
      {
        *output_ptr = ' ';
        output_ptr++;
        while(s != e)
        {
          *output_ptr = *s;
          output_ptr++;
          s++;
        }
        *output_ptr = ' ';
        output_ptr++;
        *output_ptr = DELIMITOR_CHR;
        output_ptr++;
      }
      *output_ptr = '\0';

      //copy the Frequency search atom
      if (gw_regex_locate_boundary_byte_pointers(input, "F[0-9]{1,8}", &s, &e))
      {
        *output_ptr = ' ';
        output_ptr++;
        while(s != e)
        {
          *output_ptr = *s;
          output_ptr++;
          s++;
        }
        *output_ptr = ' ';
        output_ptr++;
        *output_ptr = DELIMITOR_CHR;
        output_ptr++;
      }
      *output_ptr = '\0';

      //copy the JLPT search atom
      if (gw_regex_locate_boundary_byte_pointers(input, "J[0-4]", &s, &e)){
        *output_ptr = ' ';
        output_ptr++;
        while(s != e)
        {
          *output_ptr = *s;
          output_ptr++;
          s++;
        }
        *output_ptr = ' ';
        output_ptr++;
        *output_ptr = DELIMITOR_CHR;
        output_ptr++;
      }
      *output_ptr = '\0';

      //copy the English search atom
      if (gw_regex_locate_boundary_byte_pointers(input, "[A-Za-z][a-z ]{1,20}", &s, &e))
      {
        while(s != e)
        {
          *output_ptr = *s;
          output_ptr++;
          s++;
        }
        *output_ptr = DELIMITOR_CHR;
        output_ptr++;
      }

      //Finalize the string
      *output_ptr = '\0';

      //copy the hirakana/kanakana search atom
      char exp[1000];
      strcpy(exp, "[(");
      strcat(exp, HIRAGANA);
      strcat(exp, "|");
      strcat(exp, KATAKANA);
      strcat(exp, ")]+");
      if (gw_regex_locate_boundary_byte_pointers(input, exp, &s, &e) && (e - s) >= 3 )
      {
        while(s != e && s != '\0')
        {
          *output_ptr = *s;
          output_ptr++;
          s++;
        }
        *output_ptr = DELIMITOR_CHR;
        output_ptr++;
      }

      //Finalize the string
      *output_ptr = '\0';
    }


    //Query setup for general searches
    else
    {
      //Copy the string to output
      strcpy(output, input);
     
      //Truncate at the first possible user delimitor
      char* user_delimitor_ptr = &output[0]; 
      while (*user_delimitor_ptr != '\0' && *user_delimitor_ptr != DELIMITOR_CHR)
      {
        if (*user_delimitor_ptr == '&')
          *user_delimitor_ptr = DELIMITOR_CHR;
        user_delimitor_ptr++;
      }
      *user_delimitor_ptr = '\0'; 

      //convert any '&' symbols to delimitors
      user_delimitor_ptr = &output[0]; 
      while (*user_delimitor_ptr != '\0')
      {
        if (*user_delimitor_ptr == '&')
          *user_delimitor_ptr = DELIMITOR_CHR;
        user_delimitor_ptr++;
      }
      *user_delimitor_ptr = '\0'; 
      
      //Add our own delimitor
      strcat(output, DELIMITOR_STR);
    }
}

