/*
   Name: $RCSfile: regex.c,v $
   Author: Nelson Loyola
   $Date: 2005/09/16 21:31:54 $
   $Revision: 1.3 $
   $Id: regex.c,v 1.3 2005/09/16 21:31:54 a_j_moran Exp $

   Legal Notice:

   This program is free software; you can redistribute it and/or
   modify it under the terms of the license contained in the
   COPYING file that comes with this distribution.

 */

/**
   @file

   @brief Wrapper code for PCRE.

*/

#include "regex.h"

#include <string.h>

#include <pcreposix.h>
#include <pcre.h>

/* internal details: state kept for one compiled RE and its latest match */
struct rpl_regex_s
{
    /* Copy of the subject string stored by rpl_regex_match(); it is the
       string rpl_regex_capture() extracts substrings from. */
    rpl_str_t src_str_m;
    /* Compiled pattern returned by pcre_compile(). */
    pcre *   pcre_pm;
    /* Capture count reported by pcre_fullinfo (PCRE_INFO_CAPTURECOUNT). */
    unsigned num_sub_patterns_m;
    /* Return value of the most recent pcre_exec() call. */
    int      result_m;
    /* Number of int elements in ovector_pam. */
    unsigned ovec_size_m;
    /* Offset vector handed to pcre_exec(); the start and end offsets of
       substring N are read from elements 2N and 2N + 1. */
    int *    ovector_pam;
};

/**
   Creates a regular expression object.

   The pattern is compiled with pcre_compile() and an offset vector with room
   for the whole match plus every capturing subpattern is allocated. A
   pattern that fails to compile trips an assertion.

   @param pattern String containing the RE pattern.

   @param flags The flags to be used by the RE. RPL_REGEX_FLAG_IGNORE_CASE
   selects PCRE_CASELESS and RPL_REGEX_FLAG_MULTILINE selects PCRE_MULTILINE.

   @return Pointer to the newly allocated RE object.

*/
rpl_regex_t * rpl_regex_create (rpl_str_t pattern, unsigned flags)
{
    int           pcreflags = 0;
    int           capture_count = 0;
    int           info_result;
    rpl_c_str_t   perrormsg_p;
    int           errorOffset;
    rpl_regex_t * re_p
        = (rpl_regex_t *) rpl_me_malloc (sizeof (struct rpl_regex_s));

    assert (re_p != NULL);

    /* No subject string is stored and no match has been run yet. The match
       and destroy routines test src_str_m against NULL before freeing it, so
       it must never hold an indeterminate value. */
    re_p->src_str_m = NULL;
    re_p->result_m  = 0;

    if (flags & RPL_REGEX_FLAG_IGNORE_CASE)
    {
        pcreflags |= PCRE_CASELESS;
    }

    if (flags & RPL_REGEX_FLAG_MULTILINE)
    {
        pcreflags |= PCRE_MULTILINE;
    }

    re_p->pcre_pm = pcre_compile (pattern,
                                  pcreflags,
                                  &perrormsg_p,
                                  &errorOffset,
                                  NULL);

    /* pcre_compile() failed with perrormsg_p */
    assert (re_p->pcre_pm != NULL);

    /* Get number of subpatterns that will be returned. PCRE stores this
       value through an int pointer, so fetch it into an int first. */
    info_result = pcre_fullinfo (re_p->pcre_pm,
                                 NULL,
                                 PCRE_INFO_CAPTURECOUNT,
                                 &capture_count);
    assert (info_result == 0);
    (void) info_result; /* only read by the assertion above */

    re_p->num_sub_patterns_m = (unsigned) capture_count;

    /* One offset pair per subpattern plus one for the whole match, and a
       further third of the vector as PCRE workspace (see pcre docs). */
    re_p->ovec_size_m = (re_p->num_sub_patterns_m + 1) * 3;
    re_p->ovector_pam = (int *) rpl_me_malloc (re_p->ovec_size_m * sizeof (int));
    assert (re_p->ovector_pam != NULL);

    return re_p;
}

/**
   Deallocates the memory associated with the RE.

   Releases the stored subject string, the offset vector, the compiled
   pattern and finally the RE object itself. The pointer must not be used
   after this call.

   @param re_p Pointer to the RE.

*/
void rpl_regex_destroy (rpl_regex_t * re_p)
{
    assert (re_p != NULL);

    if (re_p->src_str_m != NULL)
    {
        rpl_me_free (re_p->src_str_m);
    }

    /* Each resource is guarded by its own pointer. */
    if (re_p->ovector_pam != NULL)
    {
        rpl_me_free (re_p->ovector_pam);
    }

    if (re_p->pcre_pm != NULL)
    {
        pcre_free (re_p->pcre_pm);
    }

    /* The object came from rpl_me_malloc(), so release it as well. */
    rpl_me_free (re_p);
}

/**
   Returns true if the RE is found in the string.

   A private, NUL-terminated copy of the string is stored in the RE object,
   replacing the copy made by any earlier call. When the function returns
   early because the string is empty or the offset lies beyond its end, the
   state stored by a previous call is left untouched.

   @param re_p Pointer to the RE.

   @param str the string to search for the RE.

   @param offset the offset into the string to start the search. If set to -1
   (or any other negative value) then the search starts at the start of the
   string.

   @return Non-zero if the RE matched, zero otherwise.
*/
unsigned rpl_regex_match (rpl_regex_t * re_p,
                          rpl_str_t      str,
                          int           offset)
{
    size_t len = strlen (str);

    assert (re_p != NULL);
    assert (re_p->pcre_pm != NULL);
    assert (re_p->ovector_pam != NULL);

    /* Clamp before the range check: a negative int compared against the
       unsigned length would be converted to a huge value and rejected as
       out of range. */
    if (offset < 0)
    {
        offset = 0;
    }

    if (((size_t) offset > len) || (len == 0)) return 0;

    /* replace the source string but first free the memory used by previous
     * string
     */
    if (re_p->src_str_m != NULL)
        rpl_me_free (re_p->src_str_m);
    re_p->src_str_m = (rpl_str_t) rpl_me_malloc (len + 1);
    assert (re_p->src_str_m != NULL);
    /* len + 1 bytes so the terminating NUL is copied too */
    memcpy (re_p->src_str_m, str, len + 1);

    re_p->result_m = pcre_exec (re_p->pcre_pm,
                                NULL,
                                str,
                                (int) len,
                                offset,
                                0,
                                re_p->ovector_pam,
                                (int) re_p->ovec_size_m);

    return (re_p->result_m > 0);
}

/**
   Performs RE substring capturing.

   Copies one captured substring of the most recent successful match into a
   buffer obtained from rpl_me_malloc().

   @param re_p Pointer to the RE.

   @param substr_num The substring to capture. Values of 1 to
   rpl_regex_capture_substr_num() are valid.

   @return Pointer to the newly allocated string where the captured substring
   was stored.
*/
rpl_str_t rpl_regex_capture (rpl_regex_t * re_p, unsigned substr_num)
{
    const int * bounds_p;
    size_t      buf_size;
    rpl_str_t   copy_p;
    int         result;

    assert (re_p != NULL);
    assert (re_p->pcre_pm != NULL);
    assert (re_p->result_m > 0);
    assert (substr_num > 0);
    assert (substr_num < re_p->result_m);

    /* bounds_p [0] is the start offset, bounds_p [1] the end offset */
    bounds_p = &re_p->ovector_pam [2 * substr_num];

    /* substring length plus one byte for the terminator */
    buf_size = bounds_p [1] - bounds_p [0] + 1;
    copy_p = (rpl_str_t) rpl_me_malloc (buf_size);

    result = pcre_copy_substring (re_p->src_str_m,
                                  re_p->ovector_pam,
                                  re_p->result_m,
                                  substr_num,
                                  copy_p,
                                  buf_size);
    assert (result >= 0);

    return copy_p;
}

/**
   Returns a captured substring's start and end position.

   The positions are read from the offset vector filled in by the most
   recent match.

   @param re_p Pointer to the RE.

   @param substr_num The substring to capture. Values of 1 to
   rpl_regex_capture_substr_num() are valid.

   @param start_pos_p Pointer to where to store the start of the
   substring. Returns -1 if there was no match.

   @param end_pos_p Pointer to where to store the end of the substring.
   Returns -1 if there was no match.

*/
void rpl_regex_capture_info (rpl_regex_t * re_p,
                             unsigned      substr_num,
                             int          *start_pos_p,
                             int          *end_pos_p)
{
    const int * pair_p;

    assert (re_p != NULL);
    assert (re_p->pcre_pm != NULL);
    assert (re_p->result_m > 0);
    assert (substr_num > 0);
    assert (substr_num < re_p->result_m);

    /* each substring owns two consecutive slots: start, then end */
    pair_p = re_p->ovector_pam + (2 * substr_num);

    *start_pos_p = pair_p [0];
    *end_pos_p   = pair_p [1];
}

/**
   Returns the number of substrings captured by the RE.

   The count is the stored pcre_exec() result minus one, because that result
   also counts the whole match.

   @param re_p Pointer to the RE.

   @return The number of captured substrings, or 0 when the stored match
   result is not positive (no successful match).
*/
unsigned rpl_regex_capture_substr_num (rpl_regex_t * re_p)
{
    assert (re_p != NULL);
    assert (re_p->pcre_pm != NULL);

    /* A zero or negative result would otherwise wrap around to a huge
       unsigned count. */
    if (re_p->result_m <= 0)
    {
        return 0;
    }

    return (unsigned) (re_p->result_m - 1);
}

/**
   Performs text replacement using an RE.

   The text matched by the first captured substring (substring number 1) of
   the first match is replaced by subst_str; the rest of the string is
   copied unchanged.

   TODO: the global flag does not work - instead a loop has to be entered
   to get the global replacements (see the quote_ampersand function in
   parser.c for an example).

   @param re_p The regular expression to use on the text.

   @param str The string to do the replacement on.

   @param subst_str The string to substitute for.

   @return Pointer to a newly allocated string holding the result, or NULL
   if the RE did not match or substring 1 has no valid position.

*/
rpl_str_t rpl_regex_replace (rpl_regex_t * re_p,
                            rpl_str_t      str,
                            rpl_str_t      subst_str)
{
    rpl_str_t replace_str;
    size_t    len, subst_len, prefix_len, suffix_len;
    int       startPos, endPos;

    assert (re_p != NULL);

    if (!rpl_regex_match (re_p, str, 0))
    {
        return NULL;
    }

    len = strlen (str);
    subst_len = strlen (subst_str);

    rpl_regex_capture_info (re_p, 1, &startPos, &endPos);

    /* Negative or inconsistent offsets cannot be used as copy lengths, so
       treat them as "nothing to replace". */
    if ((startPos < 0) || (endPos < startPos) || ((size_t) endPos > len))
    {
        return NULL;
    }

    prefix_len = (size_t) startPos;
    suffix_len = len - (size_t) endPos;

    /* prefix + replacement + suffix + terminating NUL */
    replace_str = (rpl_str_t) rpl_me_malloc (prefix_len + subst_len
                                             + suffix_len + 1);
    assert (replace_str != NULL);

    memcpy (replace_str, str, prefix_len);
    memcpy (replace_str + prefix_len, subst_str, subst_len);
    /* suffix_len + 1 also copies the terminating NUL of str */
    memcpy (replace_str + prefix_len + subst_len, str + endPos, suffix_len + 1);

    return replace_str;
}

