/* ***** BEGIN LICENSE BLOCK *****
 * Source last modified: $Id: parsing.cpp,v 1.1.2.1 2004/07/09 01:50:20 hubbe Exp $
 * 
 * Portions Copyright (c) 1995-2004 RealNetworks, Inc. All Rights Reserved.
 * 
 * The contents of this file, and the files included with this file,
 * are subject to the current version of the RealNetworks Public
 * Source License (the "RPSL") available at
 * http://www.helixcommunity.org/content/rpsl unless you have licensed
 * the file under the current version of the RealNetworks Community
 * Source License (the "RCSL") available at
 * http://www.helixcommunity.org/content/rcsl, in which case the RCSL
 * will apply. You may also obtain the license terms directly from
 * RealNetworks.  You may not use this file except in compliance with
 * the RPSL or, if you have a valid RCSL with RealNetworks applicable
 * to this file, the RCSL.  Please see the applicable RPSL or RCSL for
 * the rights, obligations and limitations governing use of the
 * contents of the file.
 * 
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL") in which case the provisions of the GPL are applicable
 * instead of those above. If you wish to allow use of your version of
 * this file only under the terms of the GPL, and not to allow others
 * to use your version of this file under the terms of either the RPSL
 * or RCSL, indicate your decision by deleting the provisions above
 * and replace them with the notice and other provisions required by
 * the GPL. If you do not delete the provisions above, a recipient may
 * use your version of this file under the terms of any one of the
 * RPSL, the RCSL or the GPL.
 * 
 * This file is part of the Helix DNA Technology. RealNetworks is the
 * developer of the Original Code and owns the copyrights in the
 * portions it created.
 * 
 * This file, and the files included with this file, is distributed
 * and made available on an 'AS IS' basis, WITHOUT WARRANTY OF ANY
 * KIND, EITHER EXPRESS OR IMPLIED, AND REALNETWORKS HEREBY DISCLAIMS
 * ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET
 * ENJOYMENT OR NON-INFRINGEMENT.
 * 
 * Technology Compatibility Kit Test Suite(s) Location:
 *    http://www.helixcommunity.org/content/tck
 * 
 * Contributor(s):
 * 
 * ***** END LICENSE BLOCK ***** */

/////////////////////////////////////////////////////////////////////////////
//
//  PARSING.CPP
//
//
//  This file contains the functions that are used in RealText rendering to 
//  parse the text and deal with the tokens.
//
//  (1) ULONG32 skipSpacesTabsAndNewlineChars(_CHAR* pBuf, ULONG32 bufLen,
//		ULONG32 startIndx). 
//  //Takes a pointer to a char buffer and skips all contiguous space
//  //	chars, tab chars, newline chars, and/or carriage return chars
//  //	and returns the index of the first non-such char (at or past
//  //	startIndx) in the buffer. Note that this character may be the
//  //	terminating '\0' char.
//
//  (2) ULONG32 findNextSpaceTabOrNewLineChar(_CHAR* pBuf, ULONG32 bufLen,
//		ULONG32 startIndx, ULONG32 &indexOfEqualsSign,
//		ULONG32 ulCurCharset);
//  //Takes a pointer to a char buffer and skips all contiguous
//  //	characters that are NOT space chars, tab chars, newline chars,
//  //	or carriage return chars and returns the index of the first
//  //	space, tab, newline, or carriage return char (at or past
//  //	startIndx) encountered in the buffer.  If no '=' sign is found,
//  //	indexOfEqualsSign is set to bufLen.  Note that the index returned
//  //	may be of the terminating '\0' char.  Note also that ulCurCharset
//  //  is needed because we may want to skip trail byte if a lead DBCS
//  //  byte is found, but only if the charset is a DBCS one.
//
//  (3) void convertToUpperCase(_CHAR* pBuf, ULONG32 bufLen);
//  //Converts all lower-case alphabet chars in buf to uppercase except
//  //  chars inside quotes.
//
//  (4) ULONG32 findNextChar(_CHAR charToFind, _CHAR* pBuf, ULONG32 ulBufLen,
//		ULONG32 ulStartIndex, ULONG32 ulCurCharset);
//  //Finds the next instance of the specified character in pBuf starting its
//  // search at ulStartIndex and returning the index of the next
//  // occurrence of charToFind (or ulBufLen if charToFind was not found).
//  // Note that ulCurCharset is needed because we may want to skip trail
//  // byte if a lead DBCS byte is found, but only if the charset is a DBCS one.
//
//  (5) BOOL GetNextTokenLvalueRvaluePair(_CHAR* pBuf, ULONG32 ulBufLen,
//	        ULONG32& ulLvalueStartIndex, ULONG32& ulLvalueEndIndex,
//		ULONG32& ulRvalueStartIndex, ULONG32& ulRvalueEndIndex)
//  // Finds the start and end indices of both elements of the next 
//  //  "lvalue=rvalue" token in pBuf.
//  //
//  //  Example 1. : if pBuf is the contents inside the following []'s: 
//  //	[ COLOR= red]
//  //  then this function will set ulLvalueStartIndex to 1, 
//  //  ulLvalueEndIndex to 6, ulRvalueStartIndex to 8, and 
//  //  ulRvalueEndIndex to 11; "COLOR" and "red" are the values found.
//  //
//  //  Example 2: if pBuf is the contents inside the following []'s: 
//  //	[COLOR  ="light blue" blah blah]
//  //  then this function will set ulLvalueStartIndex to 0, 
//  //  ulLvalueEndIndex to 5, ulRvalueStartIndex to 9, and 
//  //  ulRvalueEndIndex to 19; "COLOR" and "light blue" are the vals found.
//
//  (6) BOOL lookForStartAndEndQuotesOfString(
//		_CHAR* pBuf,
//		ULONG32 ulBufLen,
//		BOOL& bStartQuoteWasFound,
//		BOOL& bEndQuoteWasFound);
//  //Returns TRUE if either a start or end quote was found.  Note: this
//  // function does NOT mess with the pBuf in any way, so it is the
//  // responsibility of the caller to do a pBuf++ if start quote is to
//  // be removed and to put a '\0' in place of the end quote if it is
//  // to be removed.  If the string is only one character long and that
//  // char is a quote, then bStartQuoteWasFound is set to TRUE and
//  // bEndQuoteWasFound is set to FALSE.
//
//
//
//

#include "hxtypes.h"
#include "rt_types.h" //for _CHAR

#include "fontdefs.h" //for CHARSET defines.

#include "parsing.h"

#include "hxheap.h"
#ifdef _DEBUG
#undef HX_THIS_FILE		
static char HX_THIS_FILE[] = __FILE__;
#endif





/////////////////////////////////////////////////////////////////////////////
//	Function: ULONG32 skipSpacesTabsAndNewlineChars();
//
//  - Takes a pointer to a char buffer and skips all contiguous space chars,
//	tab char, newline chars, and/or carriage return chars.
//
//  - Returns the index of the pBuf where the next non-space, tab, ...etc
//	char was found.  This character can be the '\0' terminating
//	character, so caller needs to check to see if returned index is end
//	of buffer.
//
ULONG32 skipSpacesTabsAndNewlineChars(_CHAR* pBuf, ULONG32 bufLen,
	ULONG32 startIndx)
{
    // Returns the index (at or past startIndx) of the first byte that is
    // not a space, horizontal tab, vertical tab, newline or carriage
    // return.  Scanning also stops at a '\0' byte and at any byte whose
    // value is >= DBCS_MIN_LEAD_BYTE_VAL; such a byte is never treated as
    // white space.  bufLen is returned when pBuf is NULL, when startIndx is
    // at or past bufLen, or when nothing but white space remains, so the
    // caller must still check the result against bufLen and '\0'.
    if(!pBuf  ||  startIndx >= bufLen)
    {
	return bufLen;
    }

    ULONG32 curIndx = startIndx;

    // The bound is tested before every read so that pBuf[bufLen] is never
    // touched; the buffer therefore need not be '\0'-terminated.
    while(curIndx < bufLen)
    {
	_CHAR ch = pBuf[curIndx];

	if('\0' == ch  ||  (UCHAR)ch >= DBCS_MIN_LEAD_BYTE_VAL)
	{
	    break; //terminator or possible DBCS lead byte: stop here.
	}

	if(	    ch!=' '	//space char.
		&&  ch!='\t'	//horizontal tab char.
		&&  ch!='\v'	//vertical tab char.
		&&  ch!='\n'	//newline char.
		&&  ch!='\r')	//carriage return char.
	{
	    break; //first non-white-space char found.
	}

	curIndx++;
    }

    return curIndx;
}



/////////////////////////////////////////////////////////////////////////////
//  Function: ULONG32 findNextSpaceTabOrNewLineChar();
//
//  - Takes a pointer to a char buffer and skips all contiguous characters
//	that are NOT space chars, tab chars, newline chars, or carriage
//	return chars and returns the index of the first space, tab, newline,
//	or carriage return char (at or past startIndx) encountered in the
//	buffer.  The index of the first '=' char, if any, encountered is
//	placed in indexOfEqualsSign parameter.  If no '=' sign is found,
//	indexOfEqualsSign is set to bufLen.  
//
//  - Note: the index returned may be that of the terminating '\0' char.
//
//  - Note also that ulCurCharset is needed because we may want to skip
//	trail byte if a lead DBCS byte is found, but only if the charset
//	is a DBCS one.
//
ULONG32 findNextSpaceTabOrNewLineChar(_CHAR* pBuf, ULONG32 bufLen,
	ULONG32 startIndx, ULONG32 &indexOfEqualsSign, ULONG32 ulCurCharset)
{
    // Returns the index (at or past startIndx) of the first space,
    // horizontal tab, vertical tab, newline or carriage return, or of the
    // terminating '\0', or bufLen if none of those is met first.
    // indexOfEqualsSign receives the index of the first '=' passed over on
    // the way, or bufLen when there is none.  When ulCurCharset has the
    // HX_DBCS_CHARSET bit set, a byte >= DBCS_MIN_LEAD_BYTE_VAL is treated
    // as a lead byte and is skipped together with its trail byte.
    indexOfEqualsSign = bufLen;

    if(!pBuf  ||  startIndx >= bufLen)
    {
	return bufLen;
    }

    const BOOL bCharsetIsDBCS =
	    (ulCurCharset & HX_DBCS_CHARSET) ? TRUE : FALSE;
    ULONG32 curIndx = startIndx;

    // The bound is tested before every read so that pBuf[bufLen] is never
    // touched, even when the buffer is not '\0'-terminated.
    while(curIndx < bufLen)
    {
	_CHAR ch = pBuf[curIndx];

	if('\0' == ch)
	{
	    break;
	}

	if(bCharsetIsDBCS  &&  (UCHAR)ch >= DBCS_MIN_LEAD_BYTE_VAL)
	{
	    if(bufLen - curIndx < 2)
	    {
		return bufLen; //lead byte is the last byte; no trail byte.
	    }
	    if('\0' == pBuf[curIndx+1])
	    {
		//Truncated character: the string ends where the trail byte
		// should be, so stop at the terminator instead of jumping
		// over it and scanning whatever lies beyond the string.
		return curIndx+1;
	    }
	    curIndx+=2;	 //skip the lead byte and the trail byte.
	    continue;
	}

	if(	    ch==' '	//space char.
		||  ch=='\t'	//horizontal tab char.
		||  ch=='\v'	//vertical tab char.
		||  ch=='\n'	//newline char.
		||  ch=='\r')	//carriage return char.
	{
	    break;
	}

	//Only the first '=' is recorded; bufLen means "none seen yet":
	if('=' == ch  &&  indexOfEqualsSign == bufLen)
	{
	    indexOfEqualsSign = curIndx;
	}

	curIndx++;
    }

    return curIndx;
}



/////////////////////////////////////////////////////////////////////////////
//Converts all lower-case alphabet chars in buf to uppercase:
// Added code so text inside quotes won't go to uppercase,
// e.g., [blah "blah"] will be converted to [BLAH "blah"]:
void convertToUpperCase(_CHAR* pBuf, ULONG32 bufLen)
{
    // Upper-cases, in place, every 'a'..'z' byte of pBuf that lies outside
    // double quotes.  Processing stops at the first '\0' or after bufLen
    // bytes, whichever comes first.  Each '"' toggles the "inside quotes"
    // state and is itself left untouched.
    if(!pBuf  ||  !bufLen)
    {
	return;
    }

    //XXXEH- this function is always called from inside a markup tag, so
    // pBuf should never be DBCS.  DBCS chars inside quotes are not given
    // any special handling for now (bytes inside quotes are skipped one at
    // a time and never modified).

    BOOL bIsInsideQuotes = FALSE;
    ULONG32 indx;

    // The bound is tested before every read so that pBuf[bufLen] is never
    // touched, even when the buffer is not '\0'-terminated.
    for(indx = 0; indx < bufLen  &&  pBuf[indx] != '\0'; indx++)
    {
	_CHAR ch = pBuf[indx];

	if('\"' == ch)
	{
	    bIsInsideQuotes = !bIsInsideQuotes;
	    continue; //the quote char itself is never changed.
	}

	if(!bIsInsideQuotes  &&  ch >= 'a'  &&  ch <= 'z')
	{
	    pBuf[indx] = (_CHAR)(ch + ('A' - 'a'));
	}
    }
}



/////////////////////////////////////////////////////////////////////////////
// Finds the next instance of the specified character in pBuf starting its
// search at ulStartIndex and returning the index of the next occurrence of
// charToFind (or ulBufLen if charToFind was not found):
// Note that ulCurCharset is needed because we may want to skip trail
// byte if a lead DBCS byte is found, but only if the charset is a DBCS one.
//
ULONG32 findNextChar(_CHAR charToFind, _CHAR* pBuf, ULONG32 ulBufLen,
	ULONG32 ulStartIndex, ULONG32 ulCurCharset)
{
    // Returns the index of the first byte equal to charToFind in
    // pBuf[ulStartIndex .. ulBufLen-1], or ulBufLen when there is no such
    // byte (also when pBuf is NULL or ulStartIndex is past ulBufLen).  The
    // result is never greater than ulBufLen.  The scan does not stop at
    // '\0' bytes.  charToFind is assumed to be a SINGLE BYTE CHAR.
    if(!pBuf  ||  ulStartIndex > ulBufLen)
    {
	return ulBufLen;
    }

    const BOOL bCharsetIsDBCS =
	    (ulCurCharset & HX_DBCS_CHARSET) ? TRUE : FALSE;
    ULONG32 index = ulStartIndex;

    while(index < ulBufLen)
    {
	_CHAR ch = pBuf[index];

	//DBCS handling: a lead byte and its trail byte form one character,
	// so neither of them may be matched against the single-byte
	// charToFind.  Step over both without comparing:
	if(bCharsetIsDBCS  &&  (UCHAR)ch >= DBCS_MIN_LEAD_BYTE_VAL)
	{
	    if(ulBufLen - index < 2)
	    {
		//Lead byte is the last byte of the buffer; there is no
		// trail byte to read and nothing left to search.
		return ulBufLen;
	    }
	    index += 2;
	    continue;
	}

	if(charToFind == ch)
	{
	    return index;
	}

	index++;
    }

    return ulBufLen;
}


/////////////////////////////////////////////////////////////////////////////
// Finds the start and end indices of both elements of the next 
//  "lvalue=rvalue" token in pBuf.
//
//  Example 1: if pBuf is the contents inside the following []'s: 
//	[ COLOR= red]
//  then this function will set ulLvalueStartIndex to 1, 
//  ulLvalueEndIndex to 6, ulRvalueStartIndex to 8, and 
//  ulRvalueEndIndex to 11; "COLOR" and "red" are the values found.
//
//  Example 2: if pBuf is the contents inside the following []'s: 
//	[COLOR  ="light blue" blah blah]
//  then this function will set ulLvalueStartIndex to 0, 
//  ulLvalueEndIndex to 5, ulRvalueStartIndex to 9, and 
//  ulRvalueEndIndex to 19; "COLOR" and "light blue" are the values found.
//
BOOL GetNextTokenLvalueRvaluePair(_CHAR* pBuf, ULONG32 ulBufLen,
	ULONG32& ulLvalueStartIndex, ULONG32& ulLvalueEndIndex,
	ULONG32& ulRvalueStartIndex, ULONG32& ulRvalueEndIndex)
{
    // Locates the first "lvalue=rvalue" token in pBuf.  On TRUE the four
    // index parameters delimit the two parts as half-open ranges
    // [start, end); for a quoted rvalue the range covers the text after
    // the opening quote up to the next quote (or the end of the buffer).
    // On FALSE the index parameters hold whatever was determined before
    // the scan gave up (they are all reset to 0 on entry).
    ulRvalueEndIndex = 0L;
    ulRvalueStartIndex = 0L;
    ulLvalueEndIndex = 0L;
    ulLvalueStartIndex = 0L;

    if(!pBuf  ||  0 == ulBufLen)
    {
	return FALSE;
    }

    //Step 1: the lvalue starts at the first non-white-space char.  Give up
    // if there is none, or if that char is already the '=':
    ulLvalueStartIndex = skipSpacesTabsAndNewlineChars(pBuf, ulBufLen, 0L);
    if(ulLvalueStartIndex >= ulBufLen  ||
	    '=' == pBuf[ulLvalueStartIndex])
    {
	return FALSE;
    }

    //Step 2: run to the end of the first word, noting any '=' inside it
    // (in-tag text is always us-ascii):
    ULONG32 ulEqualsPos;
    const ULONG32 ulWordEnd = findNextSpaceTabOrNewLineChar(pBuf, ulBufLen,
	    ulLvalueStartIndex, ulEqualsPos, CHARSET__us_ascii);
    const BOOL bEqualsInsideWord = (ulEqualsPos < ulBufLen) ? TRUE : FALSE;

    //The lvalue ends at the '=' if the word contains one, otherwise at the
    // end of the word:
    ulLvalueEndIndex = bEqualsInsideWord ? ulEqualsPos : ulWordEnd;

    if(!bEqualsInsideWord)
    {
	//Step 3: the '=' was not part of the word, so it has to be the
	// very next non-white-space char after it:
	if(ulWordEnd >= ulBufLen)
	{
	    return FALSE; //the word ran to the end of the buffer.
	}

	ulEqualsPos =
		skipSpacesTabsAndNewlineChars(pBuf, ulBufLen, ulWordEnd);
	if(ulEqualsPos >= ulBufLen  ||  '=' != pBuf[ulEqualsPos])
	{
	    return FALSE; //no '=' where one was expected.
	}
    }

    //Step 4: the rvalue starts at the first non-white-space char that
    // follows the '=':
    ulRvalueStartIndex =
	    skipSpacesTabsAndNewlineChars(pBuf, ulBufLen, ulEqualsPos+1);
    if(ulRvalueStartIndex >= ulBufLen)
    {
	return FALSE; //only white space follows the '='.
    }

    //Step 5: find where the rvalue ends (in-tag text is always us-ascii):
    if('\"' == pBuf[ulRvalueStartIndex])
    {
	//Quoted value: report the foo part of "foo", i.e., start after the
	// opening quote and end at the next quote (or end-of-buf):
	ulRvalueStartIndex++;
	ulRvalueEndIndex = findNextChar('\"', pBuf, ulBufLen,
		ulRvalueStartIndex, CHARSET__us_ascii);
    }
    else
    {
	//Bare value: it ends at the next white-space char (or end-of-buf).
	// The '=' position reported by the helper is of no interest here:
	ULONG32 ulIgnoredEqualsPos;
	ulRvalueEndIndex = findNextSpaceTabOrNewLineChar(pBuf, ulBufLen,
		ulRvalueStartIndex, ulIgnoredEqualsPos, CHARSET__us_ascii);
    }

    return TRUE;
}//end of GetNextTokenLvalueRvaluePair().


/////////////////////////////////////////////////////////////////////////////
//Returns TRUE if either a start or end quote was found.  Note: this
// function does NOT mess with the pBuf in any way, so it is the
// responsibility of the caller to do a pBuf++ if start quote is to
// be removed and to put a '\0' in place of the end quote if it is
// to be removed.  If the string is only one character long and that
// char is a quote, then bStartQuoteWasFound is set to TRUE and
// bEndQuoteWasFound is set to FALSE.
BOOL lookForStartAndEndQuotesOfString(_CHAR* pBuf, ULONG32 ulBufLen,
	BOOL& bStartQuoteWasFound, BOOL& bEndQuoteWasFound)
{
    // Reports whether pBuf[0] and/or pBuf[ulBufLen-1] is a '"' char and
    // returns TRUE if either is.  pBuf is only read, never modified.
    //
    // Both flags are cleared before the argument check so that they hold a
    // defined value (FALSE) even when the function bails out early on a
    // NULL or empty buffer.
    bStartQuoteWasFound = bEndQuoteWasFound = FALSE;

    if(!pBuf  ||  !ulBufLen)
    {
	return FALSE;
    }

    if('\"' == pBuf[0])
    {
	bStartQuoteWasFound = TRUE;
    }

    //A one-char string has a single char that can only count as the start
    // quote, so the end quote is looked for only in longer strings:
    if(ulBufLen > 1L  &&  '\"' == pBuf[ulBufLen-1])
    {
	bEndQuoteWasFound = TRUE;
    }

    return(bStartQuoteWasFound  ||  bEndQuoteWasFound);
}

