/***************************************************************************
 Mutella - A commandline/HTTP client for the Gnutella filesharing network.

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 gnuwordhash.cpp  -  the heart of query-routing protocol (QRP) implementation

 the original version of this file was taken from Gnucleus
 (http://gnucleus.sourceforge.net)
                          
                             -------------------
    begin                : Mon Oct 28 2002
    copyright            : (C) 2002 by Max Zaitsev
    email                : maksik@gmx.co.uk
 ***************************************************************************/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "mutella.h"
#include "structures.h"
#include "conversions.h"

#include "asyncsocket.h"
#include "gnudirector.h"
#include "gnunode.h"
#include "gnuwordhash.h"

// Multiplier used by MGnuWordHash::HashFast() for its multiplication-based hash.
// Deliberately defined without a trailing semicolon so that the macro can be
// used inside larger expressions, not only as the last token of a statement.
#define A_INT 0x4F1BBCDC

// Constructs the word hash for the given director.
//
// pDirector - owning director; it is stored and also queried for the share
//             object, so it must be a valid pointer.
//
// The patch table is zero-filled and all statistics counters start at zero.
MGnuWordHash::MGnuWordHash(MGnuDirector* pDirector)
{
	// Owner and the share it manages
	m_pDirector = pDirector;
	m_pShare    = pDirector->GetShare();

	// Table geometry and an all-zero patch table
	m_dwTableSize = 1 << TABLE_BITS;
	memset(m_PatchTable, 0, 1 << TABLE_BITS);

	// Statistics
	m_dwHashedWords   = 0;
	m_dwUniqueSlots   = 0;
	m_dwRemoteSlots   = 0;
	m_dwLargestRehash = 0;
}

// Destructor. The body is empty: no table slot is released here.
// NOTE(review): the LocalKey/RemoteKey containers are allocated with new elsewhere
// in this file; presumably whatever is still allocated is freed by the slot type
// itself or by an explicit ClearLocalTable()/ResetTable() beforehand - confirm.
MGnuWordHash::~MGnuWordHash()
{
}

// Drops every locally hashed word: frees the LocalKey container of each slot,
// zero-fills the patch table and resets the local statistics counters.
// Remote entries (RemoteKey / m_dwRemoteSlots) are left untouched.
void MGnuWordHash::ClearLocalTable()
{
	const int nSlots = 1 << TABLE_BITS;

	// Only the walk over the hash table itself is done under the table mutex
	m_mutexTable.lock();

	for(int nSlot = 0; nSlot < nSlots; ++nSlot)
	{
		if(m_HashTable[nSlot].LocalKey == NULL)
			continue;

		delete m_HashTable[nSlot].LocalKey;
		m_HashTable[nSlot].LocalKey = NULL;
	}

	m_mutexTable.unlock();

	// Patch table and counters are reset after the mutex has been released
	memset(m_PatchTable, 0, nSlots);

	m_dwHashedWords   = 0;
	m_dwLargestRehash = 0;
	m_dwUniqueSlots   = 0;
}

// Removes every remote entry that belongs to the given node.
//
// dwResetNodeID - ID of the node whose entries are dropped from all slots
//
// A slot whose remote list becomes empty is freed and m_dwRemoteSlots is
// decremented. The table mutex is taken separately for each occupied slot.
void MGnuWordHash::ResetTable(DWORD dwResetNodeID)
{
	for(int i = 0; i < 1 << TABLE_BITS; i++)
	{
		// NOTE(review): this emptiness pre-check is done without the mutex, as it
		// always was - confirm no other thread can free the slot in between.
		if(m_HashTable[i].RemoteKey == NULL)
			continue;

		m_mutexTable.lock();

		// list::remove erases all matching elements safely. The previous
		// hand-written erase inside a for loop skipped the element following an
		// erased one and could increment the iterator past end().
		m_HashTable[i].RemoteKey->remove(dwResetNodeID);

		// Nothing remote at this key now
		if(m_HashTable[i].RemoteKey->empty())
		{
			delete m_HashTable[i].RemoteKey;
			m_HashTable[i].RemoteKey = NULL;

			m_dwRemoteSlots--;
		}

		m_mutexTable.unlock();
	}
}

// Applies the patch stored in pNode to the remote part of the local hash table.
//
// pNode     - node providing the patch (m_PatchTable), the number of patch
//             entries (m_TableLength) and the ID recorded in the table (GetID())
// EntryBits - width of one patch entry: 4 (two entries per byte, high nibble
//             first) or 8 (one entry per byte). Other widths are ignored.
//
// Entries are signed. A negative entry adds the node to the matching local
// slot(s), a positive entry removes it, zero leaves the slot(s) unchanged.
// The remote table may differ in size from the local one: remote position i is
// mapped to the local range starting at i * (local size / remote size).
void MGnuWordHash::ApplyPatch(MGnuNode* pNode, int EntryBits)
{
	// With any other width no entry is ever decoded, so every patch value would
	// stay zero and nothing would be applied.
	if(EntryBits != 4 && EntryBits != 8)
		return;

	const UINT  RemoteTableSize = pNode->m_TableLength;
	const UINT  LocalTableSize  = 1 << TABLE_BITS;
	const DWORD dwNodeID        = pNode->GetID();

	const double Factor = (double) LocalTableSize / (double) RemoteTableSize;

	for(UINT i = 0; i < RemoteTableSize; i++)
	{
		// Decode the entry into a signed int. Going through unsigned/signed char
		// explicitly keeps the result independent of whether plain char is
		// signed on the target platform.
		int PatchValue = 0;

		if(EntryBits == 4)
		{
			const unsigned char Packed = (unsigned char) pNode->m_PatchTable[i / 2];

			// Even entries live in the high nibble, odd ones in the low nibble
			PatchValue = (i % 2 == 0) ? (Packed >> 4) : (Packed & 0xF);

			// Sign-extend the 4 bit value: 8..15 stand for -8..-1
			if(PatchValue > 7)
				PatchValue -= 16;
		}
		else
		{
			PatchValue = (signed char) pNode->m_PatchTable[i];
		}

		// A zero entry changes nothing, no need to take the mutex for it
		if(PatchValue == 0)
			continue;

		// Convert remote position to local position, tables may vary in size
		const UINT LocalTablePos = UINT(i * Factor);

		m_mutexTable.lock();

		for(double Next = 0; Next < Factor; Next++)
		{
			const UINT HashPos = UINT(LocalTablePos + Next);

			if(PatchValue > 0)
			{
				// Remove node at pos
				if(m_HashTable[HashPos].RemoteKey == NULL)
					continue;

				// list::remove replaces a hand-written erase loop that skipped
				// elements and could step past end().
				m_HashTable[HashPos].RemoteKey->remove(dwNodeID);

				if(m_HashTable[HashPos].RemoteKey->empty())
				{
					delete m_HashTable[HashPos].RemoteKey;
					m_HashTable[HashPos].RemoteKey = NULL;

					m_dwRemoteSlots--;
				}
			}
			else
			{
				// Add node at pos; create the list if nothing remote is there yet
				if(m_HashTable[HashPos].RemoteKey == NULL)
				{
					m_HashTable[HashPos].RemoteKey = new std::list<DWORD>;

					m_dwRemoteSlots++;
				}

				bool NodeFound = false;

				list<DWORD>::iterator itNodeID = m_HashTable[HashPos].RemoteKey->begin();

				for( ; itNodeID != m_HashTable[HashPos].RemoteKey->end(); ++itNodeID)
					if(*itNodeID == dwNodeID)
					{
						NodeFound = true;
						break;
					}

				// Each node is listed at most once per slot
				if(!NodeFound)
					m_HashTable[HashPos].RemoteKey->push_back(dwNodeID);
			}
		}

		m_mutexTable.unlock();
	}
}

// Registers a shared file under the keywords derived from a string.
//
// sName        - text to index (typically a file name)
// nIndex       - index of the file the keywords refer to
// bBreakString - true: split sName into keywords with BreakupName();
//                false: use sName as one keyword unchanged
// sMetaTag     - if not empty, every keyword is stored as "<sMetaTag>=<keyword>"
//
// For every keyword the patch table entry at its hash is set, and the keyword
// is either added to the local bucket or, if already present there, nIndex is
// appended to its index list (once).
void MGnuWordHash::InsertString(CString sName, int nIndex, bool bBreakString, CString sMetaTag)
{
	// Breakup file name into keywords
	vector< CString > vsKeywords;

	if(bBreakString)
		BreakupName(sName, vsKeywords);
	else
		vsKeywords.push_back(sName);

	// Hash keywords and put the hash in the hash table
	for(UINT i = 0; i < vsKeywords.size(); i++)
	{
		CString sCurrentWord = vsKeywords[i];

		if(!sMetaTag.empty())
			sCurrentWord.insert(0, sMetaTag + "=");

		const UINT nWordHash = Hash(sCurrentWord, TABLE_BITS);

		// Modify patch table for new hash
		m_PatchTable[nWordHash] = 1 - TABLE_INFINITY;

		// The whole bucket update runs under the table mutex. The lookups in
		// this file iterate buckets and index lists while holding it, and
		// push_back may reallocate the storage they are walking.
		m_mutexTable.lock();

		bool bAddWord = true;

		// Check if word is already in table
		if(m_HashTable[nWordHash].LocalKey)
		{
			vector<WordData>& Bucket = *m_HashTable[nWordHash].LocalKey;

			for(UINT j = 0; j < Bucket.size(); j++)
			{
				if(!(Bucket[j].Text == sCurrentWord))
					continue;

				// Known word: remember the file index unless it is already listed
				bool bIndexFound = false;

				for(UINT k = 0; k < Bucket[j].Indexes.size(); k++)
					if(Bucket[j].Indexes[k] == nIndex)
					{
						bIndexFound = true;
						break;
					}

				if(!bIndexFound)
					Bucket[j].Indexes.push_back(nIndex);

				// A word is stored at most once per bucket, stop searching
				bAddWord = false;
				break;
			}
		}

		// If not add word to table
		if(bAddWord)
		{
			WordData NewWord;
			NewWord.Text = sCurrentWord;
			NewWord.Indexes.push_back(nIndex);

			if(m_HashTable[nWordHash].LocalKey == NULL)
			{
				m_HashTable[nWordHash].LocalKey = new vector<WordData>;

				m_dwUniqueSlots++;
			}

			m_HashTable[nWordHash].LocalKey->push_back(NewWord);

			m_dwHashedWords++;

			// Track the most crowded bucket seen so far
			if(m_HashTable[nWordHash].LocalKey->size() > m_dwLargestRehash)
				m_dwLargestRehash = m_HashTable[nWordHash].LocalKey->size();
		}

		m_mutexTable.unlock();
	}
}

// Not implemented: the body is empty, so both arguments are ignored and a call
// has no effect.
void MGnuWordHash::InsertWordHash(int nWordHash, int nIndex)
{
}

// Splits a name into lower-cased keywords and appends them to Keywords
// (duplicates are filtered by AddWord()).
//
// sName    - text to split
// Keywords - receives the keywords; existing content is kept
//
// Rules:
//  - only 0-9, A-Z and a-z (ASCII 48-57, 65-90, 97-122) are word characters;
//    every other character ends the current word
//  - an apostrophe (ASCII 39) is special: the word collected so far is emitted,
//    but collecting goes on, so "don't" produces both "don" and "dont"
//  - a NUL character ends the scan
//  - words shorter than two characters are dropped
void MGnuWordHash::BreakupName(CString sName, vector< CString >& Keywords)
{
	CString sPending = "";

	const int nLength = sName.size();

	for(int nPos = 0; nPos < nLength; ++nPos)
	{
		const char ch = sName[nPos];

		// End of name: whatever is pending gets flushed after the loop
		if(ch == '\0')
			break;

		// Apostrophe: emit the prefix, keep building the same word
		if(ch == '\'')
		{
			if(sPending.size() > 1)
				AddWord(Keywords, sPending);

			continue;
		}

		const bool bDigit = (ch >= '0' && ch <= '9');
		const bool bUpper = (ch >= 'A' && ch <= 'Z');
		const bool bLower = (ch >= 'a' && ch <= 'z');

		// Word character: collect it in lower case
		if(bDigit || bUpper || bLower)
		{
			sPending += ToLower(sName[nPos]);
			continue;
		}

		// Anything else separates words
		if(sPending.size() > 1)
			AddWord(Keywords, sPending);

		sPending = "";
	}

	// Trailing word
	if(sPending.size() > 1)
		AddWord(Keywords, sPending);
}

// Appends Word to Keywords unless an equal entry is already present.
void MGnuWordHash::AddWord(vector< CString >& Keywords, CString Word)
{
	vector< CString >::iterator itKnown = Keywords.begin();

	while(itKnown != Keywords.end())
	{
		// Already listed, nothing to do
		if(*itKnown == Word)
			return;

		++itKnown;
	}

	Keywords.push_back(Word);
}

void MGnuWordHash::LookupLocalSha1(const CString& sSha1, list<UINT> &Indexes)
{
	CString sKey = "sha1:" + sSha1;
	UINT WordHash = Hash(sKey, TABLE_BITS);
	bool LocalMatch = false;
	int j = 0;
	m_mutexTable.lock();

	// See if the sha1 is in hash table
	if(m_HashTable[WordHash].LocalKey)
		for(int j = 0; j < m_HashTable[WordHash].LocalKey->size(); j++)
			if((*m_HashTable[WordHash].LocalKey)[j].Text == sKey)
			{
				LocalMatch = true;
				break;
			}

	if(LocalMatch)
	{
		// Intersect indexes in hash table with current results
		for(int k = 0; k < (*m_HashTable[WordHash].LocalKey)[j].Indexes.size(); k++)
			Indexes.push_back((*m_HashTable[WordHash].LocalKey)[j].Indexes[k]);
	}
	m_mutexTable.unlock();
}

// Matches a query against the hash table.
//
// SearchQuery - query; Text is split into keywords, ExtendedPart (items
//               separated by 0x1C) contributes "sha1:<hash>" keys for
//               urn:sha1 items and meta keywords for everything else
// Indexes     - local result: file indexes that match all keywords; emptied as
//               soon as one keyword has no local match
// RemoteNodes - remote result: IDs of nodes listed for the hashes of all
//               keywords; emptied as soon as one hash has no remote entry
//
// Once the local (or remote) side has failed for one keyword it is not
// evaluated for the remaining keywords.
void MGnuWordHash::LookupQuery(QueryComp& SearchQuery, list<UINT> &Indexes, list<DWORD> &RemoteNodes)
{
	// Keywords of the plain text part
	vector< CString > Keywords;
	BreakupName(SearchQuery.Text, Keywords);

	// Keywords of the extended part
	if(!SearchQuery.ExtendedPart.empty())
	{
		list<CString> Extensions;
		split_str(SearchQuery.ExtendedPart, 0x1C, Extensions);

		list<CString>::iterator itExt = Extensions.begin();

		while(itExt != Extensions.end())
		{
			CString sItem = *itExt;
			sItem.make_lower();

			// urn:sha1:[32-character-SHA1]
			const bool bSha1Urn = sItem.find("urn:sha1:") == 0 && sItem.length() == 9 + 32;

			if(bSha1Urn)
				Keywords.push_back( sItem.substr(4,37) );   // keeps "sha1:[hash]"
			else
				BreakupMeta( sItem, Keywords );              // break out meta data

			++itExt;
		}
	}

	// Both sides stay "alive" while every keyword so far produced a match
	bool bLocalAlive  = true;
	bool bRemoteAlive = true;

	for(UINT nWord = 0; nWord < Keywords.size(); nWord++)
	{
		const UINT WordHash = Hash(Keywords[nWord], TABLE_BITS);

		// Local files
		if(bLocalAlive)
		{
			m_mutexTable.lock();

			// Find the bucket entry carrying exactly this keyword
			WordData* pHit = NULL;
			vector<WordData>* pBucket = m_HashTable[WordHash].LocalKey;

			if(pBucket != NULL)
				for(UINT nEntry = 0; nEntry < pBucket->size(); nEntry++)
					if((*pBucket)[nEntry].Text == Keywords[nWord])
					{
						pHit = &(*pBucket)[nEntry];
						break;
					}

			if(pHit == NULL)
				bLocalAlive = false;
			else if(!Indexes.empty())
				bLocalAlive = IntersectIndexes(Indexes, pHit->Indexes);
			else
			{
				// No results yet: start with everything stored for this keyword
				Indexes.insert(Indexes.end(), pHit->Indexes.begin(), pHit->Indexes.end());
				bLocalAlive = true;
			}

			m_mutexTable.unlock();

			if(!bLocalAlive)
				Indexes.clear();
		}

		// Remote nodes (children)
		if(bRemoteAlive)
		{
			m_mutexTable.lock();

			list<DWORD>* pRemote = m_HashTable[WordHash].RemoteKey;

			if(pRemote == NULL)
				bRemoteAlive = false;
			else if(!RemoteNodes.empty())
				bRemoteAlive = IntersectNodes(RemoteNodes, *pRemote);
			else
			{
				// No results yet: start with every node listed at this hash
				RemoteNodes.insert(RemoteNodes.end(), pRemote->begin(), pRemote->end());
				bRemoteAlive = true;
			}

			m_mutexTable.unlock();

			if(!bRemoteAlive)
				RemoteNodes.clear();
		}
	}
}

bool MGnuWordHash::IntersectIndexes(std::list<UINT> &Index, std::vector<UINT> &CompIndex)
{
	bool Match = false;

	std::list<UINT>::iterator itIndex;
	std::list<UINT>::iterator itNext;

	for(itIndex = Index.begin(); itIndex != Index.end(); itIndex++)
	{
		Match = false;

		for(int i = 0; i < CompIndex.size(); i++)
			if(*itIndex == CompIndex[i])
				Match = true;

		if(!Match)
		{
			itNext = ++itIndex;

			Index.erase(--itIndex);

			itIndex = itNext;
		}
	}

	if(Index.empty())
		return false;

	return true;
}

// Reduces Nodes to the IDs that also occur in CompNodes.
//
// Nodes     - list to filter in place
// CompNodes - IDs that are allowed to stay
//
// Returns true if at least one ID is left in Nodes, false otherwise.
bool MGnuWordHash::IntersectNodes(std::list<DWORD> &Nodes, std::list<DWORD> &CompNodes)
{
	std::list<DWORD>::iterator itNodeID = Nodes.begin();

	while(itNodeID != Nodes.end())
	{
		bool Match = false;

		std::list<DWORD>::iterator itCompID = CompNodes.begin();

		for( ; itCompID != CompNodes.end(); ++itCompID)
			if(*itNodeID == *itCompID)
			{
				Match = true;
				break;
			}

		// erase() returns the element after the erased one; the iterator is
		// advanced exactly once per element. Erasing inside a for loop with its
		// own increment skipped the following element and could run past end().
		if(Match)
			++itNodeID;
		else
			itNodeID = Nodes.erase(itNodeID);
	}

	return !Nodes.empty();
}

// Extracts keywords from an XML-like meta data query.
//
// QueryEx  - meta query text; it is modified (trimmed, double quotes replaced
//            by single quotes) while being parsed
// Keywords - receives the meta name (name of the first tag that carries
//            attributes) and one "<meta>.<attribute>=<word>" keyword for every
//            word of every attribute value of that tag
//
// Nothing is added if no tag followed by a space can be found. Parsing of the
// attributes stops at the first one that is not of the form  name='value'.
void MGnuWordHash::BreakupMeta(CString& QueryEx, vector< CString >& Keywords)
{
	// Get rid of <?xml version='1.0'?> (the declaration ends with "?>")
	int TrashPos = QueryEx.find("?>");
	if(TrashPos != -1)
		QueryEx = QueryEx.substr(TrashPos + 2);

	// Get rid of xsi:nonamespaceschemalocation
	TrashPos = QueryEx.find("xsi:");
	if(TrashPos != -1)
	{
		TrashPos = QueryEx.find(">", TrashPos);
		if(TrashPos != -1)
			QueryEx = QueryEx.substr(TrashPos + 1);
	}

	// The tag of interest is opened by the last '<' in front of the first
	// space, so the search has to look at the text BEFORE that space.
	int SpacePos = QueryEx.find(" ");
	if(SpacePos == -1)
		return;

	int TagPos = QueryEx.substr(0, SpacePos).rfind('<');
	if(TagPos == -1)
		return;

	QueryEx = QueryEx.substr(TagPos + 1);
	ReplaceSubStr(QueryEx, "\"", "'");

	// Get meta name: everything up to the first space
	SpacePos = QueryEx.find(" ");
	if(SpacePos == -1)
		return;

	CString MetaName = QueryEx.substr(0, SpacePos);
	if(MetaName.empty())
		return;

	Keywords.push_back( MetaName );

	// Get attributes
	int EqualPos = QueryEx.find("=");

	while(EqualPos != -1)
	{
		// The attribute name sits between the separating space and '='
		if(SpacePos == -1 || SpacePos > EqualPos)
			break;

		CString AttributeName = QueryEx.substr(SpacePos + 1, EqualPos - SpacePos - 1);

		// Both quotes must be found after the '='. Without the explicit check
		// for a missing opening quote the search for the closing one restarted
		// at the beginning of the string and the loop could spin forever.
		int FrontQuotePos = QueryEx.find("'", EqualPos);
		if(FrontQuotePos == -1)
			break;

		int BackQuotePos = QueryEx.find("'", FrontQuotePos + 1);
		if(BackQuotePos == -1 || AttributeName.empty())
			break;

		CString AttributeValue = QueryEx.substr(FrontQuotePos + 1, BackQuotePos - FrontQuotePos - 1);

		// Break up any value into keywords
		std::vector< CString > KeyValues;
		BreakupName( AttributeValue, KeyValues);

		for(UINT i = 0; i < KeyValues.size(); i++)
			Keywords.push_back( MetaName + "." + AttributeName + "=" + KeyValues[i] );

		// Continue behind the closing quote
		SpacePos = QueryEx.find(" ", BackQuotePos);
		EqualPos = QueryEx.find("=", BackQuotePos);
	}
}

//////////////////////////////////////////////////////////////////////////////////////////
// Code from LimeWire QRP standard of hashing keywords for tables

/**
 * Hashes a keyword into a table position. The keyword is lower-cased
 * character by character while hashing.
 *
 * @param x the string to hash (the whole string is used)
 * @param bits the number of bits to use in the resulting answer
 * @return the value HashFast() computes for the folded string
 */

UINT MGnuWordHash::Hash(CString x, BYTE bits)
{
	//1. Fold the string into one number: byte n of the string is XOR'ed into
	//byte (n % 4) of the running value, byte 0 being the least significant
	//one. This equals XOR'ing all 4-byte chunks as little-endian numbers with
	//the last chunk padded by zeroes, because y XOR 0 == y.

	unsigned long long nFolded = 0;  //the running value
	unsigned int       nShift  = 0;  //bit offset of the current byte: 0, 8, 16, 24, 0, ...

	const int nLength = x.length();

	for (int nPos = 0; nPos < nLength; ++nPos)
	{
		const unsigned long long nByte = ToLower(x[nPos]) & 0xFF;

		nFolded ^= nByte << nShift;
		nShift   = (nShift + 8) % 32;
	}

	//2. Now map number to range 0 - (2^bits-1).
	return HashFast(nFolded, bits);
}


/**
 * Returns the n-<b>bit</b> hash of x, where n="bits".  That is, the
 * returned value can fit in "bits" unsigned bits, and is between 0 and
 * (2^bits)-1.
 *
 * The shift below is by (64 - bits) positions, so this assumes
 * 0 < bits <= 32 -- NOTE(review): callers in this file pass TABLE_BITS; confirm.
 */

UINT MGnuWordHash::HashFast(unsigned long long x, BYTE bits)
{
	//Multiplication-based hash function.  See Chapter 12.3.2. of CLR.
	//A_INT is kept as the last token of its statement on purpose: that way the
	//line compiles whether or not the macro carries a trailing semicolon.
	unsigned long long nHashed = x * A_INT;

	//Throw away everything above the low 32 bits of the product ...
	nHashed <<= 32;

	//... and keep the top "bits" bits of what is left (logical shift, the
	//value is unsigned).
	nHashed >>= (32 + (32 - bits));

	return (UINT) nHashed;
}


