/* Copyright (C) 2000, 2001, 2002 by SWsoft
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*  $Id: hrefs.h,v 1.13 2002/01/09 11:50:14 kir Exp $
    Author : Alexander F. Avdonkin
*/

#ifndef _HREFS_H_
#define _HREFS_H_

#include "aspseek-cfg.h"
#ifdef HAVE_EXT_HASH_MAP_SET
#include <ext/hash_map>
#else
#include <hash_map>
#endif

#include <pthread.h>
#include "documents.h"

using std::hash_map;

// Fixed-capacity string type used for URL names throughout this header.
// The capacity argument is MAX_URL_LEN + 1 (presumably one extra character
// for the terminating NUL -- confirm against CFixedString).
typedef CFixedString<MAX_URL_LEN + 1> CURLName;

// Forward declaration: this header only refers to CServer through pointers.
class CServer;

/// This class holds information about particular URL, generated by "Server" commands in config file
class CHref
{
public:
	int m_hops;		///< Depth of URL in the hyperlink tree
	int m_referrer;		///< URL ID of URL, which has outgoing link to this URL
	int m_stored;		///< Not used
	int m_delta;		///< Time to subtract from current time to use as next index time for URL
	ULONG m_urlID;		///< URL ID of this URL
	string m_server;	///< Server name for URL, generated by "Server" commands in config file

public:
	CHref()
	{
		m_stored = 0;
		m_delta = 0;
		m_urlID = 0;
	}
};

/// This class holds information about a particular URL in the cache of outgoing hyperlinks
class CHrefInfo
{
public:
	CHrefInfo* m_lru;		///< Pointer to less recently used hyperlink info
	CHrefInfo* m_mru;		///< Pointer to more recently used hyperlink info
	ULONG m_urlID;			///< URL ID of this URL
	const CURLName* m_this;		///< Pointer to the URL name used as key in hash table
	ULONG m_refs;
	CEventLink* m_waiting;

public:
	CHrefInfo()
	{
		m_mru = m_lru = NULL;
		m_this = NULL;
		m_waiting = NULL;
		m_urlID = 0;
		m_refs = 0;
	}
	~CHrefInfo()
	{
		// This is for debugging
		m_this = (CURLName*)0x80808080;
		m_lru = (CHrefInfo*)0xFF00FF00;
		m_mru = (CHrefInfo*)0xCCCCCCCC;
	}
};

namespace std {
	struct hash<CURLName> {
		size_t operator()(const CURLName& __s) const
		{
			return __stl_hash_string(__s.c_str());
		}
	};
}

// Forward declaration: this header only refers to CSQLDatabaseI through pointers.
class CSQLDatabaseI;
/**
 * This class represents the outgoing hyperlink cache.
 * Instantiated once as a global variable.
 * Used to keep URL IDs for URLs of hyperlinks from recently indexed pages
 * to reduce database load. It restricts the total number of URLs to the value
 * of "HrefCacheSize" in the config file by removing URLs which
 * have not been encountered for a long time, to reduce the amount of memory used for caching.
 */
class CStoredHrefs : public hash_map<CURLName, CHrefInfo>
{
public:
	// Counters used when printing statistics
	ULONG m_queries;	///< Number of queries
	ULONG m_hits;		///< Number of hits
	ULONG m_lost;		///< Number of removed URLs
	ULONG m_waits;		///< Counter, starts at 0 (presumably counts waits on pending entries -- confirm against the implementation)
	ULONG m_waits1;		///< Counter, starts at 0 (meaning not visible in this header -- confirm against the implementation)

	pthread_mutex_t m_mutex;	///< Mutex, used to protect hash table and linked list
	CHrefInfo* m_lru;		///< Least recently used URL
	ULONG m_maxSize;		///< Maximum number of stored URLs

public:
	/// Creates an empty cache with zeroed statistics, a default-attribute
	/// mutex and a limit of 10000 stored URLs.
	// NOTE(review): the class owns a pthread mutex but declares no copy
	// constructor or assignment operator, so it is implicitly copyable;
	// confirm that no caller copies an instance.
	CStoredHrefs()
		: m_queries(0),
		  m_hits(0),
		  m_lost(0),
		  m_waits(0),
		  m_waits1(0),
		  m_lru(NULL),
		  m_maxSize(10000)
	{
		pthread_mutex_init(&m_mutex, NULL);
	}

	/// Releases the mutex created by the constructor.
	~CStoredHrefs()
	{
		pthread_mutex_destroy(&m_mutex);
	}

	/// "GetHref" methods return URL ID of specified URL. If URL is not found in cache and database, it is inserted to them
	ULONG GetHref(CSQLDatabaseI* database, CServer* srv, const char* chref, int referrer, int hops, const char* server, int hopsord, int delta);
	ULONG GetHref(CSQLDatabaseI* database, const char* chref, int referrer, int hops, const char* server, int hopsord, int delta);

	/// Adds URL to the cache, called from GetHref
	void AddHref1(const char* url, ULONG urlID);

	/// AddHref adds URL to the cache if it is not found, otherwise makes it the most recently used
	void AddHref(const char* url, ULONG lost, ULONG urlID);

	int Contains(const char* url);	///< Not used now
	void MoveMRU(iterator& it);	///< Makes URL the most recently used

	// The two declarations below are not documented in this header; see the
	// implementation file for their exact contract.
	CHrefInfo* AddEmptyHref(const char* url);
	void SetHref(const char* url, CHrefInfo* hinfo);
};

// Hash table mapping a URL name to its CHref record.
typedef hash_map<CURLName, CHref> CStringToHrefMap;
extern CStringToHrefMap Hrefs;		///< URLs generated by "Server" commands in config file
extern CStoredHrefs StoredHrefs;	///< Instance of outgoing hyperlink cache

// Free-function overloads of AddHref; they differ only in whether a server
// name is passed. Presumably they register the URL in Hrefs -- confirm
// against the implementation file.
void AddHref(char *href,int referrer,int hops, const char* server, int delta);
void AddHref(char *href,int referrer,int hops, int delta);

// Judging by the name, a test routine for the hyperlink cache; its behavior
// is not visible in this header.
void TestHrefCacheN(CSQLDatabaseI* database);

#endif
