
/* Web Polygraph       http://www.web-polygraph.org/
 * (C) 2003-2006 The Measurement Factory
 * Licensed under the Apache License, Version 2.0 */

#include "xstd/xstd.h"

#include "xstd/h/iomanip.h"

#include "xstd/String.h"
#include "xstd/StrIdentifier.h"


/* StrIdentifier */

StrIdentifier::StrIdentifier(): theCount(0), theLastId(0), isLocked(false) {
}

StrIdentifier::~StrIdentifier() {
	while (theStrings.count()) delete theStrings.pop();
}

int StrIdentifier::add(const String &str) {
	return doAdd(str, ++theLastId);
}

int StrIdentifier::add(const String &str, int id) {
	Assert(theLastId != id); // weak check for uniqueness
	return doAdd(str, theLastId = id);
}

int StrIdentifier::doAdd(const String &str, int id) {
	Assert(!isLocked); // no additions if optimize()-d
	String *copy = new String(str);
	theHead.add(copy, id, -1);
	theStrings.put(copy, id);
	theCount++;
	return id;
}

StrIdentifierIter StrIdentifier::iterator() const {
	return StrIdentifierIter(*this);
}

int StrIdentifier::lookup(const String &str) const {
	return lookup(str.data(), str.len());
}

int StrIdentifier::lookup(const char *buf, int len) const {
	return theHead.lookup(buf, len);
}

// collapses single-branch paths
void StrIdentifier::optimize() {
	// new entries cannot be added from now on
	// note: we can support run-time additions, but Node::add 
	//       must not assume sequntial lookup positions
	isLocked = true; 
	theHead.optimize();
}

void StrIdentifier::report(ostream &os) const {
	theHead.report(os, 0);
}


/* StrIdentifierIter */

StrIdentifierIter::StrIdentifierIter(const StrIdentifier &anIdfr):
	theIdfr(anIdfr), thePos(0) {
	sync();
}

StrIdentifierIter &StrIdentifierIter::operator ++() {
	if (!atEnd()) {
		++thePos;
		sync();
	}
	return *this;
}

void StrIdentifierIter::sync() {
	while (thePos < theIdfr.theStrings.count() && !theIdfr.theStrings[thePos]) {
		++thePos;
	}
}

bool StrIdentifierIter::atEnd() const {
	return thePos >= theIdfr.theStrings.count();
}

const String &StrIdentifierIter::str() const {
	Assert(!atEnd());
	return *theIdfr.theStrings[thePos];
}

int StrIdentifierIter::id() const {
	Assert(!atEnd());
	return thePos; // same as id in this StrIdentifier implementation
}

/* StrIdNode */

StrIdNode::StrIdNode(): theId(0) {
	u.theTab = 0;
	u.theStr = 0;
}

void StrIdNode::add(String *str, int id, int myPos) {
	if (!theId) { // virgin node
		Assert(id > 0 && str);
		theId = id;
		u.theStr = str;
	} else
	if (theId > 0) { // conflict, create next level table
		String *s = u.theStr;
		u.theTab = new StrIdTable(myPos+1);
		u.theTab->add(s, theId);
		u.theTab->add(str, id);
		theId = -1;
	} else {  // intermediate node, move further
		u.theTab->add(str, id);
	}
}

int StrIdNode::lookup(const char *buf, int len) const {
	if (!theId)
		return 0;
	else
	if (theId > 0)
		return u.theStr->casePrefixOf(buf, len) ? theId : 0;
	else
		return u.theTab->lookup(buf, len);
}

void StrIdNode::optimize() {
	// shrink outgoing branch link by link until hit a fan-out
	StrIdNode n;
	while (theId < 0 && u.theTab->single(n)) {
		delete u.theTab;
		*this = n;
	}
	
	// recurse if needed
	if (theId < 0)
		u.theTab->optimize();
}

void StrIdNode::report(ostream &os, int myLevel) const {
	if (!theId)
		os << "(0) <none>" << endl;
	else
	if (theId > 0)
		os << '(' << theId << ") " << *u.theStr << endl;
	else
	if (theId < 0)
		u.theTab->report(os, myLevel+1);
}


/* StrIdTable */

StrIdTable::StrIdTable(int aPos): theLookupPos(aPos) {
	Assert(0 <= theLookupPos && theLookupPos <= 255);
}

void StrIdTable::add(String *str, int id) {
	const int idx = lookupIdx(*str);
	if (idx >= theNodes.capacity()) { // out of bounds
		theNodes.stretch(idx+1);
		theNodes.count(theNodes.capacity());
	}
	theNodes[idx].add(str, id, theLookupPos);
}

int StrIdTable::lookup(const char *buf, int len) const {
	const int idx = lookupIdx(buf, len);
	if (idx < 0) // buffer is too short already
		return 0;
	else
	if (idx >= theNodes.count()) // lookup char is out of bounds
		return 0;
	else
		return theNodes[idx].lookup(buf, len);
}

void StrIdTable::optimize() {
	for (int i = 0; i < theNodes.count(); ++i) {
		if (!theNodes[i].dead())
			theNodes[i].optimize();
	}
}

bool StrIdTable::single(StrIdNode &n) {
	// to optimize we could've keep track on the number of live nodes
	// but it is combersome because of node conflicts/propogation
	int idx = -1;
	for (int i = 0; i < theNodes.count(); ++i) {
		if (!theNodes[i].dead()) {
			if (idx >= 0)
				return false; // more than one found
			idx = i;
		}
	}
	if (idx < 0) // none found
		return false;
	n = theNodes[idx];
	return true;
}

void StrIdTable::report(ostream &os, int myLevel) const {
	os << endl;
	for (int i = 0; i < theNodes.count(); ++i) {
		if (!theNodes[i].dead()) {
			os << setw(2*myLevel) << " ";
			if (i > 32)
				os << (char)i;
			else
				os << '<' << i << '>';
			os << ": ";
			theNodes[i].report(os, myLevel);
		}
	}
}
