//******************************************************************************
//
// File:    MaxParsExh.java
// Package: edu.rit.phyl.pars
// Unit:    Class edu.rit.phyl.pars.MaxParsExh
//
// This Java source file is copyright (C) 2007 by Alan Kaminsky. All rights
// reserved. For further information, contact the author, Alan Kaminsky, at
// ark@cs.rit.edu.
//
// This Java source file is part of the Parallel Java Library ("PJ"). PJ is free
// software; you can redistribute it and/or modify it under the terms of the GNU
// General Public License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// PJ is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
// A PARTICULAR PURPOSE. See the GNU General Public License for more details.
//
// A copy of the GNU General Public License is provided in the file gpl.txt. You
// may also obtain a copy of the GNU General Public License on the World Wide
// Web at http://www.gnu.org/licenses/gpl.html.
//
//******************************************************************************

package edu.rit.phyl.pars;

import edu.rit.pj.Comm;

import java.io.File;

/**
 * Class MaxParsExh is the main program for maximum parsimony phylogenetic tree
 * construction using exhaustive search. The program reads a list of DNA
 * sequences from the given input file in interleaved PHYLIP format; see class
 * {@linkplain DnaSequenceList} for further information. If the argument
 * <I>S</I> is given, the program considers only the first <I>S</I> DNA
 * sequences, otherwise the program considers all the DNA sequences. The program
 * generates all possible rooted bifurcating trees with those DNA sequences and
 * computes each tree's parsimony score (number of state changes) using the
 * Fitch algorithm. The program prints the best tree or trees -- those with the
 * smallest score -- on the standard output in Newick Standard format.
 * <P>
 * Usage: java edu.rit.phyl.pars.MaxParsExh <I>infile</I> [ <I>S</I> ]
 * <BR><I>infile</I> = DNA sequence file in interleaved PHYLIP format
 * <BR><I>S</I> = Number of DNA sequences to consider (default: all)
 *
 * @author  Alan Kaminsky
 * @version 06-May-2007
 */
public class MaxParsExh
	{

// Prevent construction.

	private MaxParsExh()
		{
		}

// Global variables.

	// List of DNA sequences read from the input file.
	static DnaSequenceList sequences;

	// Number of DNA sequences. While the command line is being parsed this
	// temporarily holds the requested count (0 = not specified); after the
	// input file is read it holds the actual number of sequences in use.
	static int S;

	// List of DNA sequences with uninformative sites excised.
	static DnaSequenceList excised;

	// Number of state changes due to uninformative sites.
	static int uninformativeStateChanges;

	// Stack of trees. The tree at level (index) i contains i+1 DNA sequences.
	static DnaSequenceTree[] treeStack;

	// List of DNA sequence trees with the best score found so far.
	static DnaSequenceTreeList bestTreeList;

	// Stringifier for printing trees. A node is printed as the name that the
	// original (un-excised) sequence list reports for the value returned by the
	// node's sequence score().
	// NOTE(review): presumably the score of a tip node's sequence holds that
	// sequence's index in the list -- confirm against DnaSequenceList.
	static DnaSequenceTree.Stringifier stringifier =
		new DnaSequenceTree.Stringifier()
			{
			public String toString
				(DnaSequenceTree.Node node)
				{
				return sequences.getName (node.sequence().score());
				}
			};

// Main program.

	/**
	 * Main program. Reads the DNA sequences, excises the uninformative sites,
	 * generates every tree by adding the sequences one at a time, and prints
	 * the running time followed by the best tree(s) found.
	 *
	 * @param  args  Command line arguments: <I>infile</I> [ <I>S</I> ]. If
	 *               <I>S</I> is omitted or is not positive, all the DNA
	 *               sequences in the input file are considered. If <I>S</I>
	 *               is not an integer, the usage message is printed and the
	 *               program exits.
	 *
	 * @exception  Exception
	 *     Thrown if an error occurred in any of the operations the program
	 *     performs (such as reading the input file).
	 */
	public static void main
		(String[] args)
		throws Exception
		{
		Comm.init (args);

		// Start timing.
		long time = -System.currentTimeMillis();

		// Parse command line arguments.
		if (args.length < 1 || args.length > 2) usage();
		File infile = new File (args[0]);
		if (args.length < 2)
			{
			S = 0;
			}
		else
			{
			try
				{
				S = Integer.parseInt (args[1]);
				}
			catch (NumberFormatException exc)
				{
				// A malformed count is a command line error like any other;
				// report it with the usage message rather than a stack trace.
				usage();
				}
			}

		// Read DNA sequences from input file, truncate unwanted ones, warn of
		// duplicates.
		System.out.println ("Reading input file ...");
		sequences = DnaSequenceList.read (infile);
		if (S > 0) sequences.truncate (S);
		S = sequences.length();
		System.out.println (S + " sequences");
		for (int s = 0; s < S; ++ s)
			{
			System.out.println ("\t" + sequences.getName (s));
			}

		// Everything below indexes sequence 0 and tree stack level 0, so an
		// empty sequence list cannot be processed. Fail with a clear message
		// instead of an index exception.
		if (S == 0)
			{
			System.err.println ("MaxParsExh: No DNA sequences to process");
			System.exit (1);
			}

		System.out.println (sequences.getDnaSequence(0).length() + " sites");
		sequences.warnOfDuplicates();

		// Excise uninformative sites, warn of duplicates.
		System.out.println ("Excising uninformative sites ...");
		excised = new DnaSequenceList();
		uninformativeStateChanges =
			sequences.exciseUninformativeSites (excised);
		System.out.println
			(excised.getDnaSequence(0).length() + " informative sites");
		System.out.println
			(uninformativeStateChanges +
			 " state changes from uninformative sites");
		excised.warnOfDuplicates();

		// Allocate tree stack with S levels.
		treeStack = new DnaSequenceTree [S];
		for (int s = 0; s < S; ++ s)
			{
			treeStack[s] = new DnaSequenceTree (S);
			}

		// Allocate best tree list.
		bestTreeList = new DnaSequenceTreeList (S);

		// Set stringifier for printing tree tip nodes.
		DnaSequenceTree.defaultStringifier (stringifier);

		// Generate trees at all levels. The level 0 tree consists of just the
		// first DNA sequence; the recursion adds the remaining ones.
		treeStack[0].addTipNode (0, excised.getDnaSequence (0));
		generateTrees (0);

		// Stop timing. The variable was initialized to minus the start time,
		// so adding the stop time yields the elapsed time.
		time += System.currentTimeMillis();
		System.out.println (time + " msec");

		// Print best trees.
		System.out.println
			(bestTreeList.bestScore() + " state changes in best tree(s)");
		for (DnaSequenceTree tree : bestTreeList)
			{
			System.out.println (tree);
			}
		}

// Hidden operations.

	/**
	 * Generate all trees at the given level and, recursively, at all higher
	 * levels. The tree at <TT>treeStack[level]</TT> must already contain the
	 * first <TT>level</TT>+1 DNA sequences. Trees that contain all <TT>S</TT>
	 * DNA sequences are offered to the best tree list.
	 *
	 * @param  level  Level (index into the tree stack).
	 */
	private static void generateTrees
		(int level)
		{
		DnaSequenceTree currentTree = treeStack[level];
		int levelPlus1 = level + 1;

		// If all DNA sequences are in the current tree, record it in the best
		// tree list. The tree returned by the list takes over this level's
		// slot in the tree stack.
		// NOTE(review): presumably the list may keep the tree it was given and
		// hand back a different tree for reuse -- confirm against
		// DnaSequenceTreeList.add().
		if (levelPlus1 == S)
			{
			treeStack[level] = bestTreeList.add (currentTree);
			}

		// If not all DNA sequences are in the current tree, generate all
		// possible trees at the next level: one for each position, 0 through
		// nodeCount()-1, at which the next DNA sequence can be added.
		else
			{
			DnaSequence seq = excised.getDnaSequence (levelPlus1);
			int n = currentTree.nodeCount();
			for (int i = 0; i < n; ++ i)
				{
				// Fetch the next level's tree afresh on every iteration; the
				// recursive call below may have replaced that slot.
				DnaSequenceTree nextTree = treeStack[levelPlus1];
				nextTree.copy (currentTree);
				nextTree.updateFitchScore (nextTree.addTipNode (i, seq));
				generateTrees (levelPlus1);
				}
			}
		}

	/**
	 * Print a usage message on the standard error and exit with status 1.
	 */
	private static void usage()
		{
		System.err.println ("Usage: java edu.rit.phyl.pars.MaxParsExh <infile> [<S>]");
		System.err.println ("<infile> = DNA sequence file in interleaved PHYLIP format");
		System.err.println ("<S> = Number of DNA sequences to consider (default: all)");
		System.exit (1);
		}

	}
