/*
 * The MIT License
 *
 * Copyright (c) 2009 The Broad Institute
 * Copyright (c) 2013 German Tischler
 * Copyright (c) 2013 Genome Research Limited
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#if ! defined(LIBMAUS2_BAMBAM_DUPLICATIONMETRICS_HPP)
#define LIBMAUS2_BAMBAM_DUPLICATIONMETRICS_HPP

#include <libmaus2/types/types.hpp>
#include <libmaus2/exception/LibMausException.hpp>
#include <libmaus2/util/NumberSerialisation.hpp>
#include <cassert>
#include <cmath>
#include <istream>
#include <map>
#include <ostream>
#include <string>

namespace libmaus2
{
	namespace bambam
	{
		/**
		 * duplication metrics class
		 **/
		struct DuplicationMetrics
		{
			//! number of unmapped reads (printed in the UNMAPPED_READS column by format())
			uint64_t unmapped;
			//! number of unpaired reads (printed in the UNPAIRED_READS_EXAMINED column by format())
			uint64_t unpaired;
			//! number of examined pairs (printed in the READ_PAIRS_EXAMINED column by format())
			uint64_t readpairsexamined;
			//! number of unpaired duplicates (printed in the UNPAIRED_READ_DUPLICATES column by format())
			uint64_t unpairedreadduplicates;
			//! number of paired duplicates (printed in the READ_PAIR_DUPLICATES column by format())
			uint64_t readpairduplicates;
			//! number of optical duplicates (printed in the READ_PAIR_OPTICAL_DUPLICATES column by format())
			uint64_t opticalduplicates;

			/**
			 * default constructor; creates a metrics object with all six counters set to zero
			 **/
			DuplicationMetrics()
			: unmapped(0)
			, unpaired(0)
			, readpairsexamined(0)
			, unpairedreadduplicates(0)
			, readpairduplicates(0)
			, opticalduplicates(0)
			{
			}

			/**
			 * constructor from stream
			 *
			 * @param istr input stream
			 **/
			DuplicationMetrics(std::istream & in)
			{
				deserialise(in);
			}

			/**
			 * Code imported from picard for library size estimation
			 *
			 * Estimates the size of a library based on the number of paired end molecules observed
			 * and the number of unique pairs ovserved.
			 *
			 * Based on the Lander-Waterman equation that states:
			 *     C/X = 1 - exp( -N/X )
			 * where
			 *     X = number of distinct molecules in library
			 *     N = number of read pairs
			 *     C = number of distinct fragments observed in read pairs
			 *
			 * @param readPairs number of read pairs
			 * @param uniqueReadPairs number of unique read pairs (total minus duplicates)
			 * @return estimated library size
			 */
			static int64_t estimateLibrarySize(int64_t const readPairs, int64_t const uniqueReadPairs)
			{
				int64_t const readPairDuplicates = readPairs - uniqueReadPairs;

				if (readPairs > 0 && readPairDuplicates > 0)
				{
					int64_t const n = readPairs;
					int64_t const c = uniqueReadPairs;

					double m = 1.0, M = 100.0;

					if (c >= n || f(m*c, c, n) < 0)
					{
						::libmaus2::exception::LibMausException se;
						se.getStream() << "[E] Invalid values for pairs and unique pairs: " << n << ", " << c << std::endl;
						se.finish();
						throw se;
					}

					uint64_t po = 0;
					uint64_t const polimit = 16*1024;
					while( f(M*c, c, n) >= 0 && po < polimit )
					{
						M *= 10.0;
						po += 1;
					}
					if ( po == polimit )
					{
						::libmaus2::exception::LibMausException se;
						se.getStream() << "[E] Detected (most likely) non terminating while loop" << std::endl;
						se.finish();
						throw se;
					}

					for ( int i=0; i < 40; ++i )
					{
						double const r = (m+M)/2.0;
						double const u = f( r * c, c, n );
						if ( u == 0 ) break;
						else if ( u > 0 ) m = r;
						else if ( u < 0 ) M = r;
					}

					return static_cast<int64_t> (c * (m+M)/2.0);
				}
				else
				{
					return -1;
				}
			}

			/**
			 * helper function for the library size estimation; a root of this function in x
			 * satisfies c/x = 1 - e^(-n/x)
			 *
			 * @param x point at which the function is evaluated
			 * @param c first numerator (estimateLibrarySize passes the number of unique read pairs)
			 * @param n numerator inside the exponential (estimateLibrarySize passes the number of read pairs)
			 * @return c/x - 1 + e^(-n/x)
			 **/
			static double f(double const x, double const c, double const n)
			{
				double const ratio = c/x;
				double const decay = ::std::exp(-n/x);
				return ratio - 1 + decay;
			}

			/**
			 * Estimates the ROI (return on investment) that one would see if a library was sequenced to
			 * x higher coverage than the observed coverage.
			 *
			 * The computation is carried out in floating point; estimatedLibrarySize and uniquePairs
			 * are not checked for zero.
			 *
			 * @param estimatedLibrarySize the estimated number of molecules in the library
			 * @param x the multiple of sequencing to be simulated (i.e. how many X sequencing)
			 * @param pairs the number of pairs observed in the actual sequencing
			 * @param uniquePairs the number of unique pairs observed in the actual sequencing
			 * @return a number z <= x that estimates if you had pairs*x as your sequencing then you
			 *         would observe uniquePairs*z unique pairs.
			 */
			static double estimateRoi(int64_t estimatedLibrarySize, double x, int64_t pairs, int64_t uniquePairs)
			{
				// number of pairs sequenced in the simulated experiment
				double const simulatedPairs = x*pairs;
				// e^(-simulatedPairs/estimatedLibrarySize)
				double const unseenFraction = ::std::exp(-simulatedPairs/estimatedLibrarySize);
				// unique pairs expected in the simulated experiment
				double const expectedUnique = estimatedLibrarySize * ( 1 - unseenFraction );

				return expectedUnique / uniquePairs;
			}

			/**
			 * print header for duplication metrics stats: the command line as a "# " comment line,
			 * an empty line, the "##METRICS" marker and the tab separated column names
			 *
			 * @param CL command line
			 * @param out output stream
			 * @return output stream
			 **/
			static std::ostream & printFormatHeader(std::string const & CL, std::ostream & out)
			{
				// column names, in the order in which format() writes the values
				char const * const columnNames = "LIBRARY\tUNPAIRED_READS_EXAMINED\tREAD_PAIRS_EXAMINED\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATES\tPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE\n";

				// command line as comment
				out << "# " << CL << std::endl;
				// empty separator line
				out << std::endl;
				// start of metrics section
				out << "##METRICS" << std::endl;
				out << columnNames;

				return out;
			}

			/**
			 * Calculates a histogram using the estimateRoi method to estimate the effective yield
			 * doing x sequencing for x=1..100.
			 *
			 * The library size is estimated from (examined pairs - optical duplicates) and
			 * (examined pairs - pair duplicates). If that estimate is negative or an exception
			 * is thrown, the histogram computed so far (normally empty) is returned.
			 *
			 * @return ROI histogram mapping x to the estimated ROI
			 */
			std::map<unsigned int,double> calculateRoiHistogram()  const
			{
				std::map<unsigned int, double> roi;

				try
				{
					int64_t const pairsWithoutOptical =
						static_cast<int64_t>(readpairsexamined) - static_cast<int64_t>(opticalduplicates);
					int64_t const pairsWithoutDuplicates =
						static_cast<int64_t>(readpairsexamined) - static_cast<int64_t>(readpairduplicates);
					int64_t const librarySize = estimateLibrarySize(pairsWithoutOptical,pairsWithoutDuplicates);

					// a negative estimate means that no library size is available
					if ( librarySize >= 0 )
					{
						int64_t const uniquePairs = readpairsexamined - readpairduplicates;

						for ( unsigned int x = 1; x <= 100; ++x )
							roi[x] += estimateRoi(librarySize, static_cast<double>(x), readpairsexamined, uniquePairs);
					}
				}
				catch(...)
				{
					// best effort: exceptions are swallowed and whatever was computed is returned
				}

				return roi;
			}

			/**
			 * print the ROI histogram computed by calculateRoiHistogram, one
			 * "x<TAB>value" line per entry; entries with value zero are skipped
			 *
			 * @param out output stream
			 * @return out
			 **/
			std::ostream & printHistogram(std::ostream & out) const
			{
				typedef std::map<unsigned int,double> histogram_type;

				histogram_type const roi = calculateRoiHistogram();

				for ( histogram_type::const_iterator it = roi.begin(); it != roi.end(); ++it )
				{
					// skip entries whose value is zero
					if ( ! it->second )
						continue;

					out << it->first << "\t" << it->second << std::endl;
				}

				return out;
			}

			/**
			 * print duplication metrics as one tab separated line: library name, the six counters
			 * in the column order of printFormatHeader, the duplication value and the estimated
			 * library size
			 *
			 * The duplication value is the fraction
			 * (unpaired duplicates + 2 * pair duplicates) / (unpaired reads + 2 * examined pairs),
			 * or 0 if the denominator is zero; it is not multiplied by 100.
			 * Exceptions thrown by estimateLibrarySize are not caught here.
			 *
			 * @param out output stream
			 * @param libraryName name of seq library
			 * @return output stream
			 **/
			std::ostream & format(std::ostream & out, std::string const libraryName) const
			{
				// library size estimate is computed first, so nothing is written if it throws
				uint64_t const pairsWithoutOptical = readpairsexamined - opticalduplicates;
				uint64_t const pairsWithoutDuplicates = readpairsexamined - readpairduplicates;
				int64_t const librarySize = estimateLibrarySize(pairsWithoutOptical, pairsWithoutDuplicates);

				// reads examined and reads flagged as duplicate; each pair counts as two reads
				uint64_t const readsExamined = unpaired + readpairsexamined*2;
				uint64_t const readsDuplicate = unpairedreadduplicates + 2*readpairduplicates;

				double duplicationFraction = 0;
				if ( readsExamined )
					duplicationFraction = readsDuplicate / static_cast<double> (readsExamined);

				out << libraryName << "\t";
				out << unpaired << "\t" << readpairsexamined << "\t" << unmapped << "\t";
				out << unpairedreadduplicates << "\t" << readpairduplicates << "\t" << opticalduplicates << "\t";
				out << duplicationFraction << "\t" << librarySize << "\n";

				return out;
			}

			/**
			 * serialise to stream; writes the six counters via NumberSerialisation::serialiseNumber
			 * in the order unmapped, unpaired, readpairsexamined, unpairedreadduplicates,
			 * readpairduplicates, opticalduplicates
			 *
			 * @param out output stream
			 **/
			void serialise(std::ostream & out) const
			{
				// counters in serialisation order
				uint64_t const counters[] = {
					unmapped,
					unpaired,
					readpairsexamined,
					unpairedreadduplicates,
					readpairduplicates,
					opticalduplicates
				};
				unsigned int const numCounters = sizeof(counters)/sizeof(counters[0]);

				for ( unsigned int i = 0; i < numCounters; ++i )
					libmaus2::util::NumberSerialisation::serialiseNumber(out,counters[i]);
			}

			/**
			 * read object from stream; reads the six counters via
			 * NumberSerialisation::deserialiseNumber in the order in which serialise writes them
			 *
			 * @param in input stream
			 **/
			void deserialise(std::istream & in)
			{
				// counters in serialisation order
				uint64_t * const counters[] = {
					&unmapped,
					&unpaired,
					&readpairsexamined,
					&unpairedreadduplicates,
					&readpairduplicates,
					&opticalduplicates
				};
				unsigned int const numCounters = sizeof(counters)/sizeof(counters[0]);

				for ( unsigned int i = 0; i < numCounters; ++i )
					*(counters[i]) = libmaus2::util::NumberSerialisation::deserialiseNumber(in);
			}

			/**
			 * add stats from object O to this object (counter by counter)
			 *
			 * @param O other object to be added
			 * @return reference to this object
			 **/
			DuplicationMetrics & operator+=(DuplicationMetrics const & O)
			{
				DuplicationMetrics & self = *this;

				self.unmapped               += O.unmapped;
				self.unpaired               += O.unpaired;
				self.readpairsexamined      += O.readpairsexamined;
				self.unpairedreadduplicates += O.unpairedreadduplicates;
				self.readpairduplicates     += O.readpairduplicates;
				self.opticalduplicates      += O.opticalduplicates;

				return self;
			}

			/**
			 * merge two maps of duplication metrics
			 *
			 * The result contains every key of MA and MB. Metrics stored under a key present
			 * in both maps are added using operator+=, metrics stored under a key present in
			 * only one map are copied.
			 *
			 * @param MA first map
			 * @param MB second map
			 * @return merged map
			 **/
			static std::map<uint64_t,libmaus2::bambam::DuplicationMetrics> add(
				std::map<uint64_t,libmaus2::bambam::DuplicationMetrics> const & MA,
				std::map<uint64_t,libmaus2::bambam::DuplicationMetrics> const & MB
			)
			{
				typedef std::map<uint64_t,libmaus2::bambam::DuplicationMetrics> map_type;

				// start with a copy of MA
				map_type MO(MA);

				// operator[] inserts a default constructed (all zero) object for keys missing
				// in MA, so += yields a plain copy for those keys and the sum for shared keys
				for ( map_type::const_iterator it = MB.begin(); it != MB.end(); ++it )
					MO[it->first] += it->second;

				return MO;
			}
		};

		/**
		 * print DuplicationMetrics object on output stream out
		 * (declaration only; the definition is not contained in this header)
		 *
		 * @param out output stream
		 * @param M duplication metrics object
		 * @return output stream
		 **/
		std::ostream & operator<<(std::ostream & out, libmaus2::bambam::DuplicationMetrics const & M);
	}
}
#endif
