//                                               -*- C++ -*-
/**
 * @file  CorrelationAnalysis.cxx
 * @brief CorrelationAnalysis implements the sensitivity analysis methods based on correlation coefficients
 *
 *  (C) Copyright 2005-2011 EDF-EADS-Phimeca
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License.
 *
 *  This library is distributed in the hope that it will be useful
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 *  @author: $LastChangedBy: schueller $
 *  @date:   $LastChangedDate: 2011-06-30 11:29:46 +0200 (Thu, 30 Jun 2011) $
 *  Id:      $Id: CorrelationAnalysis.cxx 1975 2011-06-30 09:29:46Z schueller $
 */
#include <cmath>

#include "CorrelationAnalysis.hxx"
#include "Exception.hxx"
#include "LinearModelFactory.hxx"
#include "LinearModel.hxx"
#include "LinearLeastSquares.hxx"

namespace OpenTURNS
{
  namespace Base
  {
    namespace Stat
    {

      // File-local aliases for types declared in other namespaces (Common, Type, Base::MetaModel),
      // so that the implementations below can name them without qualification.
      typedef Common::InvalidArgumentException             InvalidArgumentException;
      typedef Common::InvalidDimensionException            InvalidDimensionException;
      typedef Type::NumericalPoint                         NumericalPoint;
      typedef Type::SymmetricMatrix                        SymmetricMatrix;
      typedef Type::SymmetricTensor                        SymmetricTensor;
      typedef Base::MetaModel::LinearLeastSquares          LinearLeastSquares;

      /* Default constructor (empty body) */
      CorrelationAnalysis::CorrelationAnalysis()
      {
        // Nothing to do
      }

      /* Pearson correlation coefficient between one component of the input sample and a 1D output sample.
       *
       * inputSample  : sample whose component number `index` is analysed
       * outputSample : sample of dimension 1, with the same size as inputSample
       * index        : component of inputSample to correlate with the output
       *
       * Throws InvalidArgumentException if index is out of bound or if the sizes differ,
       * and InvalidDimensionException if outputSample is not 1D.
       */
      NumericalScalar CorrelationAnalysis::PearsonCorrelation(const NumericalSample & inputSample,
                                                              const NumericalSample & outputSample,
                                                              const UnsignedLong index)
      {
        // Argument validation: component index first, then output dimension, then matching sizes
        if (inputSample.getDimension() <= index)
          {
            throw InvalidArgumentException(HERE) << "Error: given index out of bound";
          }
        if (outputSample.getDimension() != 1)
          {
            throw InvalidDimensionException(HERE) << "Error: output sample must be 1D";
          }
        const UnsignedLong sampleSize(inputSample.getSize());
        if (sampleSize != outputSample.getSize())
          {
            throw InvalidArgumentException(HERE) << "Error: input and output samples must have the same size";
          }
        // Gather the selected input component and the output into a single 2D sample
        NumericalSample jointSample(sampleSize, 2);
        for (UnsignedLong row = 0; row < sampleSize; ++row)
          {
            jointSample[row][0] = inputSample[row][index];
            jointSample[row][1] = outputSample[row][0];
          }
        // The coefficient of interest is the (0, 1) entry of the correlation of the 2D sample
        return jointSample.computePearsonCorrelation()(0, 1);
      }

      /* Spearman correlation coefficient between one component of the input sample and a 1D output sample.
       *
       * It is computed as the Pearson correlation coefficient of the ranked samples.
       *
       * inputSample  : sample whose component number `index` is analysed
       * outputSample : sample of dimension 1, with the same size as inputSample
       * index        : component of inputSample to correlate with the output
       *
       * Throws InvalidArgumentException if index is out of bound or if the sizes differ,
       * and InvalidDimensionException if outputSample is not 1D.
       */
      NumericalScalar CorrelationAnalysis::SpearmanCorrelation(const NumericalSample & inputSample,
                                                               const NumericalSample & outputSample,
                                                               const UnsignedLong index)
      {
        if (index >= inputSample.getDimension()) throw InvalidArgumentException(HERE) << "Error: given index out of bound";
        if (outputSample.getDimension() != 1) throw InvalidDimensionException(HERE) << "Error: output sample must be 1D";
        if (inputSample.getSize() != outputSample.getSize()) throw InvalidArgumentException(HERE) << "Error: input and output samples must have the same size";
        // The requested component must be forwarded explicitly: without it, PearsonCorrelation
        // falls back to its default component and the index argument is silently ignored.
        // NOTE(review): relies on rank() keeping the dimension and the component order of the sample.
        return PearsonCorrelation(inputSample.rank(), outputSample.rank(), index);
      }

      /* Standard Regression Coefficients (SRC) between the input sample and a 1D output sample.
       *
       * A linear least squares model of the output versus the input is built, then each coefficient
       * is obtained as Var(X_k) * a_k^2 / Var(Y), where a_k is the k-th linear coefficient of the model.
       *
       * Throws InvalidDimensionException if outputSample is not 1D
       * and InvalidArgumentException if the two samples do not have the same size.
       */
      CorrelationAnalysis::NumericalPoint CorrelationAnalysis::SRC(const Base::Stat::NumericalSample & inputSample,
                                                                   const Base::Stat::NumericalSample & outputSample)
      {
        if (outputSample.getDimension() != 1)
          {
            throw InvalidDimensionException(HERE) << "Error: output sample must be 1D";
          }
        if (inputSample.getSize() != outputSample.getSize())
          {
            throw InvalidArgumentException(HERE) << "Error: input and output samples must have the same size";
          }
        // Fit the linear model by least squares
        LinearLeastSquares leastSquares(inputSample, outputSample);
        leastSquares.run();
        // Multiply the linear term by the 1D unit point to get the coefficients as a point
        const NumericalPoint slopes(leastSquares.getLinear() * NumericalPoint(1, 1.0));
        const NumericalScalar outputVariance(outputSample.computeVariancePerComponent()[0]);
        // Start from the input variances and scale them in place
        NumericalPoint coefficients(inputSample.computeVariancePerComponent());
        const UnsignedLong inputDimension(inputSample.getDimension());
        for (UnsignedLong k = 0; k < inputDimension; ++k)
          {
            coefficients[k] *= slopes[k] * slopes[k] / outputVariance;
          }
        return coefficients;
      }

      /* Partial Correlation Coefficients (PCC) between the input sample and a 1D output sample.
       *
       * For each input component, two linear models are built on the other components: one for the
       * output and one for the component itself. The coefficient is the Pearson correlation between
       * the residuals of these two models.
       *
       * Throws InvalidDimensionException if the input dimension is less than 2 or if outputSample is not 1D,
       * and InvalidArgumentException if the two samples do not have the same size.
       */
      CorrelationAnalysis::NumericalPoint CorrelationAnalysis::PCC(const NumericalSample & inputSample,
                                                                   const NumericalSample & outputSample)
      {
        const UnsignedLong inputDimension(inputSample.getDimension());
        if (inputDimension < 2)
          {
            throw InvalidDimensionException(HERE) << "Error: input sample must have dimension > 1";
          }
        if (outputSample.getDimension() != 1)
          {
            throw InvalidDimensionException(HERE) << "Error: output sample must be 1D";
          }
        const UnsignedLong sampleSize(inputSample.getSize());
        if (sampleSize != outputSample.getSize())
          {
            throw InvalidArgumentException(HERE) << "Error: input and output samples must have the same size";
          }
        NumericalPoint coefficients(inputDimension);
        // Work samples, allocated once and overwritten for each excluded component
        NumericalSample otherComponents(sampleSize, inputDimension - 1);
        NumericalSample isolatedComponent(sampleSize, 1);
        for (UnsignedLong excluded = 0; excluded < inputDimension; ++excluded)
          {
            // Dispatch each input value either to the isolated component or to the remaining ones
            for (UnsignedLong row = 0; row < sampleSize; ++row)
              {
                UnsignedLong target(0);
                for (UnsignedLong column = 0; column < inputDimension; ++column)
                  {
                    if (column == excluded)
                      {
                        isolatedComponent[row][0] = inputSample[row][column];
                      }
                    else
                      {
                        otherComponents[row][target] = inputSample[row][column];
                        ++target;
                      }
                  }
              }
            // Linear models of the output and of the isolated component versus the other components
            const LinearModel outputModel(LinearModelFactory().build(otherComponents, outputSample));
            const LinearModel isolatedModel(LinearModelFactory().build(otherComponents, isolatedComponent));
            // The partial correlation is the correlation between the residuals of the two models
            const NumericalSample outputResidual(outputModel.getResidual(otherComponents, outputSample));
            const NumericalSample isolatedResidual(isolatedModel.getResidual(otherComponents, isolatedComponent));
            coefficients[excluded] = PearsonCorrelation(outputResidual, isolatedResidual);
          }
        return coefficients;
      }

      /* Standard Rank Regression Coefficients (SRRC) between the input sample and a 1D output sample.
       *
       * These are the SRC coefficients computed on the ranked samples.
       *
       * Throws InvalidDimensionException if outputSample is not 1D
       * and InvalidArgumentException if the two samples do not have the same size.
       */
      CorrelationAnalysis::NumericalPoint CorrelationAnalysis::SRRC(const NumericalSample & inputSample,
                                                                    const NumericalSample & outputSample)
      {
        // Check the arguments before ranking the samples
        if (outputSample.getDimension() != 1)
          {
            throw InvalidDimensionException(HERE) << "Error: output sample must be 1D";
          }
        if (inputSample.getSize() != outputSample.getSize())
          {
            throw InvalidArgumentException(HERE) << "Error: input and output samples must have the same size";
          }
        const NumericalSample rankedInput(inputSample.rank());
        const NumericalSample rankedOutput(outputSample.rank());
        return SRC(rankedInput, rankedOutput);
      }

      /* Partial Rank Correlation Coefficients (PRCC) between the input sample and a 1D output sample.
       *
       * These are the PCC coefficients computed on the ranked samples.
       *
       * Throws InvalidDimensionException if the input dimension is less than 2 or if outputSample is not 1D,
       * and InvalidArgumentException if the two samples do not have the same size.
       */
      CorrelationAnalysis::NumericalPoint CorrelationAnalysis::PRCC(const NumericalSample & inputSample,
                                                                    const NumericalSample & outputSample)
      {
        // The checks done by PCC are repeated here so that an invalid call fails before the costly ranking
        if (inputSample.getDimension() < 2)
          {
            throw InvalidDimensionException(HERE) << "Error: input sample must have dimension > 1";
          }
        if (outputSample.getDimension() != 1)
          {
            throw InvalidDimensionException(HERE) << "Error: output sample must be 1D";
          }
        if (inputSample.getSize() != outputSample.getSize())
          {
            throw InvalidArgumentException(HERE) << "Error: input and output samples must have the same size";
          }
        const NumericalSample rankedInput(inputSample.rank());
        const NumericalSample rankedOutput(outputSample.rank());
        return PCC(rankedInput, rankedOutput);
      }


      /** Compute the Sobol' indices given two input samples and a 1D function.
       *
       * Deprecated entry point: a warning is logged on each call.
       *
       * sobolIndicesParameters : gives the maximum order of the indices to compute (used here up to
       *                          order 3) and whether the total order indices are requested
       * firstInputSample       : first input sample, its dimension must be the function input dimension
       * secondInputSample      : second input sample, same size and same dimension as the first one
       * function               : function with a 1D output, evaluated on the recombined input points
       *
       * All the points at which the function must be evaluated are gathered into one sample, so that
       * the function is called only once. The resulting output sample is then read back in the exact
       * same order as the one used to build the input sample, using the running counter hugeIndex.
       *
       * Returns a SobolIndicesResult holding the first, second, third and total order indices.
       * Indices that were not requested are left default-constructed.
       *
       * Throws InvalidDimensionException if the samples have different sizes, if their dimension does
       * not match the function input dimension, if the function output is not 1D, if the maximum order
       * exceeds the input dimension, or if there is nothing to compute.
       */
      SobolIndicesResult CorrelationAnalysis::SobolIndices(const SobolIndicesParameters & sobolIndicesParameters,
                                                           const NumericalSample & firstInputSample,
                                                           const NumericalSample & secondInputSample,
                                                           const NumericalMathFunction & function)
      {
        // Warn the caller that this method is deprecated
        LOGWARN(OSS() << "The method CorrelationAnalysis::SobolIndices is deprecated and should not be used anymore. Use class SensitivityAnalysis instead.");
        const UnsignedLong size(firstInputSample.getSize());
        const UnsignedLong dimension(function.getInputDimension());

        if(size != secondInputSample.getSize())
          throw InvalidDimensionException(HERE) << "Error: input samples must have the same size";

        if(firstInputSample.getDimension() != dimension || secondInputSample.getDimension() != dimension)
          throw InvalidDimensionException(HERE) << "Error: input samples dimension must fit the function input dimension";

        if(function.getOutputDimension() != 1)
          throw InvalidDimensionException(HERE) << "Error: function output must be 1D";

        if(sobolIndicesParameters.getMaximumOrder() > dimension)
          throw InvalidDimensionException(HERE) << "Error: indice order cannot exceed input dimension";

        // Neither the indices of order >= 1 nor the total indices are requested
        if((sobolIndicesParameters.getMaximumOrder() < 1) && (!sobolIndicesParameters.getTotalIndiceComputation()))
          throw InvalidDimensionException(HERE) << "Error: no indice to compute";

        // Declare indices
        NumericalPoint firstOrderIndices;
        SymmetricMatrix secondOrderIndices;
        SymmetricTensor thirdOrderIndices;
        NumericalPoint totalOrderIndices;
        // Local copies of the input samples
        NumericalSample inputSample1(firstInputSample);
        NumericalSample inputSample2(secondInputSample);

        // Allocate first order indices
        if(sobolIndicesParameters.getMaximumOrder() >= 1)
          firstOrderIndices = NumericalPoint(dimension, 0.0);

        // Allocate second order indices
        if(sobolIndicesParameters.getMaximumOrder() >= 2)
          secondOrderIndices = SymmetricMatrix(dimension);

        // Allocate third order indices
        if(sobolIndicesParameters.getMaximumOrder() >= 3)
          thirdOrderIndices = SymmetricTensor(dimension, dimension);

        // Allocate total order indices
        if(sobolIndicesParameters.getTotalIndiceComputation())
          totalOrderIndices = NumericalPoint(dimension, 0.0);

        // Aggregate all the input points of the function into one huge sample in order to exploit parallelism
        NumericalSample hugeInputSample(0, dimension);
        // One component modification of the second input sample if the indices of order at least 1 are needed
        if (sobolIndicesParameters.getMaximumOrder() >= 1)
          {
            // Loop over the index
            for(UnsignedLong index0 = 0; index0 < dimension; ++index0)
              {
                // Points of the second sample whose component index0 is taken from the first sample
                for (UnsignedLong i = 0; i < size; ++i)
                  {
                    NumericalPoint point2OneModification(inputSample2[i]);
                    point2OneModification[index0] = inputSample1[i][index0];
                    hugeInputSample.add(point2OneModification);
                  }
                // Two components modification of the second input sample if the indices of order at least 2 are needed
                if(sobolIndicesParameters.getMaximumOrder() >= 2)
                  {
                    // Loop over the index, restricted to index1 < index0 so that each pair is visited once
                    for (UnsignedLong index1 = 0; index1 < index0; ++index1)
                      {
                        for (UnsignedLong i = 0; i < size; ++i)
                          {
                            NumericalPoint point2TwoModifications(inputSample2[i]);
                            point2TwoModifications[index0] = inputSample1[i][index0];
                            point2TwoModifications[index1] = inputSample1[i][index1];
                            hugeInputSample.add(point2TwoModifications);
                          }
                        // Three components modification of the second input sample if the indices of order at least 3 are needed
                        if(sobolIndicesParameters.getMaximumOrder() >= 3)
                          {
                            // Loop over the index, restricted to index2 < index1 so that each triplet is visited once
                            for (UnsignedLong index2 = 0; index2 < index1; ++index2)
                              {
                                for (UnsignedLong i = 0; i < size; ++i)
                                  {
                                    NumericalPoint point2ThreeModifications(inputSample2[i]);
                                    point2ThreeModifications[index0] = inputSample1[i][index0];
                                    point2ThreeModifications[index1] = inputSample1[i][index1];
                                    point2ThreeModifications[index2] = inputSample1[i][index2];
                                    hugeInputSample.add(point2ThreeModifications);
                                  }
                              } // Loop over index2
                          } // Sobol order >= 3
                      } // Loop over index1
                  } // Sobol order >= 2
              } // Loop over index0
          } // Sobol order >= 1
        // One component modification of the first input sample if the total order indices are needed
        if (sobolIndicesParameters.getTotalIndiceComputation())
          {
            // Loop over the index
            for(UnsignedLong index0 = 0; index0 < dimension; ++index0)
              {
                // Points of the first sample whose component index0 is taken from the second sample
                for (UnsignedLong i = 0; i < size; ++i)
                  {
                    NumericalPoint point1OneModification(inputSample1[i]);
                    point1OneModification[index0] = inputSample2[i][index0];
                    hugeInputSample.add(point1OneModification);
                  }
              }
          } // total indices

        // Add the inputSample1 at the end in order to use the split method on the resulting huge output sample
        for (UnsignedLong i = 0; i < size; ++i) hugeInputSample.add(inputSample1[i]);

        // Evaluation of the model on the huge sample
        // Not const because it will be split in two parts
        NumericalSample hugeOutputSample(function(hugeInputSample));
        // The output sample for inputSample1 corresponds to the size last values.
        const NumericalSample outputSample1(hugeOutputSample.split(hugeOutputSample.getSize() - size));
        // The mean and the variance are estimated using inputSample1
        const NumericalScalar meanEstimate(outputSample1.computeMean()[0]);
        const NumericalScalar varianceEstimate(outputSample1.computeCovariance()(0, 0));
        const NumericalScalar meanEstimateSquared(meanEstimate * meanEstimate);
        // Perform the computations using the exact same order as the one used to build hugeInputSample
        // hugeIndex is the position of the next unread value of hugeOutputSample
        UnsignedLong hugeIndex(0);
        // One component modification of the second input sample if the indices of order at least 1 are needed
        if (sobolIndicesParameters.getMaximumOrder() >= 1)
          {
            // Loop over the index
            for(UnsignedLong index0 = 0; index0 < dimension; ++index0)
              {
                // We compute the first order indices
                NumericalScalar Um(0.0);
                // The sum of f(x^1_i) * f(y^2_i), where y^2_i is a modification of x^2_i at the component m=index0. It gives the NxU_m term in the reference.
                for (UnsignedLong i = 0; i < size; ++i)
                  {
                    Um += outputSample1[i][0] * hugeOutputSample[hugeIndex][0];
                    ++hugeIndex;
                  }
                Um /= size;
                // The estimate of the first order index is S_m = (U_m - E(Y)^2) / V(Y)
                const NumericalScalar Sm((Um - meanEstimateSquared) / varianceEstimate);
                // An out-of-range estimate is only reported, the value is stored unchanged
                if ((Sm < 0.0) || (Sm > 1.0)) LOGWARN(OSS() << "The estimated first order Sobol index (" << index0 << ") is not in the range [0, 1]. You may increase the sampling size.");
                firstOrderIndices[index0] = Sm;
                // Two components modification of the second input sample if the indices of order at least 2 are needed
                if(sobolIndicesParameters.getMaximumOrder() >= 2)
                  {
                    // Loop over the index
                    for (UnsignedLong index1 = 0; index1 < index0; ++index1)
                      {
                        // We compute the second order indices
                        NumericalScalar Umn(0.0);
                        // index1 < index0, so this first order index has been stored in a previous iteration over index0
                        const NumericalScalar Sn(firstOrderIndices[index1]);
                        // The sum of f(x^1_i) * f(y^2_i), where y^2_i is a modification of x^2_i at the components m=index0 and n=index1. It gives the NxU_mn term in the reference.
                        for (UnsignedLong i = 0; i < size; ++i)
                          {
                            Umn += outputSample1[i][0] * hugeOutputSample[hugeIndex][0];
                            ++hugeIndex;
                          }
                        Umn /= size;
                        // The estimate of the second order index is S_mn = (U_mn - E(Y)^2) / V(Y) - S_m - S_n
                        const NumericalScalar Smn((Umn - meanEstimateSquared) / varianceEstimate - Sm - Sn);
                        if ((Smn < 0.0) || (Smn > 1.0)) LOGWARN(OSS() << "The estimated second order Sobol index (" << index0 << ", " << index1 << ") is not in the range [0, 1]. You may increase the sampling size.");
                        // This assignment ensures that both (index0, index1) and (index1, index0) will be filled
                        secondOrderIndices(index0, index1) = Smn;

                        // Three components modification of the second input sample if the indices of order at least 3 are needed
                        if(sobolIndicesParameters.getMaximumOrder() >= 3)
                          {
                            // Loop over the index
                            for (UnsignedLong index2 = 0; index2 < index1; ++index2)
                              {
                                // We compute the third order indices
                                NumericalScalar Umnp(0.0);
                                // index2 < index1 < index0, so the lower order indices read here have been stored by previous iterations
                                const NumericalScalar Sp(firstOrderIndices[index2]);
                                const NumericalScalar Smp(secondOrderIndices(index0, index2));
                                const NumericalScalar Snp(secondOrderIndices(index1, index2));
                                // The sum of f(x^1_i) * f(y^2_i), where y^2_i is a modification of x^2_i at the components m=index0, n=index1 and p=index2. It gives the NxU_mnp term in the reference.
                                for (UnsignedLong i = 0; i < size; ++i)
                                  {
                                    Umnp += outputSample1[i][0] * hugeOutputSample[hugeIndex][0];
                                    ++hugeIndex;
                                  }
                                Umnp /= size;
                                // The estimate of the third order index is S_mnp = (U_mnp - E(Y)^2) / V(Y) - S_m - S_n - S_p - S_mn - S_mp - S_np
                                const NumericalScalar Smnp((Umnp - meanEstimateSquared) / varianceEstimate - Sm - Sn - Sp - Smn - Smp - Snp);
                                if ((Smnp < 0.0) || (Smnp > 1.0)) LOGWARN(OSS() << "The estimated third order Sobol index (" << index0 << ", " << index1 << ", " << index2 << ") is not in the range [0, 1]. You may increase the sampling size.");
                                // This assignment only ensures that (index0, index1, index2) and (index1, index0, index2) will be filled
                                thirdOrderIndices(index0, index1, index2) = Smnp;
                                // We add explicitly (index0, index2, index1) and (index2, index0, index1)
                                thirdOrderIndices(index0, index2, index1) = Smnp;
                                // We add explicitly (index1, index2, index0) and (index2, index1, index0)
                                thirdOrderIndices(index1, index2, index0) = Smnp;
                              } // Loop over index2
                          } // Sobol order >= 3
                      } // Loop over index1
                  } // Sobol order >= 2
              } // Loop over index0
          } // Sobol order >= 1

        // One component modification of the first input sample if the total order indices are needed
        if (sobolIndicesParameters.getTotalIndiceComputation())
          {
            // Loop over the index
            for(UnsignedLong index0 = 0; index0 < dimension; ++index0)
              {
                // Computation of the first order total index of rank index0
                // The entry is first used as an accumulator for the sum, then overwritten by the estimate
                totalOrderIndices[index0] = 0.0;
                // The sum of f(x^1_i) * f(y^1_i), where y^1_i is a modification of x^1_i at the component index0. It gives the NxU_~i term in the reference.
                for (UnsignedLong i = 0; i < size; ++i)
                  {
                    totalOrderIndices[index0] += outputSample1[i][0] * hugeOutputSample[hugeIndex][0];
                    ++hugeIndex;
                  }
                // The estimate of the total index is S_Ti = 1 - (U_~i - E(Y)^2) / V(Y)
                const NumericalScalar St(1.0 - (totalOrderIndices[index0] / size - meanEstimateSquared) / varianceEstimate);
                if ((St < 0.0) || (St > 1.0)) LOGWARN(OSS() << "The estimated first order total Sobol index (" << index0 << ") is not in the range [0, 1]. You may increase the sampling size.");
                totalOrderIndices[index0] = St;
              } // Loop over the index
          } // total indices

        // Create a SobolIndicesResult object to return all the indices
        SobolIndicesResult result;
        result.setFirstOrderIndice(firstOrderIndices);
        result.setSecondOrderIndice(secondOrderIndices);
        result.setThirdOrderIndice(thirdOrderIndices);
        result.setTotalOrderIndice(totalOrderIndices);
        return result;
      }


    } // namespace Stat
  } // namespace Base
} // namespace OpenTURNS
