/*****************************************************************************
*
* ALPS Project: Algorithms and Libraries for Physics Simulations
*
* ALPS Libraries
*
* Copyright (C) 2001-2002 by Rene Villiger <rvilliger@smile.ch>,
*                            Prakash Dayal <prakash@comp-phys.org>,
*                            Matthias Troyer <troyer@comp-phys.org>
*
* This software is part of the ALPS libraries, published under the ALPS
* Library License; you can use, redistribute it and/or modify it under
* the terms of the license, either version 1 or (at your option) any later
* version.
* 
* You should have received a copy of the ALPS Library License along with
* the ALPS Libraries; see the file LICENSE.txt. If not, the license is also
* available from http://alps.comp-phys.org/.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
* FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 
* SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 
* FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/

/* $Id: jacobi.h,v 1.6 2003/09/05 08:12:38 troyer Exp $ */

#ifndef IETL_JACOBI_H
#define IETL_JACOBI_H

#include <ietl/traits.h>
#include <ietl/fmatrix.h>
#include <complex>
#include <stdexcept>
#include <vector>

namespace ietl
{

   /// Correction-equation solver usable with jacobi_davidson.
   ///
   /// Keeps its own copy of the matrix and of the vector space, and solves
   /// dense linear systems through the LAPACK ?sysv / ?hesv drivers.
   template <class MATRIX, class VS>
   class jcd_left_preconditioner
   {
      public:
         typedef typename vectorspace_traits<VS>::vector_type vector_type;
         typedef typename vectorspace_traits<VS>::scalar_type scalar_type;
         typedef typename ietl::number_traits<scalar_type>::magnitude_type magnitude_type;

         /// Stores copies of matrix and vec together with the iteration cap max_iter.
         jcd_left_preconditioner(const MATRIX& matrix, const VS& vec, const int& max_iter);

         /// Computes a correction vector t from the current Ritz vector u,
         /// Ritz value theta and residual r; rel_tol is the stopping tolerance
         /// of the inner iteration.
         void operator()(const vector_type& u,
                         const magnitude_type& theta,
                         const vector_type& r,
                         vector_type& t,
                         const magnitude_type& rel_tol);

      private:
         // Overloads that pick the LAPACK driver matching the scalar type:
         // ssysv_/dsysv_ for real data, chesv_/zhesv_ for complex data.
         // All arguments are forwarded unchanged.
         void sysv(const char& triangle, const int& order, const int& rhs_count,
                   float mat[], const int& mat_ld, int pivots[],
                   float rhs[], const int& rhs_ld,
                   float scratch[], const int& scratch_len, int& status)
         {
            ssysv_(triangle, order, rhs_count, mat, mat_ld, pivots, rhs, rhs_ld, scratch, scratch_len, status);
         }

         void sysv(const char& triangle, const int& order, const int& rhs_count,
                   double mat[], const int& mat_ld, int pivots[],
                   double rhs[], const int& rhs_ld,
                   double scratch[], const int& scratch_len, int& status)
         {
            dsysv_(triangle, order, rhs_count, mat, mat_ld, pivots, rhs, rhs_ld, scratch, scratch_len, status);
         }

         void sysv(const char& triangle, const int& order, const int& rhs_count,
                   std::complex<float> mat[], const int& mat_ld, int pivots[],
                   std::complex<float> rhs[], const int& rhs_ld,
                   std::complex<float> scratch[], const int& scratch_len, int& status)
         {
            chesv_(triangle, order, rhs_count, mat, mat_ld, pivots, rhs, rhs_ld, scratch, scratch_len, status);
         }

         void sysv(const char& triangle, const int& order, const int& rhs_count,
                   std::complex<double> mat[], const int& mat_ld, int pivots[],
                   std::complex<double> rhs[], const int& rhs_ld,
                   std::complex<double> scratch[], const int& scratch_len, int& status)
         {
            zhesv_(triangle, order, rhs_count, mat, mat_ld, pivots, rhs, rhs_ld, scratch, scratch_len, status);
         }

         MATRIX K;        // copy of the matrix handed to the constructor
         VS vecspace_;    // copy of the vector space
         int n_;          // vecspace_.vec_dimension(), cached by the constructor
         int max_iter_;   // upper bound on inner iterations in operator()
   };
   
   /// Minimal correction-equation solver usable with jacobi_davidson.
   ///
   /// Holds a copy of the matrix and of the vector space; operator() only
   /// reads the diagonal entries of the matrix.
   template <class MATRIX, class VS>
   class jcd_simple_solver
   {
      public:
         typedef typename vectorspace_traits<VS>::vector_type vector_type;
         typedef typename vectorspace_traits<VS>::scalar_type scalar_type;
         typedef typename ietl::number_traits<scalar_type>::magnitude_type magnitude_type;

         /// Stores copies of matrix and vec.
         jcd_simple_solver(const MATRIX& matrix, const VS& vec);

         /// Writes a correction vector into t, given the Ritz vector u,
         /// the Ritz value theta, the residual r and a tolerance rel_tol.
         void operator()(const vector_type& u,
                         const magnitude_type& theta,
                         const vector_type& r,
                         vector_type& t,
                         const magnitude_type& rel_tol);

      private:
         MATRIX matrix_;  // copy of the matrix handed to the constructor
         VS vecspace_;    // copy of the vector space
         int n_;          // vecspace_.vec_dimension(), cached by the constructor
   };

   /// Jacobi-Davidson eigensolver.
   ///
   /// Holds a copy of the matrix and of the vector space, plus the projected
   /// matrix M that calculate_eigenvalue() builds up one column per iteration.
   template <class MATRIX, class VS>
   class jacobi_davidson
   {
      public:
         typedef typename vectorspace_traits<VS>::vector_type vector_type;
         typedef typename vectorspace_traits<VS>::scalar_type scalar_type;
         typedef typename ietl::number_traits<scalar_type>::magnitude_type magnitude_type;

         /// Stores copies of matrix and vec.
         jacobi_davidson(const MATRIX& matrix, const VS& vec);
         ~jacobi_davidson();

         /// Iterates until iter reports completion and returns the pair
         /// (eigenvalue approximation, eigenvector approximation).
         /// gen seeds the start vector; solver is called as
         /// solver(u, theta, r, t, rel_tol) to produce each correction t.
         template <class GEN, class SOLVER, class ITER>
         std::pair<magnitude_type, vector_type>
         calculate_eigenvalue(const GEN& gen, SOLVER& solver, ITER& iter);

      private:
         // One overload per scalar type; each calls the matching LAPACK
         // ?syevx / ?heevx routine on the leading dim x dim block of M.
         void get_largest_eigenvalue(magnitude_type& theta, double s[], int dim);
         void get_largest_eigenvalue(magnitude_type& theta, float s[], int dim);
         void get_largest_eigenvalue(magnitude_type& theta, std::complex<float> s[], int dim);
         void get_largest_eigenvalue(magnitude_type& theta, std::complex<double> s[], int dim);

         MATRIX matrix_;                // copy of the matrix handed to the constructor
         VS vecspace_;                  // copy of the vector space
         int n_;                        // vecspace_.vec_dimension(), cached by the constructor
         FortranMatrix<scalar_type> M;  // projected matrix (upper triangle is filled)
         magnitude_type atol_;          // iter.atol(), set by calculate_eigenvalue()
   };
   

   /// Copies matrix and vec into the solver and caches the vector dimension
   /// reported by the stored vector space.
   template <class MATRIX, class VS>
   jcd_simple_solver<MATRIX, VS>::jcd_simple_solver(const MATRIX& matrix, const VS& vec)
      : matrix_(matrix)
      , vecspace_(vec)
      , n_(vecspace_.vec_dimension())   // n_ is declared after vecspace_, so this reads the stored copy
   {
   }
   
   /// Diagonal approximation of the correction equation:
   ///   t[k] = -r[k] / (matrix(k,k) - theta)   for k = 0 .. n_-1.
   /// The arguments u and rel_tol are accepted for interface compatibility
   /// and are not read.
   template <class MATRIX, class VS>
   void jcd_simple_solver<MATRIX, VS>::operator()(const vector_type& u, const magnitude_type& theta, const vector_type& r, vector_type& t, const magnitude_type& rel_tol)
   {
      int row = 0;
      while (row < n_)
      {
         t[row] = -r[row] / ( matrix_(row,row) - theta );
         ++row;
      }
   }
   
   
   /// Copies matrix and vec into the preconditioner, records the inner
   /// iteration cap and caches the vector dimension of the stored vector space.
   template <class MATRIX, class VS>
   jcd_left_preconditioner<MATRIX, VS>::jcd_left_preconditioner(const MATRIX& matrix, const VS& vec, const int& max_iter)
      : K(matrix)
      , vecspace_(vec)
      , n_(vecspace_.vec_dimension())   // declared after vecspace_, so this reads the stored copy
      , max_iter_(max_iter)
   {
   }
   
   /// Computes a correction vector t for the Jacobi-Davidson iteration using
   /// dense LAPACK solves with the stored matrix K.
   ///
   /// @param u        current Ritz vector
   /// @param theta    current Ritz value, used as diagonal shift
   /// @param r        current residual
   /// @param t        output: correction vector (overwritten inside the loop)
   /// @param rel_tol  relative tolerance for leaving the inner loop early
   /// @throws std::runtime_error if a LAPACK solve reports info != 0
   ///
   /// The LAPACK pivot and work buffers live on the heap; the work buffer
   /// holds n*n scalars and would be unsafe as a stack array for large n.
   template <class MATRIX, class VS>
   void jcd_left_preconditioner<MATRIX, VS>::operator()(const vector_type& u, const magnitude_type& theta, const vector_type& r, vector_type& t, const magnitude_type& rel_tol)
   {
      // define variables
      FortranMatrix<scalar_type> Ktmp(n_,n_);
      vector_type u_hat = new_vector(vecspace_);
      vector_type vec1  = new_vector(vecspace_);
      vector_type vec2  = new_vector(vecspace_);
      magnitude_type mu, norm;
      
      // initialize variables: Ktmp = K - theta * I
      for (int i=0;i<n_;i++) for (int j=0;j<n_;j++)
         Ktmp(i,j) = K(i,j);
      for (int i=0;i<n_;i++)
         Ktmp(i,i) -= theta;
      
      // define variables for LAPACK; buffers hold at least one element so
      // that taking the address of element 0 is always valid
      char uplo='U'; int n=n_; int nrhs=1; int lda=n; int ldb=n;
      int lwork = (n > 0) ? n*n : 1;
      std::vector<int> ipiv(n > 0 ? n : 1);
      std::vector<scalar_type> work(lwork);
      int info = 0;
      
      // Solve u_hat from K*u_hat = u,  mu = u^star * u_hat
      ietl::copy(u,u_hat);
      sysv(uplo, n, nrhs, Ktmp.data(), lda, &ipiv[0], u_hat.data(), ldb, &work[0], lwork, info);
      if (info != 0)
         throw std::runtime_error("ietl::jcd_left_preconditioner: LAPACK solve for u_hat failed");
      mu = std::real(ietl::dot(u,u_hat));
      
      // compute r_tilde = K_tilde^{-1} * r as
      //   solve r_hat from K*r_hat = r
      //   r_tilde = r_hat - \frac{u^\star r_hat}{mu} u_hat
      // NOTE(review): the previous solve presumably overwrote the upper
      // triangle of Ktmp with its factorization, which would explain the
      // switch to uplo='L' here. The diagonal is restored to K(i,i) WITHOUT
      // the theta shift, unlike the first solve and the solves in the loop
      // below -- confirm whether that is intended.
      ietl::copy(r,vec1);
      for (int i=0;i<n_;i++)
         Ktmp(i,i) = K(i,i);
      uplo='L';
      sysv(uplo, n, nrhs, Ktmp.data(), lda, &ipiv[0], vec1.data(), ldb, &work[0], lwork, info);
      if (info != 0)
         throw std::runtime_error("ietl::jcd_left_preconditioner: LAPACK solve for r_hat failed");
      vec1 -= ietl::dot(u,vec1)/mu*u_hat;
      vec1*=-1.;
      
      // apply a Krylov subspace method with t_0=0; operator K_tilde ^{-1} A_tilde
      // and right hand side -r_tilde; given v, z=K_tilde ^{-1} A_tilde v is computed as
      //   y = (A-\theta I)v
      //   solve y_hat from K y_hat = y
      for (int i=0; i<max_iter_; i++) {
         t = vec1/ietl::two_norm(vec1);
         // NOTE(review): this multiplies by K itself, not by (K - theta I)
         // as the comment above describes -- confirm.
         ietl::mult(K,t,vec1);
         // Ktmp is rebuilt on every pass because each solve overwrites it.
         for (int j=0;j<n_;j++) for (int k=0;k<n_;k++)
            Ktmp(j,k) = K(j,k);
         for (int j=0;j<n_;j++)
            Ktmp(j,j) -= theta;
         sysv(uplo, n, nrhs, Ktmp.data(), lda, &ipiv[0], vec1.data(), ldb, &work[0], lwork, info);
         if (info != 0)
            throw std::runtime_error("ietl::jcd_left_preconditioner: LAPACK solve in inner iteration failed");
         // NOTE(review): the double negative ADDS the u_hat component here,
         // whereas the projection of r_hat above subtracts it -- confirm sign.
         vec1 -= -ietl::dot(u,vec1)/mu * u_hat;
         norm = ietl::real(ietl::dot(t,vec1));
         vec2 = vec1-norm*t;
         // stop once vec1 is (relatively) parallel to t
         if ( ietl::two_norm(vec2) < rel_tol * std::abs(norm) )
            break;
      }
   }
   
   // C L A S S :   J A C O B I _ D A V I D S O N ////////////////////////////////////
   
   /// Copies matrix and vec into the solver, caches the vector dimension and
   /// creates the projected matrix M as a 1x1 placeholder
   /// (calculate_eigenvalue() resizes it).
   ///
   /// atol_ is value-initialized so that the object never carries an
   /// indeterminate member, e.g. when it is copied before the first call to
   /// calculate_eigenvalue(), which assigns the real tolerance.
   template <class MATRIX, class VS>
   jacobi_davidson<MATRIX, VS>::jacobi_davidson(const MATRIX& matrix, const VS& vec) : 
      matrix_(matrix),
      vecspace_(vec),
      n_(vecspace_.vec_dimension()),   // declared after vecspace_, so this reads the stored copy
      M(1,1),
      atol_()
   {
   }
   
   /// Destructor; intentionally empty.
   template <class MATRIX, class VS>
   jacobi_davidson<MATRIX, VS>::~jacobi_davidson() {}
   
   /// Runs the Jacobi-Davidson iteration and returns the pair (theta, u):
   /// the last Ritz value and Ritz vector computed before iter reported
   /// completion.
   ///
   /// @param gen     generator passed to ietl::generate() to fill the start vector
   /// @param solver  callable invoked as solver(u, theta, r, t, rel_tol); it must
   ///                write the next correction vector into t
   /// @param iter    iteration control; this function uses max_iter(), atol(),
   ///                iterations(), operator++ and finished(residual_norm, theta)
   ///
   /// The search space basis V, the coefficient vector s and the projected
   /// matrix M are sized from iter.max_iter(). s is a std::vector rather than
   /// a variable-length array, which is not standard C++.
   template <class MATRIX, class VS> 
   template <class GEN, class SOLVER, class ITER>
   std::pair<typename jacobi_davidson<MATRIX,VS>::magnitude_type, typename jacobi_davidson<MATRIX,VS>::vector_type> 
     jacobi_davidson<MATRIX, VS>::calculate_eigenvalue(const GEN& gen, SOLVER& solver, ITER& iter)
   {
      // The do/while body below touches slot iter.iterations() before
      // iter.finished() is consulted, so at least one slot must exist even
      // when max_iter() is not positive (assuming iterations() is 0 on entry).
      const int requested = iter.max_iter();
      const int max_dim = requested > 0 ? requested : 1;

      vector_type t  = new_vector(vecspace_);
      vector_type u  = new_vector(vecspace_);
      vector_type uA = new_vector(vecspace_);
      vector_type vA = new_vector(vecspace_);
      vector_type r  = new_vector(vecspace_);
      std::vector<scalar_type> s(max_dim);
      std::vector<vector_type> V(max_dim);
      for (int k=0;k<max_dim;k++)
         V[k] = new_vector(vecspace_);
      int i,j;
      M.resize(max_dim, max_dim);
      magnitude_type theta, tau;
      magnitude_type kappa = 0.25;
      magnitude_type rel_tol;
      atol_ = iter.atol();
       
      // Start with t=v_o, starting guess
      ietl::generate(t,gen);
      ietl::project(t,vecspace_);
         
      // Start iteration
      do
      {
         // Modified Gram-Schmidt Orthogonalization with Refinement:
         // a second pass is made when the first one shrank t below kappa*tau
         tau = ietl::two_norm(t);
         for (i=1;i<=iter.iterations();i++)
           t -= ietl::dot(V[i-1],t)*V[i-1];
         if (ietl::two_norm(t) < kappa * tau)
            for (i=1;i<=iter.iterations();i++)
               t -= ietl::dot(V[i-1],t) * V[i-1];
         
         
         // v_m = t / |t|_2,  v_m^A = A v_m
         V[iter.iterations()] = t/ietl::two_norm(t);
         ietl::mult(matrix_, V[iter.iterations()], vA);
         
         // for i=1, ..., iter
         //   M_{i,m} = v_i ^\star v_m ^A
         // end for
         // (only the new column, i.e. the upper triangle of M, is written)
         for (i=1;i<=iter.iterations()+1;i++)
            M(i-1,iter.iterations()) = ietl::dot(V[i-1],vA);
         
         // compute the largest eigenpair (\theta, s) of M (|s|_2 = 1)
         get_largest_eigenvalue(theta,&s[0],iter.iterations()+1);

         // u = V s
         for (i=0;i<n_;i++)
         {
            u[i] = V[0][i] * s[0];
            for (j=1;j<=iter.iterations();j++)
            {
               u[i] += V[j][i] * s[j];
            }
         }
         
         // u^A = A u
         ietl::mult(matrix_,u,uA);
         
         // r = u^A - \theta u
         r = uA-theta*u;

         // if (|r|_2 < \epsilon) stop
         ++iter;
         if (iter.finished(ietl::two_norm(r),theta))
            break;
            
         // solve (approximately) a t orthogonal to u from
         //   (I-uu^\star)(A-\theta I)(I- uu^\star)t = -r
         // the tolerance handed to the solver halves with every outer iteration
         rel_tol = 1. / pow(2.,double(iter.iterations()+1));
         solver(u, theta, r, t, rel_tol);
      } while (true);
      
      // accept lambda=theta and x=u
      return std::make_pair(theta, u);
   }
   
   /// Largest eigenpair of the leading dim x dim block of M (double precision,
   /// real symmetric case), computed with LAPACK dsyevx_.
   ///
   /// @param theta  output: the eigenvalue with index dim (il = iu = dim)
   /// @param s      output: the dim components of the matching eigenvector;
   ///               must provide room for at least dim entries
   /// @param dim    order of the block of M to use; must be >= 1
   /// @throws std::invalid_argument if dim < 1
   /// @throws std::runtime_error    if dsyevx_ returns info != 0
   template <class MATRIX, class VS>
   void jacobi_davidson<MATRIX, VS>::get_largest_eigenvalue(magnitude_type& theta, double s[], int dim)
   {
      if (dim < 1)
         throw std::invalid_argument("ietl::jacobi_davidson::get_largest_eigenvalue: dim must be >= 1");

      // Work on a copy: only the upper triangle is needed since uplo='U'.
      FortranMatrix<scalar_type> M_(dim,dim);
      for (int i=0;i<dim;i++) for (int j=0;j<=i;j++)
         M_(j,i) = M(j,i);
      double abstol = atol_;
      char jobz='V';     char range='I';   char uplo='U';
      int n=dim;
      int lda=dim;       int il=n;         int iu=n;
      int m=0;           int ldz=n;        int lwork=8*n;
      int info=0;
      double vl=0., vu=0.;   // passed to dsyevx_ alongside range='I'
      // Heap storage instead of non-standard variable-length arrays.
      std::vector<double> w(n), z(n), work(lwork);
      std::vector<int> iwork(5*n), ifail(n);
      dsyevx_(jobz, range, uplo, n, M_.data(), lda, vl, vu, il, iu, abstol, m, &w[0], &z[0], ldz, &work[0], lwork, &iwork[0], &ifail[0], info);
      if (info != 0)
         throw std::runtime_error("ietl::jacobi_davidson::get_largest_eigenvalue: dsyevx failed");
      theta = w[0];
      for (int i=0;i<n;i++)
         s[i] = z[i];
   }

   /// Largest eigenpair of the leading dim x dim block of M (single precision,
   /// real symmetric case), computed with LAPACK ssyevx_.
   ///
   /// @param theta  output: the eigenvalue with index dim (il = iu = dim)
   /// @param s      output: the dim components of the matching eigenvector;
   ///               must provide room for at least dim entries
   /// @param dim    order of the block of M to use; must be >= 1
   /// @throws std::invalid_argument if dim < 1
   /// @throws std::runtime_error    if ssyevx_ returns info != 0
   template <class MATRIX, class VS>
   void jacobi_davidson<MATRIX, VS>::get_largest_eigenvalue(magnitude_type& theta, float s[], int dim)
   {
      if (dim < 1)
         throw std::invalid_argument("ietl::jacobi_davidson::get_largest_eigenvalue: dim must be >= 1");

      // Work on a copy: only the upper triangle is needed since uplo='U'.
      FortranMatrix<scalar_type> M_(dim,dim);
      for (int i=0;i<dim;i++) for (int j=0;j<=i;j++)
         M_(j,i) = M(j,i);
      float abstol = atol_;
      char jobz='V';     char range='I';   char uplo='U';
      int n=dim;
      int lda=dim;       int il=n;         int iu=n;
      int m=0;           int ldz=n;        int lwork=8*n;
      int info=0;
      float vl=0.f, vu=0.f;   // passed to ssyevx_ alongside range='I'
      // Heap storage instead of non-standard variable-length arrays.
      std::vector<float> w(n), z(n), work(lwork);
      std::vector<int> iwork(5*n), ifail(n);
      ssyevx_(jobz, range, uplo, n, M_.data(), lda, vl, vu, il, iu, abstol, m, &w[0], &z[0], ldz, &work[0], lwork, &iwork[0], &ifail[0], info);
      if (info != 0)
         throw std::runtime_error("ietl::jacobi_davidson::get_largest_eigenvalue: ssyevx failed");
      theta = w[0];
      for (int i=0;i<n;i++)
         s[i] = z[i];
   }
   
   /// Largest eigenpair of the leading dim x dim block of M (double precision,
   /// complex Hermitian case), computed with LAPACK zheevx_.
   ///
   /// @param theta  output: the eigenvalue with index dim (il = iu = dim)
   /// @param s      output: the dim components of the matching eigenvector;
   ///               must provide room for at least dim entries
   /// @param dim    order of the block of M to use; must be >= 1
   /// @throws std::invalid_argument if dim < 1
   /// @throws std::runtime_error    if zheevx_ returns info != 0
   template <class MATRIX, class VS>
   void jacobi_davidson<MATRIX, VS>::get_largest_eigenvalue(magnitude_type& theta, std::complex<double> s[], int dim)
   {
      if (dim < 1)
         throw std::invalid_argument("ietl::jacobi_davidson::get_largest_eigenvalue: dim must be >= 1");

      // Work on a copy: only the upper triangle is needed since uplo='U'.
      FortranMatrix<scalar_type> M_(dim,dim);
      for (int i=0;i<dim;i++) for (int j=0;j<=i;j++)
         M_(j,i) = M(j,i);
      double abstol = atol_;
      char jobz='V';     char range='I';   char uplo='U';
      int n=dim;
      int lda=dim;       int il=n;         int iu=n;
      int m=0;           int ldz=n;        int lwork=8*n;
      int info=0;
      double vl=0., vu=0.;   // passed to zheevx_ alongside range='I'
      // Heap storage instead of non-standard variable-length arrays.
      std::vector<double> w(n), rwork(7*n);
      std::vector<std::complex<double> > z(n), work(lwork);
      std::vector<int> iwork(5*n), ifail(n);
      zheevx_(jobz, range, uplo, n, M_.data(), lda, vl, vu, il, iu, abstol, m, &w[0], &z[0], ldz, &work[0], lwork, &rwork[0], &iwork[0], &ifail[0], info);
      if (info != 0)
         throw std::runtime_error("ietl::jacobi_davidson::get_largest_eigenvalue: zheevx failed");
      theta = w[0];
      for (int i=0;i<n;i++)
         s[i] = z[i];
   }
   
   /// Largest eigenpair of the leading dim x dim block of M (single precision,
   /// complex Hermitian case), computed with LAPACK cheevx_.
   ///
   /// @param theta  output: the eigenvalue with index dim (il = iu = dim)
   /// @param s      output: the dim components of the matching eigenvector;
   ///               must provide room for at least dim entries
   /// @param dim    order of the block of M to use; must be >= 1
   /// @throws std::invalid_argument if dim < 1
   /// @throws std::runtime_error    if cheevx_ returns info != 0
   template <class MATRIX, class VS>
   void jacobi_davidson<MATRIX, VS>::get_largest_eigenvalue(magnitude_type& theta, std::complex<float> s[], int dim)
   {
      if (dim < 1)
         throw std::invalid_argument("ietl::jacobi_davidson::get_largest_eigenvalue: dim must be >= 1");

      // Work on a copy: only the upper triangle is needed since uplo='U'.
      FortranMatrix<scalar_type> M_(dim,dim);
      for (int i=0;i<dim;i++) for (int j=0;j<=i;j++)
         M_(j,i) = M(j,i);
      float abstol = atol_;
      char jobz='V';     char range='I';   char uplo='U';
      int n=dim;
      int lda=dim;       int il=n;         int iu=n;
      int m=0;           int ldz=n;        int lwork=8*n;
      int info=0;
      float vl=0.f, vu=0.f;   // passed to cheevx_ alongside range='I'
      // Heap storage instead of non-standard variable-length arrays.
      std::vector<float> w(n), rwork(7*n);
      std::vector<std::complex<float> > z(n), work(lwork);
      std::vector<int> iwork(5*n), ifail(n);
      cheevx_(jobz, range, uplo, n, M_.data(), lda, vl, vu, il, iu, abstol, m, &w[0], &z[0], ldz, &work[0], lwork, &rwork[0], &iwork[0], &ifail[0], info);
      if (info != 0)
         throw std::runtime_error("ietl::jacobi_davidson::get_largest_eigenvalue: cheevx failed");
      theta = w[0];
      for (int i=0;i<n;i++)
         s[i] = z[i];
   }
}
#endif

