/*  
    This code derives from the R amap package and it is modified by Davide
    Albanese <albanese@fbk.it>. 
    
    The Python interface is written by  Davide Albanese <albanese@fbk.it>.
    (C) 2008 Fondazione Bruno Kessler - Via Santa Croce 77, 38100 Trento, ITALY.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <Python.h>
#include <numpy/arrayobject.h>
#include <stdlib.h>
#include <stdio.h>
#include <float.h>
#include <math.h>
#include <stdbool.h>

#define MAX( A , B ) ((A) > (B) ? (A) : (B))
#define MIN( A , B ) ((A) < (B) ? (A) : (B))

#define EUCLIDEAN 1


#define SINGLE   1
#define COMPLETE 2
#define MCQUITTY 3
#define MEDIAN   4		  
 


long ioffst(long n, long i, long j)
{
  return j + i * n - (i + 1) * (i + 2) / 2;
}


/* Distance euclidean.
 * 
 * Euclidean distance between 2 vectors a,b is
 * d = sqrt (sum_i (a_i - b_i)^2)
 * 
 * This function compute distance between 2 vectors x[i1,] & y[i2,]
 * x and y are matrix; we use here only line i1 from x and
 * line i2 from y. Number of column (nc) is the same in x and y,
 * number of column can differ (nr_x, nr_y).
 * 
 * x:    matrix of size nr_x * nc; line i1 is of interest
 * y:    matrix of size nr_y * nc; line i1 is of interest
 * nr_x: number of row in matrix x
 * nr_y: number of row in matrix y
 * nc:   number of column in matrix x or y
 * i1:   row choosen in matrix x
 * i2:   row choosen in matrix y
 */
float distance_euclidean(double *x, double *y , long nr_x, long nr_y,
			 long nc, long i1, long i2)
{
  double dev;
  float dist;
  long count, j;

  count = 0;
  dist = 0.0;
 
  for(j = 0 ; j < nc ; j++)
    {
      dev = (x[i1] - y[i2]);
      dist += (float) dev * dev;
      i1 += nr_x;
      i2 += nr_y;
    }
   
  return sqrt(dist);
}


/*
 * Compute distance.
 * 
 * x:      input matrix
 * nr, nc: number of row and columns
 * d:      distance half matrix.
 * method: 1, 2,... method used
 */
void distance(double *x, long nr, long nc, float *d, int method)
{
  long  i, j, ij;
  float (* distfun) (double *, double *, long, long, long, long, long);
    
  switch(method)
    {
    case EUCLIDEAN:
      distfun = distance_euclidean;
      break;
      
    default:
      printf("distance(): invalid distance\n");
      exit(0);
    }

  for (j=0; j<=nr; j++)
    {
      ij = (2 * nr - j - 1) * j / 2 ;
      for(i=j+1; i<nr; i++)
        d[ij++] = distfun(x, x, nr, nr, nc, i, j);  
    }
}


/* Hierachical clustering subroutine
 * Compute hierachical clustering from a distance matrix 
 * 
 * n:        number of individuals
 * ia, ib:   result (merge)
 * iia, iib: result (merge)
 */
void hcass2(long n, long *ia, long *ib, long *iia, long *iib, long *iorder)
{
  long i, j, k, k1, k2, loc;
  
  for (i=0; i<n; i++ ) 
    {
      iia[i] = - ia[i];
      iib[i] = - ib[i];
    }
  
  for (i=0; i<(n-2); i++) 
    {
      k = MIN(ia[i], ib[i]);
      for (j=i+1; j<(n-1); j++) 
	{
	  if( ia[j] == k )
	    iia[j] = i + 1;
	  if( ib[j] == k )
	    iib[j] = i + 1;
	}
    }  
  
  for (i=0; i< (n-1); i++ ) 
    {
      if ((iia[i] > 0) && (iib[i] < 0))
	{
	  k = iia[i];
	  iia[i] = iib[i];
	  iib[i] = k;
	}
      
      if ((iia[i] > 0) && (iib[i] > 0))
	{
	  k1 = MIN (iia[i], iib[i]);
	  k2 = MAX (iia[i], iib[i]);
	  iia[i] = k1;
	  iib[i] = k2;
	}
    }
  
  /* Order */

  iorder[0] = - iia[n-2];
  iorder[1] = - iib[n-2];
  loc = 2;
  for (i=(n-3); i>=0; i--) 
    for (j=0; j<loc; j++ ) 
      if (-iorder[j] == i+1)
	{
	  /* REPLACE IORDER(J) WITH IIA(I) AND IIB(I) */ 
	  iorder[j] = -iia[i];
	  if (j == (loc-1)) 
	    {
	      loc++;
	      iorder[loc-1]= -iib[i];
		  break; /* for j */
	    }
	  
	  loc++;
	  for (k=loc-1; k>=(j+1); k--)
	    iorder[k] = iorder[k-1];
	  
	  iorder[j+1] = -iib[i];
	  break; /* for j */
	}
}  


void hclust(long n, long iopt, long *ia, long *ib, double *crit, float *diss, long *iorder)
{
  long im = 0, jm = 0, jj = 0;
  long i, j, ncl, ind, i2, j2, k, ind1, ind2, ind3;
  double inf, dmin, xx;
  long *nn;
  double *disnn;
  short int *flag;
  long *iia;
  long *iib;
  
 
  nn    = (long*) malloc (n * sizeof(long));
  disnn = (double*) malloc (n * sizeof(double));
  flag  = (short int*) malloc (n * sizeof(short int)); 
  
  /* Initialisation */
  for ( i=0; i<n ; i++)
    flag[i] = 1;
  
  ncl = n;
  inf = DBL_MAX;

  /* Carry out an agglomeration - first create list of NNs */
  for ( i=0; i<(n-1) ; i++)
    {
      dmin = inf;
      for (j=i+1; j<n; j++)
	{
	  ind = ioffst(n, i, j);
	  if (diss[ind] < dmin)
	    {
	      dmin = (double) diss[ind];
	      jm = j;
	    }
	}
      nn[i] = jm;
      disnn[i] = dmin;
    }
 
  /* Repeat previous steps until N-1 agglomerations carried out */
  while (ncl > 1)
    {
      /* Next, determine least diss. using list of NNs */
      dmin = inf;
      for ( i=0; i<(n-1) ; i++)
	if (flag[i])
	  if (disnn[i] < dmin )
	    {
	      dmin = disnn[i];
	      im = i;
	      jm = nn[i];
	    }
      
      ncl = ncl - 1;
      
      /*
       * This allows an agglomeration to be carried out.
       * At step n-ncl, we found dmin = dist[i2, j2]
       */
      
      i2 = MIN (im,jm);
      j2 = MAX (im,jm);
      ia[n-ncl-1] = i2 + 1;
      ib[n-ncl-1] = j2 + 1;
      crit[n-ncl-1] = dmin;
    	  
      /* Update dissimilarities from new cluster */
      flag[j2] = 0;
      dmin = inf;
      for (k=0; k<n; k++)
	{ 
	 

	  if(flag[k] && (k != i2) )
	    {      
	      if (i2 < k)
		ind1 = ioffst(n, i2, k);
	      else
		ind1 = ioffst(n, k, i2);
	      if (j2 < k)
		ind2 = ioffst(n, j2, k);
	      else
		ind2 = ioffst(n, k, j2);
	      
	      ind3 = ioffst(n, i2, j2);
	      xx = (double) diss[ind3];
	      
	      /*
	       * Gi & Gj are agglomerated => Gii
	       * We are calculating D(Gii,Gk) (for all k)
	       *
	       * diss[ind1] = D(Gi,Gk) (will be replaced by  D(Gii,Gk))
	       * diss[ind2] = D(Gj,Gk) 
	       * xx = diss[ind3] = D(Gi,Gj)
	       * 
	       */
	      switch(iopt)
		{	  
		  // SINGLE LINK METHOD - IOPT=1
		case 1:
		  diss[ind1] = (float) MIN (diss[ind1], diss[ind2]);
		  break; 
		  
		  // COMPLETE LINK METHOD - IOPT=2.
		case 2: 
		  diss[ind1] = (float) MAX (diss[ind1], diss[ind2]);
		  break; 
		  
		  //  MCQUITTY'S METHOD - IOPT=3.
		case 3:
		  diss[ind1] = (float) 0.5 * diss[ind1] + 0.5 * diss[ind2]; 
		  break;
		  
		  // MEDIAN (GOWER'S) METHOD - IOPT=4.
		case 4:
		  diss[ind1] = (float) 0.5 * diss[ind1] + 0.5 * diss[ind2] - 0.25 * xx;
		  break;
		  
		} 

	      if ((i2 <= k) && ( diss[ind1] < dmin ))
		{
		  dmin = (double) diss[ind1];
		  jj = k;
		}
	    } 
	}
	
      disnn[i2] = dmin;
      nn[i2] = jj;
            
      /*
       *  Update list of NNs insofar as this is required.
       */
      for (i=0; i<(n-1); i++)
	if(flag[i] && ((nn[i] == i2) || (nn[i] == j2)))
	  {
	    /* (Redetermine NN of I:)   */
	    dmin = inf;
	    for (j=i+1; j<n; j++)
	      {
		ind = ioffst(n,i,j);
		if (flag[j] && (i != j) && (diss[ind] < dmin))
		  {
		    dmin = (double) diss[ind];
		    jj = j;
		  }

		nn[i] = jj;
		disnn[i] = dmin;
	      }
	  }
    }  
  
  free(nn);
  free(disnn);
  free(flag);
  
  iia = (long*) malloc (n * sizeof(long));
  iib = (long*) malloc (n * sizeof(long));
  
  hcass2(n, ia, ib, iia, iib, iorder);
 
  for (i=0; i< n; i++ ) 
    {
      ia[i] = iia[i];
      ib[i] = iib[i];
    }
  
  free(iia);
  free(iib);

}


/* Hierarchical clustering
 *
 * x:      data nr x nc
 * nr, nc: number of row and columns		  
 * method: integer -> distance method	  
 * iopt    integer -> link used 
 * ia, ib: result (merge)			  
 * crit    result (height)
 */

void hcluster(double *x, long nr, long nc, int method, long iopt, 
	      long *ia , long *ib, double *crit, long *iorder)
{
  long  len;
  float *d;
 
 
  len = (nr * (nr - 1)) / 2;
  d = (float *) malloc (len * sizeof(float));
  
  // Calculate d: distance matrix
  distance(x, nr, nc, d, method);
  
  // Hierarchical clustering
  hclust(nr, iopt, ia, ib, crit, d, iorder);
  
  free(d);
}



void cutree(long *ia, long *ib, long n, double ht, double *heights, long *ans)
{
  long i;

  long k, l, nclust, m1, m2, j;
  bool *sing, flag;
  long *m_nr, *z;
  long which;
  
  /* compute which (number of clusters at height ht) */
  
  heights[n-1] = DBL_MAX;
  flag = false;
  i = 0;
  while(!flag)
    {
      if(heights[i] > ht)
	  flag = true;
      i++;     
    }
  
  which = n + 1 - i;

  /* using 1-based indices ==> "--" */
  sing = (bool *) malloc(n * sizeof(bool)); sing--;
  m_nr = (long *) malloc(n * sizeof(long)); m_nr--;
  z    = (long *) malloc(n * sizeof(long)); z--;
  
  for(k = 1; k <= n; k++)
    {
      sing[k] = true;  /* is k-th obs. still alone in cluster ? */
      m_nr[k] = 0;     /* containing last merge-step number of k-th obs. */
    }
  
  for(k = 1; k <= n-1; k++)
    {
      /* k-th merge, from n-k+1 to n-k atoms: (m1,m2) = merge[ k , ] */
      m1 = ia[k-1];
      m2 = ib[k-1];
      
      if(m1 < 0 && m2 < 0)
	{   
	  /* merging atoms [-m1] and [-m2] */
	  m_nr[-m1] = m_nr[-m2] = k;
	  sing[-m1] = sing[-m2] = false;
	}
      else 
	if(m1 < 0 || m2 < 0)
	  {
	    /* the other >= 0 */
	    if(m1 < 0)
	      { 
		j = -m1;
		m1 = m2;
	      }
	    else
	      j = -m2;
	    
	    /* merging atom j & cluster m1 */
	    for(l=1; l<=n; l++)
	      if (m_nr[l] == m1)
		m_nr[l] = k;
	    
	    m_nr[j] = k;
	    sing[j] = false;
	  }
	else
	  {
	    /* both m1, m2 >= 0 */
	    for(l=1; l<=n; l++)
	      if(m_nr[l]==m1 || m_nr[l]==m2)
		m_nr[l] = k;
	  }
     
      if(which == n-k)
	{
	  for(l = 1; l <= n; l++)
	    z[l] = 0;
	  
	  nclust = 0;
	  
	  for(l = 1, m1 = 0; l <= n; l++, m1++)
	    {
	      if(sing[l])
		ans[m1] = ++nclust;
	      else 
		{
		  if (z[m_nr[l]] == 0)
		    z[m_nr[l]] = ++nclust;
		  ans[m1] = z[m_nr[l]];
		}
	    }
	}
    }

  if(which == n)
    for(l = 1, m1 = 0; l <= n; l++, m1++)
      ans[m1] = l;
     
  free(sing+1);
  free(m_nr+1);
  free(z+1);
}


static PyObject *hccore_compute(PyObject *self, PyObject *args, PyObject *keywds)
{
  /* Inputs */
  PyObject *x = NULL; PyObject *xa = NULL;
  int method = 1;
  int link   = 1;
  
  npy_intp nr, nc;
  

  /* Outputs */
  PyObject *ia = NULL;
  PyObject *ib = NULL;
  PyObject *heights = NULL; 
  PyObject *order = NULL; 

  npy_intp ia_dims[1];
  npy_intp ib_dims[1];
  npy_intp heights_dims[1];
  npy_intp order_dims[1];
  
  double *xa_v;
  long *ia_v;
  long *ib_v;
  double *heights_v;
  long *order_v;  

  
  static char *kwlist[] = {"x", "method", "link", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, keywds, "O|ii", kwlist, &x, &method, &link))
    return NULL;

  xa = PyArray_FROM_OTF(x, NPY_DOUBLE, NPY_IN_ARRAY);
  if (xa == NULL) return NULL;

  nr = PyArray_DIM(xa, 1);
  nc = PyArray_DIM(xa, 0);
  xa_v = (double *) PyArray_DATA(xa);

  ia_dims[0] = (npy_intp) nr;
  ia = PyArray_SimpleNew(1, ia_dims, NPY_LONG);
  ia_v = (long *) PyArray_DATA(ia);

  ib_dims[0] = (npy_intp) nr;
  ib = PyArray_SimpleNew(1, ib_dims, NPY_LONG);
  ib_v = (long *) PyArray_DATA(ib);
  
  heights_dims[0] = (npy_intp) nr;
  heights = PyArray_SimpleNew(1, heights_dims, NPY_DOUBLE);
  heights_v = (double *) PyArray_DATA(heights);

  order_dims[0] = (npy_intp) nr;
  order = PyArray_SimpleNew(1, order_dims, NPY_LONG);
  order_v = (long *) PyArray_DATA(order);
 
  hcluster(xa_v, (long)nr, (long)nc, method, link, ia_v, ib_v, heights_v, order_v); 
    
  Py_DECREF(xa);

  return Py_BuildValue("(N, N, N, N)", ia, ib, heights, order);
}


static PyObject *hccore_cut(PyObject *self, PyObject *args, PyObject *keywds)
{
  /* Inputs */
  PyObject *ia      = NULL; PyObject *iaa      = NULL;
  PyObject *ib      = NULL; PyObject *iba      = NULL;
  PyObject *heights = NULL; PyObject *heightsa = NULL;
  double ht;
  
  npy_intp n;
  
  /* Outputs */
  PyObject *cmap = NULL;
  
  npy_intp cmap_dims[1];

  
  long *ia_v;
  long *ib_v;
  double *heights_v;
  long *cmap_v;
  

  static char *kwlist[] = {"ia", "ib", "heights", "ht", NULL};
  if (!PyArg_ParseTupleAndKeywords(args, keywds, "OOOd", kwlist, &ia, &ib, &heights, &ht))
    return NULL;

  iaa = PyArray_FROM_OTF(ia, NPY_LONG, NPY_IN_ARRAY);
  if (iaa == NULL) return NULL;
  
  iba = PyArray_FROM_OTF(ib, NPY_LONG, NPY_IN_ARRAY);
  if (iba == NULL) return NULL;
  
  heightsa = PyArray_FROM_OTF(heights, NPY_DOUBLE, NPY_IN_ARRAY);
  if (heightsa == NULL) return NULL;
  
  n = PyArray_DIM(heightsa, 0);
  
  ia_v      = (long *) PyArray_DATA(iaa);
  ib_v      = (long *) PyArray_DATA(iba);
  heights_v = (double *) PyArray_DATA(heightsa);
 
  cmap_dims[0] = (npy_intp) n;
  cmap = PyArray_SimpleNew(1, cmap_dims, NPY_LONG);
  cmap_v = (long *) PyArray_DATA(cmap);

  cutree(ia_v, ib_v, n, ht, heights_v, cmap_v);
  
  Py_DECREF(iaa);
  Py_DECREF(iba);
  Py_DECREF(heightsa);

  return Py_BuildValue("N", cmap);
}


static char module_doc[] = "Hierarchical Cluster Core";
static char hccore_compute_doc[] = "Compute Hierarchical Cluster";
static char hccore_cut_doc[] = "Cuts the tree into several groups";

/* Method table */
static PyMethodDef hccore_methods[] = {
  {"compute",
   (PyCFunction)hccore_compute,
   METH_VARARGS | METH_KEYWORDS,
   hccore_compute_doc},
  {"cut",
   (PyCFunction)hccore_cut,
   METH_VARARGS | METH_KEYWORDS,
   hccore_cut_doc},
  {NULL, NULL, 0, NULL}
};


/* Init */
void inithccore()
{
  Py_InitModule3("hccore", hccore_methods, module_doc);
  import_array();
}
