/*================================================================================================
  Analysis.h
  Version 1: 12/1/2017

Copyright (c) Patrice Koehl.

>>> SOURCE LICENSE >>>

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

>>> END OF LICENSE >>>

================================================================================================== */

#ifndef _ANALYSIS_
#define _ANALYSIS_

/*================================================================================================
 Includes
================================================================================================== */

#include <math.h>
#include <cstdlib>
#include <random>

#ifndef M_PI
    #define M_PI 3.14159265358979323846
#endif

#include "bestfit.h"
// File-scope BESTFIT instance; Analysis<T>::rigidBestfit calls fit.bestfitm() on it.
// NOTE(review): `T` is not declared anywhere in this header, so it must already name a type
// at the point where this file is included -- confirm against the including source.
// NOTE(review): this is a non-inline object defined in a header; including the header from
// more than one translation unit would presumably give duplicate definitions -- verify.
BESTFIT<T> fit;

/*================================================================================================
 define matvec
================================================================================================== */

// Pointer to a user-supplied matrix-vector operator.
// Analysis<T>::checkEigVal invokes it as op(N, 1, &eigVect[N*i], N, Work, N, mvparams):
// x is the input vector, y receives the result, and mvparams is forwarded untouched.
// NOTE(review): ldx / ldy look like the leading dimensions of x and y -- confirm against
// the operator implementations.
typedef void (*matvec) (int nrow, int ncol, T *x, int ldx, T *y, int ldy, void *mvparams);


/*================================================================================================
  Class
================================================================================================== */

  // Analysis<T>: post-processing helpers for a set of eigenpairs (B-factors, overlaps with a
  // target conformation, correlation matrix, residual and orthogonality checks).
  // The class holds no data members; every routine works on caller-provided arrays.
  template <typename T>
  class Analysis {

	public:

		// compareBfact: compares atoms[i].bfact with bfact[i] for i in [0, Natoms).
		// Writes the RMS difference to *rms and the linear correlation coefficient to
		// *correl; both are set to 0 when either sum of squares is zero.
		void compareBfact(int Natoms, std::vector<Atoms<T>>& atoms, T *bfact, T *rms, T *correl);

		// computeBfact: zeroes bfact[0..Nat), then for each mode i in [nmode1, nmode2) adds
		// (squared displacement of atom j in mode i) * facb / eigVal[i] to bfact[perm[j]].
		// After each mode the running B-factors are compared with atoms[].bfact and the
		// result is stored in rms[i] and correl[i]. Mode i starts at eigVect[i*3*Natoms].
		void computeBfact(int Natoms, int Nat, int nmode1, int nmode2, T *eigVal, T *eigVect,
		std::vector<Atoms<T>>& atoms, std::vector<int>& perm, T *bfact, T *rms, T *correl);

		// computeOverlap: calls rigidBestfit(atoms1, atoms2) (which rewrites the coordinates
		// of atoms2), stores the resulting rms in *rms0 and rms[0], then for each mode i in
		// [nmode1, nmode2) stores the normalised projection of the displacement
		// atoms2 - atoms1 on the mode in overlap[i] and the remaining rms in rms[i].
		// quality and cadist are not written by the current implementation.
		void computeOverlap(int Natoms, int nmode1, int nmode2, T *eigVect,
		std::vector<Atoms<T>>& atoms1, std::vector<Atoms<T>>& atoms2, T *rms0, T *rms, T *overlap, 
		T *quality, T *cadist);

		// computeCorrel: fills the Nat x Nat matrix correl (diagonal 1, symmetric) with the
		// mode-summed dot products of atomic displacements, each divided by
		// sqrt(bfact[i]*bfact[j]). Modes n in [nmode1, nmode2) are used.
		void computeCorrel(int Natoms, int Nat, int nmode1, int nmode2, T *eigVal, T *eigVect,
		T *bfact, T *correl);

		// checkEigVal: for each of the NE eigenpairs stores
		// sqrt(|| op(e_i) - eigVal[i] e_i ||^2) / Anorm in err[i], and reports the minimum,
		// maximum and mean of these values in *Emin, *Emax and *Emean.
		// nthreads is not used by the current implementation.
		void checkEigVal(matvec op, int N, int NE, T Anorm, T *eigVal, T *eigVect, 
			T *err, T *Emin, T *Emax, T *Emean, void *mvparams, int nthreads);

		// checkOrtho: forms the M x M product V^T V, subtracts 1 from its diagonal and
		// returns the value eig_dnrm2_ reports for the M*M resulting entries.
		T  checkOrtho(int N, int M, T *V);

		// Overlap: reads a target structure from targetfile (format chosen by searching the
		// name for "pdb", "pqr" or "cif"), runs computeOverlap against atoms, prints a short
		// summary and, unless outputfile is " ", writes the results to outputfile + ".over"
		// through wres.writeOverlap.
		void Overlap(int atom_type, std::vector<Atoms<T> >& atoms, int nmode1, int nmodes, T *eigVect,
		int check, T *rms_bfact, T *correl_bfact, std::string targetfile, std::string outputfile);

	private:

		// rigidBestfit: passes the coordinates and masses of both conformations to
		// fit.bestfitm, copies the second coordinate set back into atoms2, and returns the
		// value bestfitm returned (also stored in *rms).
		T rigidBestfit(std::vector<Atoms<T>>& atoms1, std::vector<Atoms<T>>& atoms2, T *rms);

  };

/*================================================================================================
 Template for mass-weighted bestfit between two conformations of a molecule
================================================================================================== */

template <typename T>
T Analysis<T>::rigidBestfit(std::vector<Atoms<T>>& atoms1, std::vector<Atoms<T>>& atoms2, T *rms)
{
	// The atom count is taken from atoms1; atoms2 is indexed over the same range.
	int count = atoms1.size();

	// Flat work buffers handed to fit.bestfitm(): xyz triplets, masses, and an identity index list.
	std::vector<T> xyzStart(3*count);
	std::vector<T> xyzTarget(3*count);
	std::vector<T> massStart(count);
	std::vector<T> massTarget(count);
	std::vector<int> identity(count);

	for (int a = 0; a < count; a++) {
		const int base = 3*a;
		for (int k = 0; k < 3; k++) {
			xyzStart[base+k]  = atoms1[a].coord[k];
			xyzTarget[base+k] = atoms2[a].coord[k];
		}
		massStart[a]  = atoms1[a].mass;
		massTarget[a] = atoms2[a].mass;
		identity[a]   = a;
	}

	// Plain pointer lvalues, so the call matches the original argument kinds exactly.
	T *pStart    = xyzStart.data();
	T *pTarget   = xyzTarget.data();
	T *pMassS    = massStart.data();
	T *pMassT    = massTarget.data();
	int *pList   = identity.data();

	T fitted = fit.bestfitm(pStart, count, pTarget, count, count, pList, pList, pMassS, pMassT);

	std::cout << " " << std::endl;
	std::cout << "rmsd Start - Target " << fitted << std::endl;
	std::cout << " " << std::endl;

	*rms = fitted;

	// Copy the second coordinate set (as left by bestfitm) back into atoms2.
	for (int a = 0; a < count; a++) {
		for (int k = 0; k < 3; k++) {
			atoms2[a].coord[k] = xyzTarget[3*a+k];
		}
	}

	return fitted;
}

/*================================================================================================
 CompareBfact: compare computed and experimental B-factors
================================================================================================== */

/*
 * compareBfact
 *
 * Compares the experimental B-factors stored in atoms[i].bfact with the computed values
 * bfact[i], for i in [0, Natoms).
 *
 *   Natoms  (input)  number of entries compared
 *   atoms   (input)  atoms; only the bfact member is read
 *   bfact   (input)  computed B-factors (at least Natoms entries)
 *   rms     (output) sqrt( sum (exp - calc)^2 / Natoms )
 *   correl  (output) linear correlation coefficient between the two sets
 *
 * Both outputs are 0 when either set has a zero sum of squares (this also covers
 * Natoms == 0). The correlation is additionally 0 when either set has zero variance,
 * where the coefficient is undefined and the original expression divided by zero.
 */
template <typename T>
void Analysis<T>::compareBfact(int Natoms, std::vector<Atoms<T>>& atoms, T *bfact, T *rms, T *correl)
{
	T Se = 0, See = 0, Sc = 0, Scc = 0, Sec = 0;

	for(int i = 0; i < Natoms; i++)
	{
		const T val_exp = atoms[i].bfact;
		const T val_cal = bfact[i];
		Se  = Se + val_exp;
		See = See + val_exp*val_exp;
		Sc  = Sc + val_cal;
		Scc = Scc + val_cal*val_cal;
		Sec = Sec + val_exp*val_cal;
	}

	*rms    = 0.0;
	*correl = 0.0;

	if(See!=0 && Scc!=0) {
		// Expanded form of sum (exp-calc)^2 / N; rounding can make it marginally negative
		// when the two sets coincide, so negative values are treated as zero.
		const T msd = (See+Scc-2*Sec)/Natoms;
		if(!(msd < 0)) *rms = std::sqrt(msd);

		// Product of the two (unnormalised) variances. It is zero when either set is
		// constant; the test is written so that a NaN still propagates to the output.
		const T varProd = (Natoms*See-Se*Se)*(Natoms*Scc-Sc*Sc);
		if(!(varProd <= 0)) *correl = (Natoms*Sec - Se*Sc)/std::sqrt(varProd);
	}
}

/*================================================================================================
 ComputeBfact: compute B-factors and compare with experimental B-factors
================================================================================================== */

/*
 * computeBfact
 *
 * Accumulates B-factors mode by mode and, after each mode, compares the running values
 * with the experimental ones.
 *
 *   bfact[0..Nat) is zeroed on entry.
 *   For each mode i in [nmode1, nmode2): mode i is read from eigVect[i*3*Natoms ...],
 *   bfact[perm[j]] += |displacement of atom j|^2 * facb / eigVal[i]   for j in [0, Nat),
 *   then rms[i] and correl[i] receive the result of compareBfact(Nat, atoms, bfact, ...).
 */
template <typename T>
void Analysis<T>::computeBfact(int Natoms, int Nat, int nmode1, int nmode2, T *eigVal, T *eigVect,
	std::vector<Atoms<T>>& atoms, std::vector<int>& perm, T *bfact, T *rms, T *correl)
{
	memset(bfact, 0, Nat*sizeof(T));

	T kT = 0.593;
	T facb = 8.0*kT*M_PI*M_PI/3.0;

	const int modeLen = 3*Natoms;

	// Host-side copy of the mode currently being processed.
	std::vector<T> mode(modeLen);

	for (int m = nmode1; m < nmode2; m++)
	{
		const T scale = facb/eigVal[m];
		const int first = m*3*Natoms;

#if defined(GPU)
		cudaMemcpy(mode.data(), &eigVect[first], 3*Natoms*sizeof(T), cudaMemcpyDeviceToHost);
#else
		for (int k = 0; k < modeLen; k++) {
			mode[k] = eigVect[first+k];
		}
#endif

		for (int a = 0; a < Nat; a++)
		{
			const T dx = mode[3*a];
			const T dy = mode[3*a+1];
			const T dz = mode[3*a+2];
			const T sq = dx*dx + dy*dy + dz*dz;
			const T contrib = sq*scale;
			bfact[perm[a]] += contrib;
		}

		T modeRms, modeCorrel;
		compareBfact(Nat, atoms, bfact, &modeRms, &modeCorrel);
		rms[m] = modeRms;
		correl[m] = modeCorrel;
	}
}

/*================================================================================================
 ComputeOverlap: compute overlap between modes and molecule displacement
================================================================================================== */

/*
 * computeOverlap
 *
 * Measures how well each mode describes the displacement between two conformations.
 *
 *   Natoms   (input)  number of atoms; vectors have length 3*Natoms
 *   nmode1,
 *   nmode2   (input)  modes i in [nmode1, nmode2) are processed; mode i starts at eigVect[3*Natoms*i]
 *   atoms1   (input)  start conformation
 *   atoms2   (in/out) target conformation; rigidBestfit() rewrites its coordinates
 *   rms0     (output) value returned through rigidBestfit()
 *   rms      (output) rms[0] is set to the initial rms; rms[i] is then
 *                     sqrt(rms0^2 - sum_{k<=i} c_k^2 / Natoms), c_k being the projection of the
 *                     displacement on mode k. Note that rms[0] is overwritten again when
 *                     nmode1 == 0.
 *   overlap  (output) overlap[i] = |c_i| / (||displacement|| * ||mode i||)
 *   quality,
 *   cadist   (unused) kept for interface compatibility; the code that filled them is
 *                     commented out.
 *
 * Work buffers are std::vector so they are released on every exit path (the original
 * implementation never freed `coord`).
 */
template <typename T>
void Analysis<T>::computeOverlap(int Natoms, int nmode1, int nmode2, T *eigVect,
	std::vector<Atoms<T>>& atoms1, std::vector<Atoms<T>>& atoms2, T *rms0, T *rms, 
	T *overlap, T *quality, T *cadist)
{
	int N = 3*Natoms;
	int inc = 1;
	T rms_val;

	rigidBestfit(atoms1, atoms2, &rms_val);
	*rms0 = rms_val;

	// coordBuf: start coordinates, progressively displaced along each mode.
	// diffBuf : displacement target - start after superposition.
	std::vector<T> coordBuf(N);
	std::vector<T> diffBuf(N);
	T *coord = coordBuf.data();
	T *vect1 = diffBuf.data();

	for(int i = 0; i < Natoms; i++)
	{
		for(int j = 0; j < 3; j++)
		{
			vect1[3*i+j] = atoms2[i].coord[j]-atoms1[i].coord[j];
			coord[3*i+j] = atoms1[i].coord[j];
		}
	}

#if defined(DOUBLE)
	T nrm1 = dnrm2_(&N, vect1, &inc);
#else
	T nrm1 = snrm2_(&N, vect1, &inc);
#endif

	rms[0] = rms_val;
	T sum_c2 = 0;

	for(int i = nmode1; i < nmode2; i++)
	{
		T *mode = &eigVect[N*i];

		T c = eig_ddot_(&N, mode, &inc, vect1, &inc);
		T nrm2 = eig_dnrm2_(&N, mode, &inc);
		overlap[i] = std::abs(c)/(nrm1*nrm2);

		sum_c2 = sum_c2 + c*c;
		T rms2 = rms_val*rms_val - sum_c2/Natoms;
		rms[i] = std::sqrt(rms2);

		// Move the working coordinates along this mode. The result is only consumed by
		// the (currently disabled) energy evaluation that filled quality / cadist.
		eig_daxpy_(&N, &c, mode, &inc, coord, &inc);
	}
}

/*================================================================================================
  Compute correlated motion matrix
================================================================================================== */

/*
 * computeCorrel
 *
 * Builds the Nat x Nat matrix of correlated motions.
 *
 *   Natoms   (input)  not used by this routine
 *   Nat      (input)  number of atoms; correl holds Nat*Nat entries, modes have 3*Nat entries
 *   nmode1,
 *   nmode2   (input)  modes n in [nmode1, nmode2) are summed; mode n starts at eigVect[n*3*Nat]
 *   eigVal   (input)  eigenvalues; each mode is weighted by facb / eigVal[n]
 *   bfact    (input)  per-atom values used for normalisation: entry (i,j) is divided by
 *                     sqrt(bfact[i]*bfact[j])
 *   correl   (output) symmetric matrix, row-major, diagonal set to 1
 *
 * NOTE(review): computeBfact strides eigVect by 3*Natoms while this routine strides by
 * 3*Nat; the two only agree when Nat == Natoms -- confirm which is intended.
 *
 * Changes with respect to the original: the per-mode work buffer is released (it was
 * leaked), sizes and matrix indices are computed in std::size_t instead of int, and a
 * non-positive Nat returns immediately instead of reaching memset.
 */
template <typename T>
void Analysis<T>::computeCorrel(int Natoms, int Nat, int nmode1, int nmode2, T *eigVal, T *eigVect,
	T *bfact, T *correl)
{
	if(Nat <= 0) return;

	const std::size_t nat = static_cast<std::size_t>(Nat);
	const std::size_t modeLen = 3*nat;

	memset(correl, 0, nat*nat*sizeof(T));
	for(std::size_t i = 0; i < nat; i++)
	{
		correl[nat*i+i]=1.0;
	}

	T kT = 0.593;
	T facb = 8.0*kT*M_PI*M_PI/3.0;

	// Host-side copy of the mode currently being processed.
	std::vector<T> Coord(modeLen);

	for (int n = nmode1; n < nmode2;n++)
	{
		const T val = facb/eigVal[n];
		const T *mode = eigVect + static_cast<std::size_t>(n)*modeLen;
#if defined(GPU)
		cudaMemcpy(Coord.data(), mode, modeLen*sizeof(T), cudaMemcpyDeviceToHost);
#else
		for(std::size_t j = 0; j < modeLen; j++) Coord[j] = mode[j];
#endif
		// Upper triangle only; the lower triangle is mirrored after normalisation.
		for(std::size_t i = 0; i + 1 < nat; i++)
		{
			const T xi = Coord[3*i];
			const T yi = Coord[3*i+1];
			const T zi = Coord[3*i+2];
			for(std::size_t j = i+1; j < nat; j++)
			{
				const T xj = Coord[3*j];
				const T yj = Coord[3*j+1];
				const T zj = Coord[3*j+2];
				const T val2 = (xi*xj+yi*yj+zi*zj)*val;
				correl[nat*i+j] = correl[nat*i+j]+val2;
			}
		}
	}

	for(std::size_t i = 0; i + 1 < nat; i++)
	{
		for(std::size_t j = i+1; j < nat; j++)
		{
			correl[nat*i+j] = correl[nat*i+j]/(std::sqrt(bfact[i]*bfact[j]));
			correl[nat*j+i] = correl[nat*i+j];
		}
	}
}

/*================================================================================================
  checkEigVal

	Purpose:
	========
	Computes the residuals (i.e. || Ae(i) - lambda(i) e(i) || / Anorm) for all
	eigenpairs (lambda(i), e(i)). Anorm is the (estimated) norm of the matrix

        Arguments:
        ==========
        op    	(input) pointer to function performing Matrix-Vector operations

	N	(input)	integer
		On entry, size of the matrix

	NE	(input) integer
		On entry, number of eigenpairs to check

	Anorm	(input) float or double
		On entry, estimate of the matrix norm

	eigVal	(input) float or double array (size NE)
		On entry, eigenvalues to check

	eigVect	(input) float or double array (size N * NE)
		On entry, eigenvectors associated to eigVal, in column-based storage (Fortran like)

	err	(output) float or double array (size NE)
		On exit, residual norms for all NE eigenpairs

	Emin	(output) pointer to float or double
		On exit, minimal residual norm over the NE eigenpairs

	Emax	(output) pointer to float or double
		On exit, maximal residual norm over the NE eigenpairs

	Emean	(output) pointer to float or double
		On exit, average residual norm over the NE eigenpairs

	mvparams (input) pointer
		On input, pointer to params for op

	nthreads (input) integer
		On entry, number of threads available for computation, if on CPU

	Notes:
        ======

	If computation on GPU, eigVal on CPU memory while eigVect on GPU memory

================================================================================================== */

template <typename T>
void Analysis<T>::checkEigVal(matvec op, int N, int NE, T Anorm, T *eigVal, T *eigVect, T *err, 
		T *Emin, T *Emax, T *Emean, void *mvparams, int nthreads)
{
	int stride  = 1;	// unit increment for the vector kernels
	int oneCol  = 1;	// op is applied to a single column at a time

	// Scratch vector of length N that receives op(e_i) and then the residual.
	T *resid;
#if defined(GPU)
	cudaMalloc((void **)&resid, N*sizeof(T));
#else
	resid = new T[N];
#endif

	*Emean = 0;

	for (int k = 0; k < NE; k++)
	{
		T *vec = &eigVect[N*k];

		// resid = op(e_k) - lambda_k e_k
		op(N, oneCol, vec, N, resid, N, mvparams);
		T shift = -eigVal[k];
		eig_daxpy_(&N, &shift, vec, &stride, resid, &stride);

		// Relative residual norm for this eigenpair.
		T sq = eig_ddot_(&N, resid, &stride, resid, &stride);
		T relErr = std::sqrt(sq)/Anorm;

		err[k] = relErr;
		*Emean += relErr;

		// Running extrema; the first eigenpair initialises both.
		if (k == 0 || relErr < *Emin) {
			*Emin = relErr;
		}
		if (k == 0 || *Emax < relErr) {
			*Emax = relErr;
		}
	}

	*Emean /= NE;

#if defined(GPU)
	cudaFree(resid);
#else
	delete [] resid;
#endif
}

/*================================================================================================             
  checkOrtho

	Purpose:
	========
	Checks the orthonormality of M eigenvectors

        Arguments:
        ==========
	N		(input)	integer
			On entry, length of each eigenvector (number of rows of V)

	M		(input) integer
			On entry, number of eigenvectors to check

	V		(input) float or double array (size N * M)
			On entry, eigenvectors, in column-based storage (Fortran like)

	checkOrtho	(output) float or double
			On exit, || V^T V - I ||

================================================================================================== */
                
template <typename T>
T Analysis<T>::checkOrtho(int N, int M, T *V)
{
	// M x M buffer that receives V^T V.
	T *gram;
#if defined(GPU)
	cudaMalloc((void **)&gram, M*M*sizeof(T));
#else
	gram = new T[M*M];
#endif

	T one  = 1.0;
	T zero = 0.0;
	int stride = 1;
	char opT = 'T';
	char opN = 'N';

	// gram = 1 * V^T * V + 0 * gram
	eig_dgemm_(&opT, &opN, &M, &M, &N, &one, V, &N, V, &N, &zero, gram, &M);

	// Subtract 1 from every diagonal entry.
#if defined(GPU)
	T minusOne = -1;
	int NBLOCKS = (M+THREADS_PER_BLOCK-1)/THREADS_PER_BLOCK;
	kernel_add_diag<<<NBLOCKS, THREADS_PER_BLOCK>>>(M, minusOne, gram, M);
#else
	for (int d = 0; d < M; d++) {
		gram[d*M+d] -= 1.0;
	}
#endif

	// Norm over all M*M entries, treated as one long vector.
	int count = M*M;
	T result = eig_dnrm2_(&count, gram, &stride);

#if defined(GPU)
	cudaFree(gram);
#else
	delete [] gram;
#endif

	return result;
}

/*================================================================================================
  Check overlap 
================================================================================================== */

/*
 * Overlap
 *
 * Reads a target conformation from file and reports how well the modes describe the
 * displacement from `atoms` to that target.
 *
 *   atom_type    (input) forwarded to the reader
 *   atoms        (input) start conformation
 *   nmode1,
 *   nmodes       (input) modes in [nmode1, nmodes) are analysed; result arrays hold nmodes entries
 *   eigVect      (input) eigenvectors, forwarded to computeOverlap
 *   check,
 *   rms_bfact,
 *   correl_bfact (input) forwarded unchanged to wres.writeOverlap
 *   targetfile   (input) target structure; the reader is chosen by searching the name for
 *                        "pdb", then "pqr", then "cif". The program exits with status 1 if
 *                        none is found.
 *   outputfile   (input) results go to outputfile + ".over" unless outputfile is " "
 *
 * Returns without computing anything when nmodes <= 0 or when the target and start
 * structures have different atom counts.
 *
 * Changes with respect to the original: the four result buffers are std::vector<T>, so they
 * are released on every return path (they were never freed); cadist and rms now have type T,
 * matching the T* parameters of computeOverlap; nmodes <= 0 is rejected up front instead of
 * indexing rms_over[nmodes-1].
 */
template <typename T>
void Analysis<T>::Overlap(int atom_type, std::vector<Atoms<T> >& atoms, int nmode1, int nmodes, T *eigVect,
			int check, T *rms_bfact, T *correl_bfact, std::string targetfile, std::string outputfile)
{

	if(nmodes <= 0) {
		std::cout << " " << std::endl;
		std::cout << "No modes available; overlap not computed" << std::endl;
		std::cout << " " << std::endl;
		return;
	}

	int natoms = atoms.size();

	// Per-mode results, zero-initialised.
	std::vector<T> rms_over(nmodes);
	std::vector<T> overlap(nmodes);
	std::vector<T> quality(nmodes);
	std::vector<T> cadist(nmodes);

	std::string extension=".over";
	std::string ofile = outputfile;
	ofile.append(extension);

	double norm;
	T rms = 0;
	int inc = 1;

/* 	==========================================================================================
	read coordinates of target structure
   	========================================================================================== */

	std::vector<Atoms<T> > atomt;
	int nchains = 0;
	int flag_mass = 0;

	if(targetfile.find("pdb") != std::string::npos) {
		readinput.readFromPDB(targetfile, atom_type, flag_mass, atomt, &nchains);
	} else if(targetfile.find("pqr") != std::string::npos) {
		readinput.readFromPQR(targetfile, atom_type, flag_mass, atomt, &nchains);
	} else if(targetfile.find("cif") != std::string::npos) {
		readinput.readFromCIF(targetfile, atom_type, flag_mass, atomt, &nchains);
	} else {
		std::cout << " " << std::endl;
		std::cout << "Input file format not recognized; program can only read PDB, PQR, and CIF files" << std::endl;
		std::cout << " " << std::endl;
		exit(1);
	}

	int natomt = (int) atomt.size();

	if(natomt!=natoms) {
		std::cout << " " << std::endl;
		std::cout << "Number of atoms found in target    : " << natomt << std::endl;
		std::cout << "Number of atoms found in start     : " << natoms << std::endl;
		std::cout << "No match!" << std::endl;
		return;
	}

/* 	==========================================================================================
	compute overlap between the modes and the start -> target displacement
   	========================================================================================== */

	computeOverlap(natoms, nmode1, nmodes, eigVect, atoms, atomt, &rms, rms_over.data(),
		overlap.data(), quality.data(), cadist.data());
	norm = dnrm2_(&nmodes, overlap.data(), &inc);

/* 	==========================================================================================
	Print and write
   	========================================================================================== */

	std::cout << std::endl;
	std::cout << "RMS between B_map and target   :  " << std::setw(10) <<  rms_over[nmodes-1] << std::endl;
	std::cout << "Total overlap NM - deformation :  " << std::setw(10) <<  norm << std::endl;
	std::cout << std::endl;

	if(outputfile != " ") {
		wres.writeOverlap(ofile, nmode1, nmodes, check, rms_bfact, correl_bfact, rms, rms_over.data(), 
		overlap.data(), quality.data(), cadist.data());
	}

}


#endif
