/*================================================================================================
  chebDav.h
  Version 1: 08/10/2024

  Purpose: Finds the top N eigenvalues / eigenvectors of a real symmetric matrix

  Method: Block Chebyshev-Davidson algorithm

  Y. Zhou and Y. Saad, A Chebyshev-Davidson algorithm for large symmetric
  eigenproblems, SIAM J. Matrix Anal. Appl., 29, 954-971 (2007)

  Y. Zhou. A block Chebyshev-Davidson method with inner-outer restart for
  large eigenvalue problems. J. Comput. Phys, 229, 9188-9200 (2010)

Copyright (c) Patrice Koehl.

>>> SOURCE LICENSE >>>

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

>>> END OF LICENSE >>>

================================================================================================== */

#ifndef _CHEBDAV_
#define _CHEBDAV_

/*================================================================================================
 Includes
================================================================================================== */

#include <math.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <limits>

/*================================================================================================
 define opA

	Function-pointer type of the matrix-vector operators handed to CHEBDAV.
	Within this file the operators are invoked as
		op(N, ncol, X, N, Y, N, info->mvparams)
	where X holds ncol input columns of length N and Y receives the result, which the
	callers then use as the product of the matrix with X.
	ldx / ldy are presumably the leading dimensions of x and y (the callers always pass N).

	NOTE(review): T is not declared at this scope inside this header; presumably the
	including translation unit defines it (typedef or macro) before this point -- confirm.
================================================================================================== */

typedef void (*opA) (int nrow, int ncol, T *x, int ldx, T *y, int ldy, void *mvparam);

/*================================================================================================
  Define a class for computing eigenvectors / eigenvalues of a symmetric sparse matrix
  stored as sum of outer products

  The class holds no data members; every routine works on caller-supplied buffers and on
  the eig_info<T> structure (declared elsewhere in the project).
================================================================================================== */

template <typename T>
  class CHEBDAV {

	public:

		// Fill an eig_info structure with the run parameters (sizes, tolerance, counters,
		// timers). Must be called before chebDav.
		void init_info(eig_info<T> * info, int nrow, int act_max, int nblock,
		int mpoly, int method, int flag_start, int nev, int ned, T tol, int nthreads, void *mvparams);

		// Fill the operator parameter structure pointed to by info->mvparams; allocates
		// extra workspace when info->method % 3 != 0.
		void init_params(eig_info<T> * info, hessianMat<T> *csrMat, T *eigVal, T *eigVect);

		// Driver: block Chebyshev-Davidson iteration computing info->nev_d eigenpairs.
		// opA0 is the plain matrix-vector operator, opAD the operator with deflation.
		void chebDav(opA opA0, opA opAD, eig_info<T> * info, int N, 
			T *eigVal, T *eigVect, int *k_conv, T *work);

	private:

		// Orthonormalize column M of V against columns 0..M-1 (Gram-Schmidt with up to
		// 5 re-orthogonalization attempts); returns a norm ratio, or 0 if all attempts fail.
		T gramSchmidt(int N, int M, T *V, T *coef);

		// Orthonormalize columns M..M+Madd-1 of V, one column at a time, via gramSchmidt.
		void orthoNormalize(int N, int M, int Madd, T *V, T *coef);

		// Prepare the Rayleigh quotient matrix for next iteration: H is zeroed and its
		// leading diagonal is loaded from D[Nlow...].
		void updateH(int Nblock, int Nlow, int Nupper, T *D, T *H);

		// Fill Vect with N pseudo-random values.
		void randomVector(int N, T *Vect);

		// Estimate an eigenvalue of the operator by power iteration (Rayleigh quotient of
		// successive iterates); chebDav uses the result as the upper bound of the filter.
		T powerMethod(opA op, eig_info<T> *info, int N, T *Temp1, T *Temp2, T tol);

		// Move eigenpair number Nval down into the sorted (increasing) list of the
		// Nval pairs before it; returns the number of swaps performed.
		int swapPair(int N, int Nval, T *eigVal, T *eigVect);

	protected:

  };


/*================================================================================================
  init_info

	Purpose:
	========
	Initializes an eig_info structure. This function must be called before calling
	any other user level routine in the chebDav package.

	Arguments:
	==========
	info     (input / output) pointer to the structure eig_info
	          On exit, every field used by chebDav is initialized: problem sizes,
	          tolerance, timers / counters (reset to 0), verbosity (set to 1),
	          filter type (set to 1), matrix norm estimate (set to -1, i.e. "not yet
	          computed") and the slice list (cleared).

	nrow     (input) integer
	          Local dimension of the problem.

	act_max  (input) integer
	          Size of the active space used.

	nblock   (input) integer
	          Block size used.

	mpoly    (input) integer
	          Order of the Chebyshev polynomial filter.

	method   (input) integer
	          Stored in info->method. init_params allocates additional operator
	          workspace when method % 3 != 0.

	start    (input) integer
	          Flag stored in info->flag_start:
	            - if 0, fresh start from a random block
	            - otherwise, use the existing vectors in eigVect to start

	nev      (input) integer
	          Number of wanted eigenvalues and eigenvectors.

	ned      (input) integer
	          Number of deflation cycles to use. Values <= 0 are treated as 1.
	          info->nev_d is set to ceil(nev / ned), the number of eigenpairs per cycle.

	tol      (input) float or double
	          Tolerance on the residual norm.
	            - tol <= 0            : sqrt(machine epsilon of T) is used
	            - 0 < tol <= min(T)   : machine epsilon of T is used
	            - tol > 1             : min(0.1, 1/tol) is used
	            - otherwise           : tol is used as given

	nthreads (input) integer
	          Number of threads available for computation.

	mvparams (input) pointer
	          Pointer to the parameters of the matrix-vector operator.

================================================================================================== */

template <typename T>
void CHEBDAV<T>::init_info(eig_info<T> * info, int nrow, int act_max, int nblock,
		int mpoly, int method, int start, int nev, int ned, T tol, int nthreads, void *mvparams)
{

/*================================================================================================
	Floating point limits are taken from the actual template type T, so they stay
	correct whatever precision the class is instantiated with.
================================================================================================== */

	const T dmin = std::numeric_limits<T>::min();
	const T eps  = std::numeric_limits<T>::epsilon();

/*================================================================================================
	set tolerance
================================================================================================== */

	if (tol > 0) {
		info->tol = tol;
		if (info->tol <= dmin) {
			// too small to be meaningful: fall back to machine precision
			info->tol = eps;
		} else if (info->tol > 1.0) {
			// a value above 1 is read as an inverse tolerance, capped at 0.1
			info->tol = std::min(0.1, 1.0 / (info->tol));
		}
	} else {
		info->tol = std::sqrt(eps);
	}

/*================================================================================================
	Initialisation based on input parameters:
		n:		size of the matrix
		act_max:	active space
		block:		block size
		mpoly:		Chebyshev order
		nev:		# of eigenpairs to be computed
		ned:		# of deflation cycles
		nev_d:		# of eigenpairs per deflation cycle = ceil(nev/ned)
================================================================================================== */

	// guard against a division by zero when no (or a negative) cycle count is given
	const int ncycles = (ned > 0) ? ned : 1;

	int nev_d = nev / ncycles;
	if (nev % ncycles != 0) nev_d += 1;

	info->n          = nrow;
	info->nev        = nev;
	info->ned        = ncycles;
	info->nev_d      = nev_d;
	info->act_max    = act_max;
	info->block      = nblock;
	info->mpoly      = mpoly;
	info->flag_start = start;
	info->flag_zero  = 1;

	info->method     = method;

	// -1 means "norm not estimated yet"; chebDav then runs the power method
	info->anrm       = -1.0;

	info->current_slice = 0;
	info->slices.clear();

/*================================================================================================
	Verbose
================================================================================================== */

	info->verbose = 1;

/*================================================================================================
	Info about computing time
================================================================================================== */

	info->clk_tot  = 0;
	info->clk_op   = 0;
	info->clk_orth = 0;

	info->walltime = 0;

	info->mvp      = 0;
	info->north    = 0;

/*================================================================================================
	Passing information for Matrix-Vector operations
================================================================================================== */

	info->mvparams = mvparams;

/*================================================================================================
	Number of threads available
================================================================================================== */

	info->nthreads = nthreads;

/*================================================================================================
	Type of Chebyshev filtering: initialize to 1 (shifted, scaled filtering)
================================================================================================== */

	info->filter_type = 1;

}

/*================================================================================================
  init_params

	Purpose:
	========
	Fills the operator parameter structure (opparams<T>) that info->mvparams points to.

	Arguments:
	==========
	info    (input) pointer to the structure eig_info
	         Provides mvparams (the structure to fill), nthreads, method, nev and block.

	csrMat	(input) pointer to sparse matrix structure
		 Stored as params->csrHessian.

	eigVal	(input) pointer to array containing eigenvalues
		 Stored as params->eigVal.

	eigVect	(input) pointer to array containing eigenvectors
		 Stored as params->Ud.

	Workspace:
	==========
	When info->method % 3 != 0, one buffer of 2*nev + 4*nev*block values of type T is
	allocated (cudaMalloc when GPU is defined, new[] otherwise) and split as:
		sigma  = buffer
		sigma2 = buffer + 2*nev
		space  = buffer + 4*nev
	Otherwise sigma, sigma2 and space are left NULL.

	NOTE(review): with this layout "space" spans 4*nev*block - 2*nev values, not
	4*nev*block -- confirm that this is the intended size.
	NOTE(review): the buffer is not released in this file; presumably the owner of the
	opparams structure frees it -- confirm.

================================================================================================== */


template <typename T>
void CHEBDAV<T>::init_params(eig_info<T> * info, hessianMat<T> *csrMat, T *eigVal, T *eigVect)
{

	opparams<T> *op = (opparams<T> *) info->mvparams;

	// plain bookkeeping: hand the matrix and the eigenpair arrays to the operator
	op->csrHessian = csrMat;
	op->need       = 0;
	op->Ud         = eigVect;
	op->eigVal     = eigVal;
	op->nthreads   = info->nthreads;

	// no extra workspace unless requested below
	op->sigma  = NULL;
	op->sigma2 = NULL;
	op->space  = NULL;

	if(info->method % 3 == 0) return;

	// sizes of the three sub-arrays carved out of a single allocation
	const int nSigma = 2*info->nev;
	const int nBlock = 2*info->nev * info->block;
	const int nTotal = nSigma + 2*nBlock;

	T *pool;
#if defined(GPU)
	cudaMalloc((void **)&pool, nTotal*sizeof(T));
#else
	pool = new T[nTotal];
#endif

	op->sigma  = pool;
	op->sigma2 = pool + nSigma;
	op->space  = pool + 2*nSigma;
}
/*================================================================================================
  chebDav

	Purpose:
	========
	Find the NE = info->nev_d smallest eigenvalues of a Hessian, using the
	Block-Chebyshev-Davidson algorithm

	Full details in:
  		Y. Zhou and Y. Saad, A Chebyshev-Davidson algorithm for large symmetric
  		eigenproblems, SIAM J. Matrix Anal. Appl., 29, 954-971 (2007)

		Y. Zhou. A block Chebyshev-Davidson method with inner-outer restart for
		large eigenvalue problems. J. Comput. Phys, 229, 9188-9200 (2010)

	Outline of one iteration:
		1. Chebyshev-filter the current block (Space) into the next Nblock columns
		   of the basis stored in eigVect
		2. orthonormalize these columns against the whole basis
		3. apply the operator to them and extend the Rayleigh quotient matrix H
		4. diagonalize H (dsyev) and rotate the active basis (Ritz refinement)
		5. test the lowest Ritz pairs for convergence; lock the converged ones
		6. restart (inner / outer) when the sub-spaces become too large
		7. update the filter bounds and prepare the next block

	Arguments:
	==========

	opA0	(input) pointer
		On entry, pointer to the Matrix-Vector operator

	opAD	(input) pointer
		On entry, pointer to the Matrix-Vector operator + deflation

	info	(input / output) pointer to info struct
		On entry, structure containing the parameters for the run.
		On exit, anrm (if it was -1), the timers (clk_tot, clk_op, clk_orth,
		walltime), the counters (mvp, north) and niter (number of iterations
		performed) are updated.

	N	(input) integer
		On entry, size of the matrix

	eigVal	(output) array of float or double, of size  >= NE
		On exit, the converged eigenvalues, in increasing order

	eigVect	(input / output) array of float or double
		On entry, if info->flag_start != 0, the first N*block values are the
		starting block.
		On exit, the first k_conv columns are the converged eigenvectors.
		This array is also the storage of the whole search basis.
		NOTE(review): the basis appears to need up to N*(nev_d + act_max) values,
		not only N*NE -- confirm against the callers.

	k_conv	(output) integer
		The actual number of eigenpairs that have converged. Always set on exit,
		including when the iteration limit is reached or the dense eigen-solver
		reports a failure.

	work	(input) arrays of float or double, or size lwork
		On entry, workspace, laid out as follows:
			N*act_max		// Space: block to filter, scratch, dsyev work
			+ act_max*act_max 	// H: Rayleigh quotient (Davidson) matrix
			+ act_max		// D: Ritz values
		Space must in addition hold at least 2*N values (power method) and the
		scratch area that chebFilter receives at &Space[N*block].
================================================================================================== */

template <typename T>
 void CHEBDAV<T>::chebDav(opA opA0, opA opAD, eig_info<T> * info, int N, 
			T *eigVal, T *eigVect, int *k_conv, T *work)
  {

/*================================================================================================
	Declare some variables
================================================================================================== */

	int inc = 1;
	int i__1 = 1;
	int k_c, k_found;
	int k_sub, k_act, k_ri;
	int h_size, n_mid;
	int inf = 0;
	int i_count, no_swap, n_swap;

	T norm;
	T alpha, beta, scale;
	T upperb, lowerb, a0;

	char Trans   = 'T';
	char NoTrans = 'N';
	char U       = 'U';
	char V       = 'V';

	clock_t clk0, clk1, clk2;
	timeval tim;
	double t1, t2, u1, u2;

/*================================================================================================
	Initialize
================================================================================================== */

	int NE       = info->nev_d;
	int dim_max  = NE + info->act_max;
	int act_max  = info->act_max;
	int Nblock   = info->block;
	int iter_max = 1000;
	T tol        = info->tol;

	std::cout << std::endl;
	std::cout << "NE      = " << NE << std::endl;
	std::cout << "Nblock  = " << Nblock << std::endl;
	std::cout << "dim_max = " << dim_max << std::endl;
	std::cout << "act_max = " << act_max << std::endl;
	std::cout << "tol     = " << tol << std::endl;

	int NNblock = N*Nblock;

	// offsets into work are computed in long so that N*act_max cannot wrap around
	long isize1 = static_cast<long>(N) * act_max;
	long isize2 = static_cast<long>(act_max) * act_max;
	int lspace  = static_cast<int>(isize1);		// the dsyev wrapper takes an int size

	T *Space, *H, *D;
	Space = work;	 				// commodity space vector; should be at least N * act_max;
	H     = &work[isize1];				// matrix of size act_max * act_max
	D     = &work[isize1+isize2];			// vector of size act_max

/*================================================================================================
	Initialize clocks
================================================================================================== */

	clk0 = clock();
	gettimeofday(&tim,NULL);
	t1 = tim.tv_sec;
	u1 = tim.tv_usec;

/*================================================================================================
	Initialize procedure: upper bound of the spectrum (computed once, then cached in
	info->anrm), initial lower bound of the interval to dampen, and filter set-up
================================================================================================== */

	T Anorm = info->anrm;
	if(Anorm==-1) {
		upperb = powerMethod(opA0, info, N, Space, &Space[N], tol);
		info->anrm = upperb;
		Anorm      = upperb;
	} else {
		upperb = Anorm;
	}

	lowerb = upperb/4;
	a0     = 0;

	setfilter.setFilter1(info, lowerb, upperb, a0);
	if(info->filter_type==2) filter.buildTS(info);

	// size the active sub-space is reduced to at an inner restart
//	k_ri = std::max(act_max/2, act_max - 3*Nblock);
	k_ri = 2*Nblock;

	k_sub = 0;		// number of basis vectors (converged + active)
	k_act = 0;		// number of active (non converged) basis vectors
	k_c   = 0;		// number of converged eigenpairs

	if(info->flag_start==0) {
		randomVector(NNblock, Space);
	} else {
		eig_dcopy_(&NNblock, eigVect, &inc, Space, &inc);
	}

//	orthoNormalize(N, k_sub, Nblock, Space, &Space[NNblock]);

/*================================================================================================
	Iterate until we have enough eigenpairs...
================================================================================================== */

	beta = 0.0;
	alpha = 1.0;
	int M;

	int iter;
	for(iter = 0; iter < iter_max; iter++)
	{

		if(info->verbose > 0 && iter % 10 == 0) {
			std::cout << "Iter #: " << iter << " # of eigenvals: " << k_c << std::endl;
		}

/*================================================================================================
		Apply polynomial filtering on current estimate of eigenvector
================================================================================================== */

		if(info->filter_type==2) {
			filter.chebFilter(opA0, info, N, Nblock, Space, &eigVect[N*k_sub], 
			&Space[N*Nblock]);
		} else {
			filter.chebFilter(opAD, info, N, Nblock, Space, &eigVect[N*k_sub], 
			&Space[N*Nblock]);
		}

/*================================================================================================
		Orthonormalize against vectors in the current sub-space
		(note that those vectors are expected to be orthonormal)
================================================================================================== */

		clk1 = clock();

		orthoNormalize(N, k_sub, Nblock, eigVect, Space);
		clk2 = clock();
		info->north += Nblock;
		info->clk_orth += (clk2-clk1);

/*================================================================================================
		Build new corresponding column in matrix H
		(note that we do not fill in the corresponding row, as dsyev only reads the
		upper diagonal part of the matrix)
================================================================================================== */

		clk1 = clock();
		opAD(N, Nblock, &eigVect[k_sub*N], N, Space, N, info->mvparams);
		clk2 = clock();
		info->mvp += Nblock;
		info->clk_op += (clk2-clk1);

		k_act = k_act + Nblock;
		k_sub = k_sub + Nblock;
		h_size = k_act;

		eig_dgemm_(&Trans, &NoTrans, &k_act, &Nblock, &N, &alpha, &eigVect[N*k_c], &N, 
		Space, &N, &beta, &H[k_act*(k_act-Nblock)], &k_act);

		inf = 0;
		eig_dsyev_(&V, &U, &k_act, H, &k_act, D, Space, &lspace, &inf);

		// a failed diagonalization leaves H and D unusable: stop here instead of
		// iterating on garbage
		if(inf != 0) {
			std::cerr << "chebDav: dense eigen-solver failed (info = " << inf
				<< ") at iteration " << iter << std::endl;
			break;
		}

/*================================================================================================
		Restart if the active subspace becomes too big (Inner restart)
		(done before the refinement so that only the k_ri lowest Ritz vectors are kept)
================================================================================================== */

		if(k_act+Nblock >= info->act_max) {
			k_act = k_ri;
			k_sub = k_act + k_c;
		}

/*================================================================================================
		Perform Ritz-Raleigh refinement: active basis <- active basis * eigenvectors of H
================================================================================================== */

		M = N*h_size;

		eig_dcopy_(&M, &eigVect[N*k_c], &inc, Space, &inc);
		eig_dgemm_(&NoTrans, &NoTrans, &N, &k_act, &h_size, &alpha, Space,
			&N, H, &h_size, &beta, &eigVect[N*k_c], &N);

/*================================================================================================
		Check for convergence of a few current eigenvectors: the residual norm
		|| A v - theta v || of the lowest Ritz pairs is tested, in order, and the scan
		stops at the first pair that has not converged
================================================================================================== */

		k_found = 0;
		no_swap = 0;
		i_count = Nblock;

		for (int ic = 0; ic < i_count ; ic++) {
			clk1 = clock();
			opA0(N, i__1, &eigVect[N*(k_c+ic)], N, Space, N, info->mvparams);
			clk2 = clock();
			info->mvp++;
			info->clk_op += (clk2-clk1);

			scale = -D[ic];
			eig_daxpy_(&N, &scale, &eigVect[N*(k_c+ic)], &inc, Space, &inc);
			norm = eig_ddot_(&N, Space, &inc, Space, &inc);
			norm = std::sqrt(norm);

			if(norm <= tol*std::max(D[h_size-1], Anorm) ) {
				eigVal[k_c+k_found] = D[ic];
				// keep the list of converged pairs sorted
				n_swap = swapPair(N, k_c+k_found, eigVal, eigVect);
				k_found = k_found + 1;
				if(n_swap != 0) no_swap = 1;
			} else {
				break;
			}
		}
		k_c = k_c + k_found;

/*================================================================================================
		If we have enough eigenpairs, exit
		(only if the last pairs arrived in order: a swap means that a lower
		eigenvalue may still be missing)
================================================================================================== */

		if(k_c >= NE && no_swap == 0) {
			if(info->verbose > 0) std::cout << "Iter #: " << iter << " # of eigenvals: " << k_c << std::endl;
			break;
		}

/*================================================================================================
		Shift k_act, if eigenvalues found
================================================================================================== */

		if(k_found > 0) {
			k_act = k_act - k_found;
		}

/*================================================================================================
		Restart if the active subspace becomes too big (Outer restart)
================================================================================================== */

		if(k_sub >= dim_max) {
			k_sub = dim_max - 2*Nblock;
//			k_sub = std::max(k_c + Nblock, dim_max - 2*Nblock);
			k_act = k_sub - k_c;
		}

/*================================================================================================
		Update boundaries for polynomial filtering: the new lower bound is the median
		of the first (h_size - k_found) Ritz values. When every Ritz value of this
		step has converged there is nothing to take a median of, and the previous
		bound is kept.
================================================================================================== */

		int n_rest = h_size - k_found;
		if(n_rest > 0) {
			n_mid = n_rest/2;
			if(n_rest % 2 == 0) {
				lowerb = (D[n_mid-1] + D[n_mid])/2;
			} else {
				lowerb = D[n_mid];
			}
		}
		if(a0 > D[0]) a0 = std::abs(D[0]);

		setfilter.setFilter1(info, lowerb, upperb, a0);
		if(info->filter_type==2) filter.buildTS(info);

/*================================================================================================
		Prepare for next iteration: next block to filter, and H reset to the diagonal
		matrix of the remaining Ritz values
================================================================================================== */

		eig_dcopy_(&NNblock, &eigVect[N*k_c], &inc, Space, &inc);
		updateH(Nblock, k_found, k_act + Nblock, D, H);

	}

/*================================================================================================
	Report the number of converged pairs in all cases (converged, iteration limit
	reached, or eigen-solver failure)
================================================================================================== */

	*k_conv = k_c;

	clk2 = clock();
	info->clk_tot += (clk2-clk0);

	gettimeofday(&tim,NULL);
	t2 = tim.tv_sec;
	u2 = tim.tv_usec;
        info->walltime = (t2-t1) + (u2-u1)*1.e-6;

	// iter is the index of the last iteration when the loop was left early, and
	// iter_max when the loop ran to completion
	info->niter = (iter < iter_max) ? iter + 1 : iter_max;

}

/*================================================================================================
  gramSchmidt

	Purpose:
	========
	Orthonormalizes column M of V against columns 0..M-1 with the Gram Schmidt
	technique, re-orthogonalizing when too much of the vector is lost in the projection.
	On output, columns 0..M of V should be orthonormal.

	Note: It is important that the input columns 0..M-1 of V be orthonormal.

	Steps:
		1. normalize the new column (a column of norm <= eps is first replaced by a
		   random vector)
		2. up to 5 times: remove the projection on the previous columns; accept the
		   result if its norm is still more than half the norm before projection;
		   otherwise renormalize it (or draw a new random vector if it vanished) and
		   try again

	Arguments:
	==========

	N	(input) integer
		On entry, length of the columns of V

	M 	(input) integer
		On entry, number of columns of V that are already orthonormal

	V	(input / output) array of floats or double of size N * (M+1)
		On input, the matrix V with M orthonormal columns and the M+1 th
		column to be projected on the previous M columns
		On output, the fully orthonormalized V of size N * (M+1)

	coef	(workspace) array of floats or double of size at least M
		Receives the coefficients of the last projection

	Returns:
	========
		1 if M is 0 (nothing to project on);
		otherwise the norm of the accepted vector after projection, multiplied by 0
		when the input column had to be replaced by a random vector;
		0 if no attempt passed the acceptance test.

================================================================================================== */

template <typename T>
  T CHEBDAV<T>::gramSchmidt(int N, int M, T *V, T *coef)
  {
#if defined(DOUBLE)
	T tiny = 2.2e-14;
#else
	T tiny = 2.2e-6;
#endif

	const int maxAttempts = 5;
	T acceptRatio = 0.5;		// minimal fraction of the norm that must survive
	T refNorm     = 1.0;		// norm of the column before projection (it is unit)

	int one   = 1;
	char opT  = 'T';
	char opN  = 'N';
	T plusOne = 1;
	T minusOne = -1;
	T zero    = 0;

	int nPrev = M;
	T *target = &V[M*N];

/*      ==========================================================================================
	Small helpers working on the column being processed
        ========================================================================================== */

	auto columnNorm = [&]() -> T {
		T squared = eig_ddot_(&N, target, &one, target, &one);
		return std::sqrt(squared);
	};

	auto applyScale = [&](T factor) {
		eig_dscal_(&N, &factor, target, &one);
	};

/*      ==========================================================================================
	Make sure vector is not 0; if it is, set to a random vector. Then normalize it.
        ========================================================================================== */

	T weight = 1.0;
	T length = columnNorm();
	if(length <= tiny) {
		weight = 0.0;
		randomVector(N, target);
		length = columnNorm();
	}
	applyScale(1.0/length);

	if(nPrev == 0) return refNorm;

/*      ==========================================================================================
	Allow for repeats (with random vectors) if orthogonalization fails
        (which "should" not happen)
        ========================================================================================== */

	for(int attempt = 0; attempt < maxAttempts; ++attempt) {

		// coef = V(:,0:M-1)^T * v, then v = v - V(:,0:M-1) * coef
		eig_dgemv_(&opT, &N, &nPrev, &plusOne, V, &N, target, &one, &zero,
		coef, &one);
		eig_dgemv_(&opN, &N, &nPrev, &minusOne, V, &N, coef, &one, &plusOne,
		target, &one);

		T residual = columnNorm();

		// enough of the vector is left: normalize and we are done
		if(residual > acceptRatio*refNorm) {
			applyScale(1/residual);
			return weight*residual/refNorm;
		}

		// otherwise get a unit vector again (the remainder, or a random one if the
		// remainder vanished) and project once more
		if(residual > tiny*refNorm) {
			applyScale(1/residual);
		} else {
			randomVector(N, target);
			length = columnNorm();
			applyScale(1.0/length);
		}
	}

	return 0;
  }

/*================================================================================================
 orthoNormalize

	Purpose:
	========
	Ortho-normalizes columns M..M+Madd-1 of V against all the columns that precede
	them, by calling gramSchmidt on one column after the other.
	On output, columns 0..M+Madd-1 of V should be orthonormal.

	It is important that the input columns 0..M-1 of V are orthonormal

	Arguments:
        =========

	N	(input) integer
		On entry, length of the columns of V

	M 	(input) integer
		On entry, number of columns of V that are already orthonormal

	Madd	(input) integer
		On entry, the number of additional columns of V that need to
		be orthonormalized

	V	(input / output) array of floats or double of size N * (M+Madd)
		On input, the matrix V with M orthonormal columns followed by the Madd
		columns to process
		On output, the fully orthonormalized V of size N * (M+Madd)

	coef	(workspace) array of floats or double of size at least M+Madd
		Receives the projection coefficients of the column being processed

	Note: the value returned by gramSchmidt is not examined.

================================================================================================== */

template <typename T>
  void CHEBDAV<T>::orthoNormalize(int N, int M, int Madd, T *V, T *coef)
{

	// each processed column joins the orthonormal set used for the next one
	int offset = 0;
	while (offset < Madd)
	{
		gramSchmidt(N, M + offset, V, coef);
		++offset;
	}

  }

/*================================================================================================
  updateH

	Purpose:
	========
	prepares the Rayleigh quotient matrix for next iteration: H (Nupper x Nupper,
	leading dimension Nupper) is set to zero, then its first Nupper-Nblock diagonal
	entries are set to D[Nlow], D[Nlow+1], ...
	The last Nblock rows / columns stay zero; chebDav fills the corresponding columns
	at the next iteration.

	Arguments:
        ==========

	Nblock	(input) integer
		On entry, the number of vectors in a block

	Nlow	(input) integer
		On entry, the number of eigenvalues in D that have converged
		(i.e. the index of the first value of D to copy)

	Nupper	(input) integer
		On entry, order of the new matrix H. Nothing is done if Nupper <= 0.

	D	(input) arrays of floats or double of size at least Nlow + Nupper - Nblock
		On entry, current eigenvalues (converged and not)

	H	(output) matrix of floats or double of size Nupper*Nupper
		On exit, the new matrix H

================================================================================================== */

template <typename T>
  void CHEBDAV<T>::updateH(int Nblock, int Nlow, int Nupper, T *D, T *H)
  {
	if (Nupper <= 0) return;

	// element counts are computed in size_t: Nupper*Nupper may not fit in an int
	const std::size_t Nsize = static_cast<std::size_t>(Nupper);
	const std::size_t Nelem = Nsize * Nsize;

	// number of diagonal entries to load (may be <= 0, in which case H is just zeroed)
	const int Nv = Nupper - Nblock;

#if defined(GPU)
	// size in bytes follows the actual element type, not a hard-coded double
	cudaMemset(H, 0, Nelem*sizeof(T));
	if (Nv > 0) {
		// a kernel cannot be launched on an empty grid
		int NBLOCKS = (Nv+THREADS_PER_BLOCK-1)/THREADS_PER_BLOCK;
		copyDiag<T><<<NBLOCKS, THREADS_PER_BLOCK>>>(Nupper, &D[Nlow], H, Nv);
	}
#else
	std::fill_n(H, Nelem, T(0));
	for (int i = 0; i < Nv; i++) {
		H[static_cast<std::size_t>(i)*Nsize + i] = D[Nlow+i];
	}
#endif
  }

/*================================================================================================
 randomVector

	Purpose:
	========
	Fills a vector of size N with pseudo-random values.
	In the CPU version each component is rand() / RAND_MAX, i.e. a value in [0,1]
	(both ends can be reached); the sequence depends on the state of the C library
	generator, which is not seeded here.
	In the GPU version the values are produced by curandGenerateUniformDouble with the
	generator "gen", which is defined outside of this file.

	Arguments:
        ==========

	N	(input) integer
		On entry, size of the vector

	Vect	(output) array of floats or double of size (N)
		On exit, the random vector
================================================================================================== */

template <typename T>
void CHEBDAV<T>::randomVector(int N, T *Vect)
{
#if defined(GPU)
	curandGenerateUniformDouble(gen, Vect, N);
#else
	const T denom = (T) RAND_MAX;
	T *cursor = Vect;
	for(int left = N; left > 0; --left)
	{
		*cursor = rand()/denom;
		++cursor;
	}
#endif
}

/*================================================================================================
 powerMethod

	Purpose:
	========
	Estimates the dominant eigenvalue of the operator with the power method:
	starting from a random unit vector x, the iteration  x <- A x / ||A x||  is
	repeated and the Rayleigh quotient  x^T A x  is monitored until two successive
	values differ by less than tol, or 5000 iterations have been performed.

	Arguments:
	==========

	op	(input) pointer
		On entry, pointer to the Matrix-Vector operator

	info	(input / output) pointer to info struct
		On entry, structure containing the parameters for the run
		(info->mvparams is passed to op). The counters info->mvp and info->clk_op
		are updated.

	N	(input) integer
		On entry, size of the matrix

	Temp1	(workspace) array of floats or double of size N
		Holds the current iterate

	Temp2	(workspace) array of floats or double of size N
		Holds the operator applied to the current iterate

	tol	(input) float or double
		On entry, absolute tolerance on the change of the eigenvalue estimate

	Returns:
	========
		The most recent Rayleigh quotient. 0 is returned if the start vector has
		no usable norm (e.g. N <= 0).

================================================================================================== */

template <typename T>
T CHEBDAV<T>::powerMethod(opA op, eig_info<T> *info, int N, T *Temp1, T *Temp2, T tol)
{

	int iter_max = 5000;
	int inc = 1;
	int i__1 = 1;
	T eig0, eig1;
	T norm, fact;

	clock_t clk1, clk2;

/*================================================================================================
 	Initialize eigenvector: random direction, scaled to unit length so that the very
	first Rayleigh quotient is already a valid estimate
================================================================================================== */

	randomVector(N, Temp1);
	norm = eig_dnrm2_(&N, Temp1, &inc);
	if(!(norm > 0)) return 0;

	fact = 1.0/norm;
	eig_dscal_(&N, &fact, Temp1, &inc);

/*================================================================================================
 	Power iterations
================================================================================================== */

	eig0 = 0.0;

	for(int iter =0; iter < iter_max; iter++)
	{
/*================================================================================================
 		Matrix - vector multiply
================================================================================================== */

		clk1 = clock();
		op(N, i__1, Temp1, N, Temp2, N, info->mvparams);
		clk2 = clock();
		info->mvp++;
		info->clk_op += clk2-clk1;

/*================================================================================================
 		Rayleigh quotient (Temp1 has unit norm) and norm of the new vector
================================================================================================== */

		norm = eig_dnrm2_(&N, Temp2, &inc);
		eig1 = eig_ddot_(&N, Temp1, &inc, Temp2, &inc);

		// the operator mapped the iterate to zero (or to something that is not a
		// number): the vector cannot be normalized, stop with the current estimate
		if(!(norm > 0)) {
			eig0 = eig1;
			break;
		}

		fact = 1.0/norm;
		eig_dscal_(&N, &fact, Temp2, &inc);

/*================================================================================================
 		Check for convergence; the latest estimate is the one that is kept
================================================================================================== */

		bool converged = std::abs(eig1-eig0) < tol;
		eig0 = eig1;
		if(converged) {
			break;
		}

/*================================================================================================
 		Prepare for next iteration
================================================================================================== */

		eig_dcopy_(&N, Temp2, &inc, Temp1, &inc);

	}

	return eig0;

}

/*================================================================================================
  swapPair

	Purpose:
	========
	Inserts the eigenpair stored at position Nval into the list of the Nval pairs
	that precede it, so that eigenvalues stay in increasing order: the new pair is
	exchanged with its left neighbour for as long as that neighbour holds a strictly
	larger eigenvalue.

	Arguments:
	==========

	N		(input) integer
			On entry, size of the matrix (length of one eigenvector)

	Nval		(input) integer
			On entry, index of the new eigenpair, i.e. number of eigenpairs
			already in order before it

	eigVal		(input / output) array of floats or doubles of size Nval+1
			On entry, Nval sorted eigenvalues followed by the new one
			On exit, sorted eigenvalues

	eigVect		(input / output) array of floats or doubles of size N*(Nval+1)
			On entry, eigenvectors in the same order as eigVal on entry
			On exit, eigenvectors sorted according to eigenvalues

	Returns:
	========
			The number of exchanges performed (0 if the new pair was
			already in place)

================================================================================================== */

template <typename T>
int CHEBDAV<T>::swapPair(int N, int Nval, T *eigVal, T *eigVect)
{
	int inc = 1;
	int moves = 0;
	T incoming = eigVal[Nval];

	// pos is the current position of the new pair; it stops at the front of the list
	// or as soon as its left neighbour is not larger
	int pos = Nval;
	while(pos > 0 && !(incoming >= eigVal[pos-1]))
	{
		eigVal[pos]   = eigVal[pos-1];
		eigVal[pos-1] = incoming;
		eig_dswap_(&N, &eigVect[N*pos], &inc, &eigVect[N*(pos-1)], &inc);
		--pos;
		++moves;
	}

	return moves;
}

#endif
