/*================================================================================================
  setFilter.h
  Version 1: 06/21/2024

  Defines the polynomials used for Chebyshev filtering in Lanczos

  See

	@article{Li2018a,
  		author = {Li, Ruipeng and Xi, Yuanzhe and Erlandson, Lucas and Saad, Yousef},
  		archivePrefix = {arXiv},
  		eprint = {1802.05215},
  		primaryClass = {math},
  		title = {The {{Eigenvalues Slicing Library}} 
			({{EVSL}}): {{Algorithms}}, {{Implementation}}, and {{Software}}},
  		url = {http://arxiv.org/abs/1802.05215},
	}

  pages 5-7 for details

Copyright (c) Patrice Koehl.

>>> SOURCE LICENSE >>>

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

>>> END OF LICENSE >>>

================================================================================================== */

#ifndef _SETFILTER_
#define _SETFILTER_

/*================================================================================================
 Includes
================================================================================================== */

#include <math.h>
#include <algorithm>
#include <cfloat>
#include <cstdlib>
#include <cstring>
#include <iostream>

/*================================================================================================
 define opA

 Function-pointer type for a callback that receives two integers (nrow, ncol), two T pointers
 (x and y) each followed by an integer (ldx, ldy), and an opaque user pointer (mvparam).
 The callback returns nothing.

 NOTE(review): presumably this is the "apply operator A" callback (y = A*x on a block of ncol
 vectors, ldx/ldy being leading dimensions) - confirm against the callers.

 NOTE(review): T is not declared at this scope in this header; the typedef only compiles if T
 is already defined where the header is included - confirm, or turn it into a template alias.
================================================================================================== */

typedef void (*opA) (int nrow, int ncol, T *x, int ldx, T *y, int ldy, void *mvparam);

/*================================================================================================
 The SETFILTER class

 Builds the Chebyshev polynomial filters stored in an eig_info<T> structure.
 T is the floating point type (float or double) of all coefficients and work arrays.

 Public entry points:
	setFilter1	stores the three scalars (a, b, a0) in the eig_info structure
	setFilter2	computes the expansion coefficients (info->mu), the degree (info->mpoly)
			and the acceptance bar (info->bar) for the interval [ab[0], ab[1]]

 The private methods are the building blocks used by setFilter2.
================================================================================================== */

  template <typename T>
  class SETFILTER {

	public:

		// Stores a, b and a0 in info (fields info->a, info->b, info->a0)
		void setFilter1(eig_info<T> *info, T a, T b, T a0);

		// Computes the Chebyshev expansion for the interval [ab[0], ab[1]] and stores the
		// result in info; space is a caller-provided work array
		void setFilter2(eig_info<T> *info, T *ab, T *space);

	private:

		// Computes damping coefficients jac[0..m] for Chebyshev expansions
		// (damping: 0 = none, 1 = Jackson, otherwise Lanczos sigma)
		void dampcf(int m, int damping, T *jac);

		// Evaluates sum_{j=0..m} v[j]*cos(j*thc)*jac[j], i.e. the difference between the
		// right and left values of the polynomial expressed in Chebyshev expansion
		T diffLR(T *v, int m, T thc, T *jac);

		// Computes yi = p_mu(xi) at npts points; space is a work array of size 3*npts
		void chebExpansion(int m, T *mu, int npts, T *xi, T *yi, T*space);

		// Determines the polynomial for end interval cases; outputs go through
		// mu, deg, gam and bar, and *max_deg is overwritten when *deg > 0 on entry
		void chebEndIntv(T aIn, T bIn, T *mu, int *max_deg, int *deg, T *gam, T *bar, T *t0);

		// Finds the root theta_c of a linear combination of Chebyshev polynomials and
		// fills mu; returns 0 on success, 1 when the end-point values have the wrong sign
		int rootChebExp(int m, T *v, T* jac, T tha, T thb, T *mu, T *thcOut);

	protected:

		// Parameters of the filter construction; all six are (re)set at the start of
		// every call to setFilter2:
		//	max_deg, min_deg	largest / smallest polynomial degree considered
		//	damping			damping type passed to dampcf
		//	thresh_ext, thresh_int	acceptance thresholds for end / interior intervals
		//	intvtol			tolerance used to detect an end interval
		int max_deg, min_deg, damping;
		T thresh_ext, thresh_int, intvtol;

  };

/*================================================================================================
 dampcf:

	Purpose:
	========
	Fills jac[0..m] with the damping factors that multiply the terms of a Chebyshev
	expansion of degree m.

	Arguments:
	==========

	m	(input) integer
		On entry, degree of the polynomial

	damping	(input) integer
		On entry, type of damping:
			0	--> no damping (all factors for k >= 1 are 1)
			1	--> Jackson damping
			other	--> Lanczos sigma damping; the angle pi/(m+1) is only set
				    when damping == 2, for any other value it stays 0

	jac	(output) array of floats or double (size m+1)
		On exit, damping factors. jac[0] is always 0.5.

================================================================================================== */

template <typename T>
void SETFILTER<T>::dampcf(int m, int damping, T *jac) {

	const T degree = static_cast<T>(m);

	// jac[0] is 0.5 rather than 1: it carries the 1/2 factor of the zeroth term
	// of the Chebyshev expansion
	jac[0] = 0.5;

	if (damping == 0) {

		// No damping: every higher-order factor is 1
		for (int k = 1; k <= m; k++) jac[k] = 1.0;
		return;
	}

	if (damping == 1) {

		// Jackson damping, based on the angle pi/(m+2)
		const T thetJ = M_PI/(degree+2);
		const T a1    = 1/(degree+2);
		const T a2    = std::sin(thetJ);

		for (int k = 1; k <= m; k++) {
			const int k1 = k+1;
			jac[k] = a1*std::sin(k1*thetJ)/a2 + (1-k1*a1)*std::cos(k*thetJ) ;
		}
		return;
	}

	// Lanczos sigma damping, based on the angle pi/(m+1)
	T thetL = 0.0;
	if (damping == 2) thetL = M_PI/(degree+1);

	for (int k = 1; k <= m; k++) {
		jac[k] = std::sin(k*thetL)/(k*thetL);
	}

}

/*================================================================================================
 diffLR:

	Purpose:
	========
	Evaluates the damped cosine sum

		sum_{j=0}^{m} v[j] * cos(j*thc) * jac[j]

	which is the difference between the right and left values of the polynomial
	expressed in Chebyshev expansion.

	Arguments:
	==========

	v	(input) array of floats or double (size m+1)
		On entry, vector of coefficients [see paper]

	m	(input) integer
		On entry, degree of the polynomial

	thc	(input) float or double
		On entry, the angle theta at which the sum is evaluated

	jac	(input) array of floats or double (size m+1)
		On entry, vector of damping coefficients

	diffLR	(output) float or double
		On exit, value of the sum above

================================================================================================== */

template <typename T>
T SETFILTER<T>::diffLR(T *v, int m, T thc, T *jac){

	T total = 0.0;

	// Accumulate from the lowest to the highest degree
	int j = 0;
	while (j <= m) {
		total += v[j]*std::cos(j*thc)*jac[j];
		j++;
	}

	return total;
}

/*================================================================================================
 chebExpansion:

	Purpose:
	========

	Computes yi = p_mu(xi) for a vector of abscissae xi, where

		p_mu(x) = sum_{i=0}^{m} mu[i] T_i(x)

	with:
		mu:	coefficients of the Chebyshev expansion
		T_i:	Chebyshev polynomial of degree i, first kind

	The T_i are generated with the three-term recurrence
		T_0 = 1, T_1 = x, T_{k+1} = 2 x T_k - T_{k-1}

	Damping (Jackson or other) is not applied here; it is assumed to be already
	incorporated in mu by the caller.

	Arguments:
	==========

	m	(input) integer
		On entry, degree of the polynomial

	mu	(input) array of floats or double (size m+1)
		On entry, Chebyshev expansion coefficients

	npts	(input) integer
		On entry, number of points in xi, yi

	xi	(input) array of floats or double (size npts)
		On entry, points at which p_mu is evaluated.
		warning: the xi's must be in [-1 1]

	yi	(output) array of floats or double (size npts)
		On exit, p_mu(xi(:))

	space	(input) array of floats or double
		On entry, work array of size at least 3*npts

================================================================================================== */

template <typename T>
void SETFILTER<T>::chebExpansion(int m, T *mu, int npts, T *xi, T *yi, T *space) {

	int stride = 1;

	// Three buffers of length npts carved out of space; their roles rotate at each degree
	T *prev = space;		// T_{k-1}(xi)
	T *next = space + npts;		// T_{k+1}(xi)
	T *curr = space + 2*npts;	// T_k(xi)

	// Degree 0 term: T_0(x) = 1, hence yi = mu[0]
	for (int i = 0; i < npts; i++) yi[i] = mu[0];

	// Start of the recurrence: "T_{-1}" = 0 and T_0 = 1
	memset(prev, 0, npts*sizeof(T));
	for (int i = 0; i < npts; i++) curr[i] = 1.0;

	// Add the terms of degree 1 to m
	for (int degree = 1; degree <= m; degree++) {

		// The first step uses a factor 1 (T_1 = x), all others a factor 2
		const T factor = (degree == 1 ? 1.0 : 2.0);

		for (int j = 0; j < npts; j++) {
			next[j] = factor*xi[j]*curr[j] - prev[j];
		}

		// Rotate the buffers: curr now holds T_degree(xi)
		T *recycled = prev;
		prev = curr;
		curr = next;
		next = recycled;

		// yi += mu[degree] * T_degree(xi)
		eig_daxpy_(&npts, &mu[degree], curr, &stride, yi, &stride) ;
	}
}

/*================================================================================================
 chebEndIntv

	Purpose:
	========
	Determines the polynomial for end interval cases.

	In these cases, the polynomial is just a scaled Chebyshev polynomial. However
	we need to express it in the same basis as in the other (middle interval)
	cases. This function determines this expansion.

	Arguments:
	==========

	aIn	(input) float or double
		On entry, lower end of the transformed interval of interest

	bIn	(input) float or double
		On entry, upper end of the transformed interval of interest

 		(Note [aIn, bIn] is the transformed interval)

	mu	(output) array of floats or double (size at least deg+1 on exit)
		On exit, expansion coefficients of the selected polynomial.

	max_deg	(in/out) integer
		On entry, maximum degree of the polynomial.
		On exit, overwritten with *deg when a degree was forced (*deg > 0 on entry).

	deg	(in/out) integer
		On entry, if > 0, the degree is forced to this value and the threshold test
		is disabled.
		On exit, degree of the selected polynomial (always >= 1).

	gam	(output) float or double
		On exit, 1.0 for a right interval (bIn >= 1-eps), -1.0 for a left interval.

	bar	(output) float or double
		On exit, value of the selected polynomial at the inner end of the interval
		(aIn for a right interval, bIn for a left interval).
		If \f$P(\lambda_{i}) \geq \f$ bar, accept eigenvalue as belonging to interval;
		else reject.

	t0	(input) array of floats or double
		On entry, work array of size at least 3*(max_deg+1), where max_deg is the
		effective maximum degree (i.e. *deg when the degree is forced).

 Notes:
	(1) [a b] is the "interval for eigenvalues to damp"
	(2) [aIn, bIn] is the interval of interest
 	(3) del is used to expand the interval of interest slightly so that pn(t)<bar by some margin.
	(4) The degree search stops as soon as the polynomial value at the inner end drops
	    below thresh_ext.

================================================================================================== */

template <typename T>
void SETFILTER<T>::chebEndIntv(T aIn, T bIn, T *mu, int *max_deg, int *deg, T *gam, T *bar, T *t0)
{

/*================================================================================================
	Parameters and local variables
================================================================================================== */

	T thresh = thresh_ext;
	T del = 0.1*std::sqrt((bIn-aIn)*0.5);

#if defined(DOUBLE)
	T eps = 1e-13;
#else
	T eps = 1e-6;
#endif

	T a, b, x, e, c, sigma, sigma1, sigma_new, g0, g1, gnew, s1, s2, s3;
	T *t1, *tnew; 

	// t1 always holds at least the degree-1 polynomial, so 1 is the smallest degree that
	// can be reported (starting from 0 made a forced degree of 1 come back as degree 0).
	int mbest=1;

/*================================================================================================
	If degree is forced
================================================================================================== */

	if (*deg > 0) {
		thresh = -1.0;
	  	*max_deg = *deg;
	}

/*================================================================================================
	Stride of the three coefficient arrays stored in t0.
	It must be derived from the effective maximum degree, i.e. AFTER the forced-degree
	override: with a forced degree larger than the default maximum, a stride based on the
	old value would make the three arrays overlap and overrun.
================================================================================================== */

	int m1 = *max_deg+1;

/*================================================================================================
	right, or left interval case
================================================================================================== */

	if (bIn >= (1.0-eps)) {
		// right interval: damp [-1, aIn-del]
		x = aIn;
		b = x - del;
		a = -1.0;
		*gam = 1.0;
	} else {
		// left interval: damp [bIn+del, 1]
		x = bIn;
		a = x+del;
		b = 1.0;
		*gam = -1.0;
	}

/*================================================================================================
	Prepare for polynomial expansion:
		e, c: half-width and center of the interval to damp
		t0, t1, tnew: coefficients of three consecutive polynomials
================================================================================================== */

	e = (b-a)*0.5;
	c = (b+a)*0.5;
	sigma1 = e/(*gam-c);
	sigma  = sigma1;

	memset(t0, 0, 3*m1*sizeof(T));
	t1 = t0+m1;
	tnew = t1+m1;

	// degree 0: t0(t) = 1
	t0[0] = 1.0;

	// degree 1: t1(t) = (t-c)*sigma1/e
	t1[0] = -c*(sigma1/e);
	t1[1] = (sigma1/e);

/*================================================================================================
	Evaluating polynomial at x (g0, g1: values of two consecutive polynomials at x)
================================================================================================== */

	g0 = 1.0;
	g1 = (x-c)*sigma1/e;

	if (g1 < thresh){
		mbest = 1;
	} else {

		// Degree loop [! make sure to start at k=2]
		for(int k=2; k<=*max_deg; k++)
		{
			sigma_new = 1.0/(2.0/sigma1-sigma);
			s1 = sigma_new/e;
			s2 = 2*s1*c;
			s3 = sigma*sigma_new;

			// tnew = s1 * (t * t1) expressed in the Chebyshev basis ...
			for(int j=2; j<=k; j++) tnew[j-1] = s1*(t1[j]+t1[j-2]);

			tnew[1] = tnew[1] + s1*t1[0];
			tnew[0] = s1*t1[1];
			tnew[k] = s1*t1[k-1];

			// ... minus the s2*t1 and s3*t0 terms of the recurrence
			for(int j=0; j<=k; j++) tnew[j] = tnew[j] - s2*t1[j] - s3*t0[j];

			// shift: t0 <- t1, t1 <- tnew
			for(int j=0; j<=k; j++){
				t0[j] = t1[j];
				t1[j] = tnew[j];
			}

			// recurrence to evaluate pn(x)
			gnew = 2*(x-c)*s1*g1 - s3*g0;
			g0 = g1;
			g1 = gnew;

			// best degree
			mbest = k;
			if (g1<thresh) break;
			sigma = sigma_new;
		}
	}

	memcpy(mu, t1, (mbest+1)*sizeof(T));

	*bar = g1;
	*deg = mbest;

}

/*================================================================================================
   rootChebExp:

	Purpose:
	========
	Finds the root theta_c of a linear combination of Chebyshev polynomials, i.e. the
	angle at which diffLR(v, m, theta, jac) vanishes, using Newton steps safeguarded by
	bisection. The expansion coefficients mu[j] = cos(j*theta_c)*jac[j] are then returned.

	Arguments:
	==========

	m	(input) integer
		On entry, degree of the polynomial

	v	(input) array of floats or double (size m+1)
		On entry, difference between cosines on left and right [(3.12) in paper]

	jac	(input) array of floats or double (size m+1)
		On entry, damping coefficients

	tha	(input) float or double
		On entry, theta_a [refer to paper]

	thb	(input) float or double
		On entry, theta_b [refer to paper]

	mu	(output) array of floats or double (size m+1)
		On exit, expansion coefficients (untouched when 1 is returned)

	thcOut	(output) float or double
		On exit, value of theta_c (untouched when 1 is returned)

	Return value:
	=============
	0 on success; 1 when the values at theta_a / theta_b do not have the expected signs,
	in which case the caller should try a higher degree.

================================================================================================== */

template <typename T>
int SETFILTER<T>::rootChebExp(int m, T *v, T* jac, T tha, T thb, T *mu, T *thcOut)
{

/*================================================================================================
	Parameters: maximum number of Newton steps and tolerances
	(absolute tolerance on the function value, relative tolerance on the step)
================================================================================================== */

	const int maxSteps = 30;

#if defined(DOUBLE)
	const T tolBal = std::abs(tha-thb)*1.e-13;
	const double stepEps = DBL_EPSILON;
#else
	const T tolBal = std::abs(tha-thb)*1.e-6;
	const float stepEps = FLT_EPSILON;
#endif

	// Start from the middle of [theta_b, theta_a]
	T thc = 0.5*(tha+thb);

/*================================================================================================
	Check the signs at both ends: if they are wrong the iteration cannot work,
	exit so that a higher degree is used
================================================================================================== */

	const T fAtB = diffLR(v, m, thb, jac);
	const T fAtA = diffLR(v, m, tha, jac);

	if ((fAtA>0) || (fAtB<0)) return 1;

/*================================================================================================
	Newton iteration to balance the interval
================================================================================================== */

	for (int step=0; step<=maxSteps; step++)
	{

		const T fval = diffLR(v, m, thc, jac);

		// Newton candidate: thN = thc + fval/slope, slope being the damped sine sum below
		T slope = 0.0;
		for (int j=1; j<=m; j++) {
			slope += jac[j]*j*std::sin(j*(thc))*v[j];
		}
		T thN = thc + fval/slope;

		// Stop when the function value or the step is small enough
		const bool converged =
			(std::abs(fval) < tolBal) || std::abs(thc - thN) < stepEps * std::abs(thc);
		if (converged) break;

		// Safeguard: a Newton candidate outside the current bracket is replaced by a
		// bisection step; the bracket is tested before it is shrunk
		const bool outsideBracket = (thN < thb) || (thN > tha);

		if (fval > 0) {
			if (outsideBracket) thN = 0.5*(thc+tha);
			thb = thc;
		} else {
			if (outsideBracket) thN = 0.5*(thc+thb);
			tha = thc;
		}
		thc = thN;
	}

/*================================================================================================
	Done - return mu and theta_c
================================================================================================== */

	for (int j=0; j<=m; j++) {
		mu[j] = std::cos(j*(thc))*jac[j];
	}

	*thcOut = thc;

	return 0;

}

/*================================================================================================
  setFilter2

	Purpose:
	========
 	Sets the values for the Chebyshev polynomial expansion that filters the interval
	[ab[0], ab[1]] of a spectrum contained in [info->lmin, info->lmax].

	The interval is first mapped to [-1, 1]. If it touches one end of the spectrum
	(within intvtol) the end-interval polynomial is built with chebEndIntv; otherwise
	the degree is increased until the polynomial balanced by rootChebExp is small
	enough at both interval ends (or until the maximum degree is reached).

	Arguments:
	==========

        info    (in/out) pointer to the structure eig_info
                 On entry, the following fields are read:
			mpoly		if > 0, forces the degree (thresholds are then unused);
					if 0, the degree is selected internally
			lmin, lmax	bounds of the spectrum
			mu		array receiving the expansion coefficients; it must
					hold at least max_deg+1 values, max_deg being 120 or
					the forced degree
                 On exit, mu holds the (scaled) coefficients, mpoly the degree of the
		 polynomial and bar the acceptance threshold.

	ab	(input) array of floats or double (size 2)
		On entry, lower (ab[0]) and upper (ab[1]) ends of the interval of interest;
		the interval is clipped to [lmin, lmax].

	space	(input) array of floats or double
		On entry, work array of size at least: 6*(max_deg+1) + 3000 (set to 10,000).
		The code uses 5*mp1 + 6 values, with mp1 = max(info->mpoly, 120) + 1.

	Errors:
	=======
	The program is terminated (exit(1)) with a message when no polynomial could be
	built for a middle interval, or when the center gam falls outside [-1, 1].

================================================================================================== */

template <typename T>
void SETFILTER<T>::setFilter2(eig_info<T> *info, T *ab, T *space) 
{

/*================================================================================================
	Standard parameters for defining the Chebyshev polynomial expansion
================================================================================================== */

	max_deg    = 120;	// max allowed degree
	min_deg    = 2;		// min allowed degree 
	damping    = 1;		// 0 = no damping, 1 = Jackson, 2 = Lanczos
	thresh_ext = 0.2;	// threshold for accepting polynom. for end intervals 
	thresh_int = 0.8;	// threshold for interior intervals 
	intvtol    = 1.e-9;	// cut-off point of middle interval 

/*================================================================================================
	Define variables
================================================================================================== */

	T  cc;              	// center of interval 
	T  dd;              	// half-width of interval
	T  gam;             	// center of delta function used 
	T  bbar;            	// p(theta)>=bbar indicates a wanted Ritz value

	int deg = info->mpoly;  // if deg == 0 before calling setFilter2 then
               		        // the polynomial degree is computed
                       		// internally. Otherwise it is forced to deg
                       		// [and thresh_ext and thresh_int are not used]

	T t=0.0, itv[2],  vals[2];
	T one = 1.0, mone = -1.0;

	T tha=0.0, thb=0.0, thc=0.0;
	T thresh;
	int m, j, nitv, mbest;

	// Work space layout: v (mp1) | jac (mp1) | space0 (3*mp1) | space1 (rest)
	int mp1 = std::max(deg, max_deg) + 1;

	T *v = space;
	T *jac = &space[mp1];
	T *space0 = &space[2*mp1];
	T *space1 = &space[5*mp1];

	T *mu = info->mu;

	T IntTol = intvtol;

/*================================================================================================
	Define intervals
		cc, dd: center and half-width of [lmin, lmax] 
		transform [lmin, lmax] to [-1,1] by y = (x-cc) / dd
	 	transform [a, b] to [aT, bT] accordingly 
================================================================================================== */

	T intv[4];
	intv[0] = ab[0]; intv[1] = ab[1]; 
	intv[2] = info->lmin; intv[3] = info->lmax;

	T igam[2];

	T aa = std::max(intv[0], intv[2]);
	T bb = std::min(intv[1], intv[3]);

	T lmin = intv[2], lmax = intv[3];

	cc = 0.5 * (lmax + lmin);
	dd = 0.5 * (lmax - lmin);

	aa  = (aa - cc) / dd;
	bb  = (bb - cc) / dd;
	aa  = std::max(aa, mone);
	bb  = std::min(bb,  one);

	thb = std::acos(bb);
	tha = std::acos(aa);

/*================================================================================================
	deal with extremal interval cases 
================================================================================================== */

	gam = 1.0;
	if (aa-IntTol <= -1.0) {

		/*==================================================================================
		Left interval
		=================================================================================== */

		thc  = tha;
		nitv = 1;
		aa   = -1.0;
		gam  = -1.0;	     

	} else if (bb + IntTol >= 1.0) {

		/*==================================================================================
		Right interval
		=================================================================================== */

		thc  = thb;
		nitv = 1;
		bb   = 1;
		gam  = 1;

	} else {

		/*==================================================================================
		Middle interval
		=================================================================================== */

		itv[0] = aa;
		itv[1] = bb;
		nitv = 2;
		gam = 1;	           

	}

/*================================================================================================
	threshold for finding the best poly
================================================================================================== */

	if (nitv == 1) {

		/*==================================================================================
		end interval case done separately
		=================================================================================== */

		chebEndIntv(aa, bb, mu, &max_deg, &deg, &gam, &bbar, space0);
	} else {

		/*==================================================================================
		Middle interval:
		give a starting degree - around 1/(half gap).
		The estimate is clamped while still a floating point value: for a tiny (or
		empty) gap it is huge, infinite or NaN, and converting such a value to int
		is undefined behaviour.
		=================================================================================== */

		const int degCap = (max_deg+1)/2;
		double startDeg = 2 + 0.5/(bb-aa);
		if (!(startDeg > 2)) startDeg = 2;		// also catches NaN
		if (startDeg > degCap) startDeg = degCap;

		min_deg = static_cast<int>(startDeg);
		min_deg = std::max(min_deg, 2);
		min_deg = std::min(min_deg, degCap);
		thresh = thresh_int;

		/*==================================================================================
		Short cut for the case of forced degree
		=================================================================================== */

		if (deg > 0){
			thresh = -1;
			max_deg = deg;
		}

		/*==================================================================================
		initialize vector v
		=================================================================================== */

		for (j=0; j<min_deg; j++) v[j] = std::cos(j*thb) - std::cos(j*tha);

		/*==================================================================================
		Degree loop.
		mbest records the degree of the polynomial currently stored in mu, i.e. the
		last degree for which rootChebExp succeeded (-1: none yet).
		=================================================================================== */

		mbest = -1;

		for (m=min_deg; m < max_deg; m++){

			/*===========================================================================
			Define damping
			============================================================================ */

			dampcf(m, damping, jac);

			/*===========================================================================
			Update v: add one more entry
			============================================================================ */

			v[m] = std::cos(m*thb)-std::cos(m*tha);

			/*===========================================================================
			Balancing the interval and get new coefficients mu
			(on failure mu, thc, t and vals keep their previous values)
			============================================================================ */

			if (rootChebExp(m, v, jac, tha, thb, mu, &thc)) {
				continue;
			}

			mbest = m;

			/*===========================================================================
			New center
			============================================================================ */

			gam = std::cos(thc);

			/*===========================================================================
			For scaling: t = p(gam), vals = p at both ends of the interval
			============================================================================ */

			igam[0] = gam;
			chebExpansion(m, mu, 1, igam, &t, space1);
			chebExpansion(m, mu, nitv, itv, vals, space1);

			/*===========================================================================
			Test for acceptance
			============================================================================ */

			if (vals[0] <= t*thresh && vals[1] <= t*thresh) {
				break;
			}
		}

		/*==================================================================================
		No degree produced a polynomial (rootChebExp failed every time, or the loop
		was empty because min_deg >= max_deg): t and vals were never computed, so
		scaling would divide by zero and read uninitialized values.
		=================================================================================== */

		if (mbest < 0) {
			std::cout << "No Chebyshev polynomial found for the requested interval!" << std::endl;
			exit(1);
		}

		/*==================================================================================
		Scale the polynomial (coefficients 0..mbest only)
		=================================================================================== */

		for (j=0; j<=mbest; j++) mu[j] /= t;
		bbar = std::min(vals[0], vals[1])/t;
		deg = mbest;

	}

	info->mpoly = deg;
	info->bar   = bbar;

	if(gam > 1.0 || gam < -1.0) {
		std::cout << "Problem defining Chebishev expension!" << std::endl;
		exit(1);
	}

}

/*================================================================================================
  setFilter1

	Purpose:
	========
 	Stores in info the three scalars that define the Chebyshev polynomial that
	dampens over an interval [a, b]. Nothing else is computed here.

	Arguments:
	==========

        info    (in/out) pointer to the structure eig_info
                 On entry, points to the data structure to store the information
                 about the eigenvalue problem and the progress of the eigensolver.
		 On exit, its fields a, b and a0 hold the values passed in.

	a	(input) float or double
		On entry, lower end of interval

	b	(input) float or double
		On entry, upper end of interval

	a0	(input) float or double
		On entry, value for scaling (a0 < a)

================================================================================================== */

template <typename T>
void SETFILTER<T>::setFilter1(eig_info<T> *info, T a, T b, T a0)
{
	eig_info<T> &target = *info;

	// Interval to dampen
	target.a  = a;
	target.b  = b;

	// Scaling point
	target.a0 = a0;
}

#endif
