/* Timestamps taken with second() immediately before (time1) and after
 * (time2) the benchmark iteration loop in main(). */
double time1,time2;
/* Timer routine; only declared here, its definition is outside this file's
 * visible source (presumably returns wall-clock seconds -- confirm). */
double second();

/*
 * Kernel CG: Solving an Unstructured Sparse Linear System by
 * the Conjugate Gradient Method (in NAS Parallel Benchmarks)
 */
/* this program was originally written by itakura@rccp.tukuba.ac.jp */
/* modified to seq'ed back by msato@trc.rwcp.or.jp */
/* this OpenMP program does not use orphan directives. */

#include "cg.h"

#ifdef __OMNI_SCASH__
#pragma omp threadprivate(stderr)
#endif

/* a: matrix values filled by dataread().  x: the iterate main() initialises
 * to all ones and hands to cgsol().  z, r, p, q: work vectors that cgsol()
 * accesses directly as file-scope arrays rather than through parameters. */
double a[NNZ], x[NN], z[NN], r[NN], p[NN], q[NN];
/* Sparse index arrays filled by dataread(): colstr holds NNP1 offsets into
 * a/rowidx, rowidx holds one index per stored value. */
int   colstr[NNP1], rowidx[NNZ];

/* Old-style (non-prototype) forward declarations of routines defined later
 * in this file; argument types are not checked at the call sites. */
double cgsol();
void matvec();
void dataread();

/*
 * Benchmark driver.
 *
 * Loads the sparse matrix with dataread(), sets x to all ones, then calls
 * cgsol() NITER times, printing the iteration number, the returned residual
 * norm and zeta + SHIFT after each call.  Finally prints the elapsed time
 * measured with second() around that loop.
 *
 * argv[1], when it is the only argument, names the matrix file; otherwise
 * the file "data" is used.  Exits with status 1 if the file cannot be
 * opened and with status 0 on success.
 */
int main(int argc, char **argv)
{
    FILE *fp;
    const char *datafile = "data";
    double resid, zeta;
    int i, it, nn;

    nn = NN;

    /* tran/amult are not declared in this file (presumably cg.h) and are not
     * read anywhere in the visible source; kept for compatibility. */
    tran    = 314159265.0;
    amult   = 1220703125.0;

    if (argc == 2) {
        datafile = argv[1];
    }

    /* Binary mode: dataread() consumes raw int/double records with fread(),
     * which text-mode translation would corrupt on some platforms. */
    fp = fopen(datafile, "rb");
    if (fp == NULL) {
        /* Report the name actually tried, on stderr like dataread() does. */
        fprintf(stderr, "cannot open data file '%s'\n", datafile);
        exit(1);
    }

#ifdef _OPENMP
    /* NOTE(review): omp_get_num_threads() is called outside any parallel
     * region here, so it reports the size of the sequential team. */
    printf("omp_num_thread=%d\n",omp_get_num_threads());
    printf("omp_max_thread=%d\n",omp_get_max_threads());
#endif

    dataread(fp, nn, a, rowidx, colstr);
    fclose(fp);

    /* Starting vector x = (1, ..., 1). */
    for (i = 0; i < nn; i++) {
        x[i] = 1.0;
    }

    time1 = second();
    for (it = 0; it < NITER; it++) {
        /* cgsol() overwrites x, so each pass starts from the previous result. */
        resid = cgsol(nn, a, rowidx, colstr, x, &zeta);
        printf("%5d  %12.4e  %20.14f\n", it, resid, zeta + SHIFT);
    }
    time2 = second();

    printf("time = %f, %f (%e - %e)/%d, NITCG=%d\n",
           time2-time1,(time2-time1)/NITER,time1,time2,NITER,NITCG);
    exit(0);
}

/*
 * Inner product of two vectors of length n.
 *
 * Returns the sum of x[k]*y[k] for k = 0 .. n-1 (0.0 when n <= 0).  The
 * loop carries an OpenMP parallel-for with a '+' reduction on the
 * accumulator.
 */
double dotpro(n, x, y)
    int n;
    double *x, *y;
{
    int k;
    double sum;

    sum = 0.0;
#pragma omp parallel for reduction(+:sum)
    for (k = 0; k < n; ++k) {
        sum += x[k] * y[k];
    }
    return sum;
}

/*
 * Runs NITCG conjugate-gradient iterations for A*z = x starting from z = 0,
 * then rescales x from the result.
 *
 *   n       order of the matrix; length of every vector
 *   a       stored matrix values, passed through to matvec()
 *   rowidx  one index per stored value; matvec() uses it to index the
 *           vector being multiplied
 *   colstr  n+1 offsets into a/rowidx; matvec() uses consecutive entries as
 *           the range of stored values for each output element
 *   x       in: right-hand side.  out: z / sqrt(z.z)
 *   zeta    out: 1.0 / (x.z), computed with x as it was on entry
 *
 * Returns sqrt(r.r) where r = x - A*z, with x as it was on entry.
 *
 * Work storage: the file-scope arrays z, r, p and q are overwritten.
 */
double cgsol(n, a, rowidx, colstr, x, zeta)
    int n;
    double a[];
    int rowidx[], colstr[];
    double x[], *zeta;
{
    int iter, k, len;
    double alpha, beta, rho, rho_prev, scale;

    len = n;

    /* r = x, p = x, z = 0 */
#pragma omp parallel for
    for (k = 0; k < len; k++) {
        r[k] = x[k];
        p[k] = r[k];
        z[k] = 0.0;
    }

    /* rho = x.x (equals r.r at this point) */
    rho = dotpro(len, x, x);

    for (iter = 0; iter < NITCG; iter++) {
        /* q = A*p */
        matvec(a, colstr, rowidx, p, q, len);

        /* alpha = rho / (p.q) */
        alpha = rho / dotpro(len, p, q);

        /* z = z + alpha*p */
#pragma omp parallel for
        for (k = 0; k < len; k++) {
            z[k] += alpha*p[k];
        }

        /* remember the old rho before r changes */
        rho_prev = rho;

        /* r = r - alpha*q */
#pragma omp parallel for
        for (k = 0; k < len; k++) {
            r[k] += -alpha*q[k];
        }

        /* rho = r.r */
        rho = dotpro(len, r, r);
        beta = rho / rho_prev;

        /* p = r + beta*p */
#pragma omp parallel for
        for (k = 0; k < len; k++) {
            p[k] = r[k] + beta*p[k];
        }
    }

    /* r = A*z */
    matvec(a, colstr, rowidx, z, r, len);

    /* r = x - r, the residual of the computed z */
#pragma omp parallel for
    for (k = 0; k < len; k++) {
        r[k] = x[k] - r[k];
    }

    /* zeta = 1 / (x.z); must be taken before x is overwritten below */
    *zeta = 1.0/dotpro(len, x, z);

    /* scale = 1 / sqrt(z.z) */
    scale = 1.0/(sqrt(dotpro(len, z, z)));

    /* x = z scaled to unit length */
#pragma omp parallel for
    for (k = 0; k < len; k++) {
        x[k] = scale*z[k];
    }

    return sqrt(dotpro(len, r, r));
}

/*
 * Sparse matrix-vector product y = A*x.
 *
 *   a          stored matrix values
 *   row_start  nn+1 offsets; entries row_start[i] .. row_start[i+1]-1 of
 *              a/col_idx contribute to y[i]
 *   col_idx    for each stored value, the index into x it multiplies
 *   x          input vector
 *   y          output vector; all nn entries are overwritten
 *   nn         number of output entries
 *
 * The outer loop is an OpenMP parallel-for; each iteration writes only its
 * own y[row].
 */
void matvec(a, row_start, col_idx, x, y, nn)
     int row_start[], col_idx[], nn;
     double *a, x[], y[];
{
    int row, k, stop;
    double acc;

#pragma omp parallel for private(row,k,acc,stop)
    for (row = 0; row < nn; ++row) {
        acc = 0.0;
        stop = row_start[row + 1];
        for (k = row_start[row]; k < stop; ++k) {
            acc += a[k] * x[col_idx[k]];
        }
        y[row] = acc;
    }
}

/*
 * Disabled alternative matvec, compiled only if the macro `not` is defined
 * (nothing in the visible source defines it).  Instead of gathering into
 * one y entry at a time, it clears y and then walks the stored values in
 * order, scattering each one: y[rowidx[k]] += a[k] * x[i], where i is the
 * colstr segment that value k belongs to.  It carries no OpenMP pragma.
 */
#ifdef not
void matvec(a, colstr, rowidx, x, y, cols)
     int colstr[], *rowidx, cols;
     double *a, x[], y[];
{
    int i, j, k;
    
    /* y is cleared over NN entries, not over the `cols` argument */
    for(i=0; i< NN; i++) y[i]=0.0;

    /* k runs sequentially over all stored values across the segments */
    k=0;
    for(i=0; i<cols; i++){
	for(j=0; j<colstr[i+1]-colstr[i]; j++){
	    y[rowidx[k]] += a[k] * x[i];
	    k++;
	}
    }
}
#endif

/*
 * Reads a sparse matrix from the binary stream fp.
 *
 * File layout, in host byte order and host type sizes:
 *   int     n              matrix order, must equal nn
 *   int     colstr[n+1]    offsets; colstr[n] is the number of stored values
 *   int     rowidx[nnz]    nnz = colstr[n]
 *   double  a[nnz]
 *
 *   fp      open, readable stream positioned at the start of the data
 *   nn      matrix order the program was built for
 *   a       out: stored values
 *   rowidx  out: one index per stored value
 *   colstr  out: nn+1 offsets
 *
 * Does not return on failure: prints a message to stderr and calls exit(1)
 * if the order in the file differs from nn, if the value count is out of
 * range, or if any read comes up short.
 */
void dataread(fp, nn, a, rowidx, colstr)
    FILE *fp;
    int nn;
    double *a;
    int *rowidx, *colstr;
{
    int n = 0;
    int nnz;

    /* Check the header read before looking at n; comparing an unread n
     * against nn would use an indeterminate value. */
    if (fread(&n, sizeof(int), 1, fp) != 1) {
        goto read_error;
    }
    if (n != nn) {
        fprintf(stderr, "illegal data size.. file %d, prog %d\n", n, nn);
        exit(1);
    }
#ifdef __OMNI_SCASH__
    memset (colstr,0,NNP1*sizeof(int));
    memset (rowidx,0,NNZ*sizeof(int));
    memset (a,0,NNZ*sizeof(double));
#endif
    if (fread(colstr, sizeof(int), (size_t)nn + 1, fp) != (size_t)nn + 1) {
        goto read_error;
    }

    /* colstr[nn] comes from the file; validate it before using it as an
     * element count.  A negative value would become a huge size_t. */
    nnz = colstr[nn];
    if (nnz < 0) {
        fprintf(stderr, "illegal nonzero count in file: %d\n", nnz);
        exit(1);
    }
#ifdef NNZ
    /* This file sizes a/rowidx with NNZ; the bound can only be checked when
     * NNZ is visible as a macro. */
    if (nnz > NNZ) {
        fprintf(stderr, "illegal nonzero count in file: %d\n", nnz);
        exit(1);
    }
#endif

    if (fread(rowidx, sizeof(int), (size_t)nnz, fp) != (size_t)nnz) {
        goto read_error;
    }
    if (fread(a, sizeof(double), (size_t)nnz, fp) != (size_t)nnz) {
        goto read_error;
    }
    return;

read_error:
    fprintf(stderr, "fread error\n");
    exit(1);
}




