/*

  cgm-makedata.c: Make Data Program for CG
  
  Kernel CG: Solving and Unstructured Sparse Linear System by
  the Conjugate Gradient Method (in NAS Parallel Benchmarks)
  */

/* this program was originally written by itakura@rccp.tukuba.ac.jp */

static char RCSID[]="$Header: /cvs/cvsroot/Omni/tests/scash-test/cg/cg-makedata.c,v 1.1 2000/10/13 09:09:38 msato Exp $";
#include "cg.h"

double a[NNZ], x[NN], z[NN], r[NN], p[NN], q[NN];
int   colstr[NNP1], rowidx[NNZ];
int   iwork[ILNWRK];
double work[LENWRK];

usage(prog)
    char *prog;
{
    fprintf(stderr, "usage : %s filename \n", prog);
    exit(1);
}

main(argc, argv)
    char **argv;
{
    FILE *fp;
    
    int nn, nnp1, nnz, lenwrk, ilnwrk, niter, nitcg;
    double rcond, shift;
    
    double resid, zeta, xtz, ztz, znorminv;
    double ops, opscg, time, flops, ratio;
    int   i, it, matops, nnzcom, imax;
    double randlc();
    
    int  nnzchk;
    double zetchk, zettol, reschk;
    
    double *pua;
    int *purowidx, *pucolstr;
    
    if(argc != 2)
	usage(argv[0]);
    
    if(!(fp=fopen(argv[1], "w"))){
	perror(argv[1]);
	exit(1);
    }

    nn = NN;
    nnp1   = NNP1;
    nnz    =   NNZ;
    shift  =  SHIFT;
    rcond  = 1.0e-1;
    lenwrk = LENWRK;
    ilnwrk = ILNWRK;
    nitcg  = NITCG;
    niter  = NITER;
    
    nnzchk=NNZCHK;
    zetchk=ZETCHK;
    zettol=ZETTOL;;
    reschk=RESCHK;
    
    tran    = 314159265.0;
    amult   = 1220703125.0;
    zeta    = randlc (&tran, amult);

    makea (nn, nnz, a, rowidx, colstr, rcond, 
	   iwork, &iwork[nnz], work, &work[nnz], &iwork[2*nnz],
	   shift);


    datawrite(fp, nn, a, rowidx, colstr);

    close(fp);
    exit(0);
}

/*
 *
 *  makea.c: Make the Matrix A Routine
 * 
 * Kernel CG: Solving and Unstructured Sparse Linear System by
 * the Conjugate Gradient Method (in NAS Parallel Benchmarks)
 * 
 */

makea (n, nz, a, rowidx, colstr, rcond, acol, arow, aelt, v, iv, shift)
    int n, nz;
    int rowidx[], colstr[];
    int iv[], acol[], arow[];
    double v[], aelt[];
    double rcond, a[], shift;
{
    int i, it, nnza, iouter, ivelt, ivelt1, jrow, nzv;
    double size, ratio, scale;

    size = 1.0;
    ratio = pow(rcond , 1.0/n);
    nnza = 0;

    for(iouter=0; iouter<n; iouter++){
	nzv = NONZER;
        sprnvc(n, nzv, v, iv, rowidx, &rowidx[n]);
        vecset(n, v, iv, &nzv, iouter, 0.5);
	for(ivelt=0; ivelt<nzv; ivelt++){
	    jrow = iv[ivelt];
	    scale = size * v[ivelt];
	    for(ivelt1=0; ivelt1<nzv; ivelt1++){
		acol[nnza] = iv[ivelt1];
		arow[nnza] = jrow;
		aelt[nnza] = v[ivelt1] * scale;
		nnza = nnza + 1;
	    }
	}
	size = size * ratio;
    }

    for(i=0; i<n; i++){
         acol[nnza] = i;
         arow[nnza] = i;
         aelt[nnza] = rcond;
	nnza = nnza + 1;
    }
    
    sparse(a, rowidx, colstr, n, acol, arow, aelt, v, iv, &iv[n], nnza);

    for(i=0; i<n; i++){
	int find;
	for(find=0, it=colstr[i]; it<colstr[i+1] && (!find); it++){
	    if(rowidx[it]==i){
		a[it]=a[it]-shift;
		find=1;
	    }
	}
	if(!find){
	    fprintf(stderr, "Problem: zero diagonal element, aborting.\n");
	    exit(1);
	}
    }
}

sparse(a, rowidx, colstr, n, acol, arow, aelt, x, mark, nzloc, nnza)
     double a[];
     int rowidx[], colstr[], n, acol[], arow[];
     double aelt[], x[];
     int mark[], nzloc[], nnza;
{
    int i, j, jajp1, nza, k, nzcol;
    double xi;
    
    for(j=0; j<n; j++){
	colstr[j]=0;
	mark[j]=0;
    }
    colstr[n]=0;

    for(nza=0; nza<nnza; nza++){
	j = acol[nza] + 1;
        colstr[j] = colstr[j] + 1;
    }

    colstr[0] = 0;
    for(j=1; j<=n; j++)
	colstr[j] = colstr[j] + colstr[j-1];

    for(nza=0; nza<nnza; nza++){
	j=acol[nza];
	k=colstr[j];
	a[k]=aelt[nza];
	rowidx[k]=arow[nza];
	colstr[j]=colstr[j]+1;
    }

    for(j=n-1; j>=0; j--)
	colstr[j+1] = colstr[j];
    colstr[0]=0;
    
    nza = 0;
    for(i=0; i<n; i++){
	x[i]=0.0;
	mark[i]=0;
    }

    jajp1 = colstr[0];
    for(j=0; j<n; j++){
	nzcol=0;
	for(k=jajp1; k<colstr[j+1]; k++){
            i = rowidx[k];
	    x[i] = x[i] + a[k];
	    if((!mark[i]) && (x[i]!=0.0)){
		mark[i]=1;
		nzloc[nzcol]=i;
		nzcol=nzcol+1;
	    }
	}

	for(k=0; k<nzcol; k++){
	    i=nzloc[k];
	    mark[i] = 0;
	    xi = x[i];
	    x[i] = 0.0;
	    if(xi != 0.0){
		a[nza] = xi;
		rowidx[nza] = i;
		nza = nza + 1;
	    }
	}
	jajp1 = colstr[j+1];
	colstr[j+1] = nza + colstr[0];
    }

    printf("final nonzero count in sparse number of nonzeros  = %16d\n", nza);
}

sprnvc(n, nz, v, iv, nzloc, mark)
    double v[];
    int n, nz, iv[], nzloc[], mark[];
{
    int nn1;

    int nzrow, nzv, ii, i, icnvrt();
    double vecelt, vecloc;
    double randlc();

    for(i=0; i<n; i++)
	mark[i]=0;

    nzv = 0;
    nzrow = 0;
    nn1 = 1;
    do {
	nn1 = 2 * nn1;
    } while(nn1 < n);

    while(nzv < nz){
	vecelt = randlc(&tran, amult);
	vecloc = randlc(&tran, amult);
	i = icnvrt(vecloc, nn1);
	if ((!mark[i]) && (i<n)){
	    mark[i] = 1;
            nzloc[nzrow] = i;
            nzrow = nzrow + 1;
            v[nzv] = vecelt;
            iv[nzv] = i;
            nzv = nzv + 1;
	}
    }
    
    for(ii=0; ii<nzrow; ii++){
	i = nzloc[ii];
	mark[i] = 0;
    }
}


int icnvrt(x, ipwr2)
    double x;
    int ipwr2;
{
    int tmp;
    tmp=ipwr2*x;
    return tmp;
}


vecset(n, v, iv, nzv, i, val)
    int n, iv[], *nzv, i;
    double v[], val;
{
    int set;
    int k;
    
    set = 0;
    for(k=0; k<*nzv; k++){
	if(iv[k] == i){
	    v[k] = val;
	    set = 1;
	}
    }

    if(!set){
	v[*nzv]=val;
	iv[*nzv]=i;
	*nzv = *nzv+1;
    }
}

/*
 *
 * randlc.c: A Pseudorandom Number Generator for the Parallel NAS Kernels
 * 
 */

double myaint(a)
    double a;
{
    int b;
    double c;
    
    b=a;
    c=b;
    return c;
}

double randlc(x, a)
	double *x, a;
{
    static int ks=0;
    static double r23, r46, t23, t46;
    int i;
    double t1, t2, a1, a2, x1, x2, z, t3, t4;

    if(ks==0){
	r23=1.0;
	r46=1.0;
	t23=1.0;
	t46=1.0;
	for(i=0; i<23; i++){
	    r23 *= 0.5;
	    t23 *= 2.0;
	}
	for(i=0; i<46; i++){
	    r46 *= 0.5;
	    t46 *= 2.0;
	}
	ks=1;
    }
    
    t1=r23*a;
    a1=myaint(t1);
    a2=a-t23*a1;
    t1=r23*(*x);
    x1=myaint(t1);
    x2=(*x)-t23*x1;
    t1=a1*x2+a2*x1;
    t2=myaint(r23*t1);
    z=t1-t23*t2;
    t3=t23*z+a2*x2;
    t4=myaint(r46*t3);
    *x=t3-t46*t4;
    return r46*(*x);
}


/*

  datafile.c: datafile input-output routine
  
  Kernel CG: Solving and Unstructured Sparse Linear System by
  the Conjugate Gradient Method (in NAS Parallel Benchmarks)
  
  */

datawrite(fp, nn, a, rowidx, colstr)
    FILE *fp;
    int nn;
    double a[];
    int rowidx[], colstr[];
{
    int ok_flag=1;
    
    ok_flag &= (fwrite(&nn, sizeof(int), 1, fp) == 1);
    ok_flag &= (fwrite(colstr,sizeof(int),   nn+1,      fp) == nn+1);
    ok_flag &= (fwrite(rowidx,sizeof(int),   colstr[nn],fp) == colstr[nn]);
    ok_flag &= (fwrite(a,     sizeof(double),colstr[nn],fp) == colstr[nn]);
    if(!ok_flag){
	fprintf(stderr, "fwrite error\n");
	exit(1);
    }
}

#ifdef not /* not used */
dataread(fp, nn, a, rowidx, colstr)
    FILE *fp;
    int nn;
    double **a;
    int **rowidx, **colstr;
{
    int n, ok_flag=1;
    
    rewind(fp);
    ok_flag &= (fread(&n, sizeof(int), 1, fp) == 1);
    if(n != nn) {
	fprintf(stderr, "illegal data size.. file %d, prog %d\n", n, nn);
	exit(1);
    }

    if(!(*colstr = (int *)malloc(sizeof(int) * nn+1))){
	perror("colstr");
	exit(1);
    }
    ok_flag &= (fread(*colstr,sizeof(int),  nn+1,         fp) == nn+1);
    if(!(*rowidx = (int *)malloc(sizeof(int) * (*colstr)[nn]))){
	perror("rowidx");
	exit(1);
    }
    if(!(*a      = (double *)malloc(sizeof(double) * (*colstr)[nn]))){
	perror("matrix a");
	exit(1);
    }
    ok_flag &= (fread(*rowidx,sizeof(int),   (*colstr)[nn],fp)==(*colstr)[nn]);
    ok_flag &= (fread(*a,     sizeof(double),(*colstr)[nn],fp)==(*colstr)[nn]);
    if(!ok_flag){
	fprintf(stderr, "fread error\n");
	exit(1);
    }
}

#endif

