/* RCS identification keyword string for this source file (not referenced elsewhere in the visible code). */
static char rcsid[] = "$Id: pdaxpy-test.c,v 1.3 1999/07/22 06:12:36 msato Exp $";
/* 
 * $RWC_Release: Omni-1.6 $
 * $RWC_Copyright:
 *  Omni Compiler Software Version 1.5-1.6
 *  Copyright (C) 2002 PC Cluster Consortium
 *  
 *  This software is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License version
 *  2.1 published by the Free Software Foundation.
 *  
 *  Omni Compiler Software Version 1.0-1.4
 *  Copyright (C) 1999, 2000, 2001.
 *   Tsukuba Research Center, Real World Computing Partnership, Japan.
 *  
 *  Please check the Copyright and License information in the files named
 *  COPYRIGHT and LICENSE under the top  directory of the Omni Compiler
 *  Software release kit.
 *  
 *  
 *  $
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include "timer.h"
#include "ompclib.h"

/*
 * parallel daxpy: test ompc thread library
 */
/* Length of the X and Y test vectors; the benchmark sizes stay below it. */
#define N_MAX 1000
/* Increment applied to the problem size between two measurements. */
#define N_STEP 50

/*
 * Operand vectors used by both kernels.  X is filled with 1.0 in main();
 * Y has no explicit initializer and is updated in place by every call.
 */
double X[N_MAX], Y[N_MAX];
/* Scalar multiplier handed to the kernels as `da`. */
double A = 5.0;

/* Number of back-to-back kernel calls timed for each problem size. */
#define N_ITER 3 /* 10 */

/* Benchmark driver: times the parallel, then the sequential kernel. */
void *daxpy_main(int);
/* Parallel kernel: runs dy[i] += da*dx[i] through the ompc runtime. */
void p_daxpy(int n,double da,double dx[],double dy[]);
/* Sequential reference kernel: dy[i] += da*dx[i] for 0 <= i < n. */
void daxpy(int n,double da,double dx[],double dy[]);

/*
 * Entry point of the ompc thread-library daxpy test.
 *
 * Usage: <program> #thread
 *
 * argv[1] must be a non-negative decimal integer.  In this function the
 * value is only echoed and forwarded to daxpy_main() as a label for the
 * "%data" records; it is not passed to _ompc_init().
 *
 * Prints the "%param"/"%field" header, fills X with 1.0, initializes the
 * runtime, reports the calling thread's id and the team size, runs the
 * benchmark and finalizes the runtime.
 *
 * Exits with status 1 on a bad command line, 0 otherwise.
 */
int main(int argc, char **argv)
{
    int i;
    int n_thd;
    int id,np;
    long requested;
    char *end;

    if(argc != 2){
        printf("Usage: %s #thread\n",argv[0]);
        exit(1);
    }

    /* strtol rather than atoi: reject non-numeric or out-of-range input */
    errno = 0;
    requested = strtol(argv[1],&end,10);
    if(end == argv[1] || *end != '\0' || errno == ERANGE
       || requested > INT_MAX){
        printf("bad #task: %s\n",argv[1]);
        exit(1);
    }
    if(requested < 0){
        printf("bad #task < 0\n");
        exit(1);
    }
    n_thd = (int)requested;

    printf("#thread = %d\n",n_thd);
    printf("%%param n-thd number\n");
    printf("%%param size number\n");
    printf("%%param time number\n");
    printf("%%param ops number\n");
    printf("%%field {n-thd size ops time}\n\n");

    for(i = 0; i < N_MAX; i++) X[i] = 1.0;

    _ompc_init();

    id = omp_get_thread_num();
    np = omp_get_num_threads();
    printf("id = %d, np = %d\n",id,np);

    daxpy_main(n_thd);

    _ompc_finalize();

    return 0;
}

/*
 * Benchmark driver.
 *
 * For every vector length 1, 1+N_STEP, ... below N_MAX, times N_ITER
 * consecutive calls of the parallel kernel and prints one "%data" record;
 * then repeats the same sweep with the sequential kernel.
 *
 * Each record holds: n_thd, the vector length, (length * calls) divided by
 * (elapsed * 1e6), and (elapsed * 1e6) divided by the number of calls.
 * NOTE(review): the units depend on what second() returns (presumably
 * seconds) -- confirm against timer.h.
 *
 * n_thd is used only as the first column of each record.
 * Always returns NULL.
 */
void *daxpy_main(int n_thd)
{
    double elapsed;
    int len;
    int rep;

    printf("\n para:\n");
    len = 1;
    while(len < N_MAX){
        elapsed = second();
        rep = 0;
        while(rep < N_ITER){
            p_daxpy(len,A,X,Y);
            rep++;
        }
        elapsed = second() - elapsed;
        printf("%%data %d %d %g %g\n",n_thd,len,
               ((double)(len*rep))/(elapsed*1.0e+6),elapsed*1.0e+6/rep);
        len += N_STEP;
    }

    printf("\n seq:\n");
    len = 1;
    while(len < N_MAX){
        elapsed = second();
        rep = 0;
        while(rep < N_ITER){
            daxpy(len,A,X,Y);
            rep++;
        }
        elapsed = second() - elapsed;
        printf("%%data %d %d %g %g\n",n_thd,len,
               ((double)(len*rep))/(elapsed*1.0e+6),elapsed*1.0e+6/rep);
        len += N_STEP;
    }

    return NULL;
}

/*
 * Argument bundle that p_daxpy() fills in and hands, by address, to the
 * per-thread routine _p_daxpy().
 */
struct p_daxpy_args {
    int n;          /* number of elements to update */
    double da;      /* scalar multiplier */
    double *dx;     /* input vector, only read */
    double *dy;     /* vector updated in place: dy[i] += da*dx[i] */
};
    
/* Per-thread routine passed (cast to cfunc) to _ompc_do_parallel(). */
void _p_daxpy(struct p_daxpy_args *ap,struct ompc_thread *tp);

void p_daxpy(int n,double da,double dx[],double dy[])
{
    struct p_daxpy_args args;

    args.n = n;
    args.da = da;
    args.dx = dx;
    args.dy = dy;

    _ompc_do_parallel((cfunc)_p_daxpy,&args);
}

/*
 * Per-thread body of the parallel daxpy.
 *
 * ap: arguments packed by p_daxpy() (element count, scalar, vectors).
 * tp: descriptor of the executing thread; tp->num is used as the thread
 *     index and tp->parent->num_thds as the number of threads.
 *
 * Prints a trace line, waits at _ompc_barrier(), then updates its own
 * contiguous slice of dy.  The slice size is ceil(n / n_thd); a thread
 * whose slice starts at or beyond n performs no updates.
 */
void _p_daxpy(struct p_daxpy_args *ap,struct ompc_thread *tp)
{
    int id,n_thd;
    int i,n;
    int start,end,b;
    double *dx,*dy,da;

    id = tp->num;
    n_thd = tp->parent->num_thds;
    /* trace message: routine name was previously misspelled "p_dapxy" */
    printf("p_daxpy: n=%d id = %d n_thd = %d\n",ap->n,id,n_thd);

    n = ap->n;
    da = ap->da;
    dx = ap->dx;
    dy = ap->dy;

    _ompc_barrier();

    /* block partitioning */
    if(n_thd){
        b = (n+n_thd-1)/n_thd;      /* slice size, rounded up */
        start = b*id;
        end = start+b;
        if(end > n) end = n;        /* last slice may be short */
    } else {
        /* a zero thread count would divide by zero: take the whole range */
        start = 0;
        end = n;
    }

    printf("id=%d: start=%d, end=%d\n",id,start,end);
    for(i = start; i < end; i++){
        dy[i] += da*dx[i];
    }
}

/*
 * Sequential daxpy: dy[i] += da*dx[i] for 0 <= i < n.
 *
 * n:  number of leading elements to update; nothing is done when n <= 0.
 * da: scalar multiplier.
 * dx: input vector, only read.
 * dy: vector updated in place.
 */
void daxpy(int n,double da,double dx[],double dy[])
{
    double *src = dx;
    double *dst = dy;
    int remaining;

    for(remaining = n; remaining > 0; remaining--){
        *dst += da * *src;
        src++;
        dst++;
    }
}
