#ifdef not
static char rcsid[] = "$Id: ompsm_runtime.c,v 1.44 2001/09/17 12:18:40 a-hasega Exp $";
#endif
/* 
 * $RWC_Release: Omni-1.6 $
 * $RWC_Copyright:
 *  Omni Compiler Software Version 1.5-1.6
 *  Copyright (C) 2002 PC Cluster Consortium
 *  
 *  This software is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License version
 *  2.1 published by the Free Software Foundation.
 *  
 *  Omni Compiler Software Version 1.0-1.4
 *  Copyright (C) 1999, 2000, 2001.
 *   Tsukuba Research Center, Real World Computing Partnership, Japan.
 *  
 *  Please check the Copyright and License information in the files named
 *  COPYRIGHT and LICENSE under the top  directory of the Omni Compiler
 *  Software release kit.
 *  
 *  
 *  $
 */
#include <stdlib.h>
#include <string.h>
#include "ompsm.h"


/*
 * L_ATOMIC and L_ORDERED name the locks passed to OMPSM_LOCK/OMPSM_UNLOCK
 * by the atomic and ordered constructs in this file.
 * With USE_SCASH they expand to LOCK_ATOMIC and LOCK_ORDERED, which are
 * defined outside this file.
 */
#ifdef USE_SCASH
#define	L_ATOMIC	LOCK_ATOMIC
#define L_ORDERED	LOCK_ORDERED
#endif

/*
 * With USE_UNIX_SHMEM they expand to the two lock pointers defined here.
 * NOTE(review): nothing in this file assigns these pointers; presumably
 * they are set up during runtime initialization elsewhere -- confirm.
 */
#ifdef USE_UNIX_SHMEM
_ompc_lock_t	*_ompc_atomic_lock_p;
_ompc_lock_t	*_ompc_ordered_lock_p;

#define	L_ATOMIC	_ompc_atomic_lock_p
#define L_ORDERED	_ompc_ordered_lock_p
#endif

/*
 * private copy
 *
 * Settings for schedule(runtime) loops: written by
 * _ompc_set_runtime_schedule and read by _ompc_runtime_sched_init and
 * _ompc_runtime_sched_next.
 */
enum sched_kind _ompc_runtime_sched_kind;
/* A value <= 0 makes _ompc_runtime_sched_init choose a chunk size itself. */
int _ompc_runtime_chunk_size; /* default */

/* Shared worker behind the dynamic and guided "next chunk" entry points. */
static int _ompc_dynamic_sched_next_chunk(int *lb,int *ub,int guided);

/*
 * compiler runtime
 */
/* Declared early because _ompc_default_sched calls it before its definition. */
void _ompc_static_bsched(int *lb, int *ub, int *step);


/*
 * Barrier across the team.  Does nothing when called outside a parallel
 * region or from within a nested parallel region.
 */
void
_ompc_barrier ()
{
  if (!((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL))) {
    OMPSM_LIB_IN ();
    OMPSM_BARRIER ();
    OMPSM_LIB_OUT ();
  }
}

/*
 * Default loop schedule: forwards unchanged to the static block
 * distribution, _ompc_static_bsched.
 */
void
_ompc_default_sched (int *lb, int *ub, int *step)
{
  _ompc_static_bsched(lb, ub, step);
}


/*
 * Static scheduling: cyclic distribution.
 *
 * On entry *lb, *up and *step describe the whole loop.  Inside a
 * (non-nested) parallel region, *lb is advanced by node-id steps and *step
 * is multiplied by the node count, so each node visits every n_node-th
 * iteration; *up is left unchanged.  ompc_last_flag is set to 1 on the node
 * that owns the final iteration and cleared on the others.
 *
 * Outside a parallel region, or in a nested one, the bounds are untouched
 * and ompc_last_flag is set to 1.
 *
 * A zero step returns with the bounds untouched and ompc_last_flag
 * cleared, which is what _ompc_static_bsched does; without this guard the
 * iteration count below would divide by zero.
 */
void
_ompc_static_csched (int *lb, int *up, int *step)
{
  int n,s;
  int n_thd,id;

  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) {
    _ompsm_thread->ompc_last_flag = 1;
    return;
  }

  id = _ompc_node_id;
  n_thd = _ompc_n_node;
  _ompsm_thread->ompc_last_flag = 0;
  s = *step;

  if (s == 0) {
    /* no valid iteration space; avoid the division below */
    return;
  }

  /* how many iterations (rounded up in the direction of the step) */
  if (s > 0) {
    n = (*up - *lb + s - 1) / s;
  } else {
    n = (*up - *lb + s + 1) / s;
  }

  *lb += id * s;   /* adjust low bound */
  *step = s * n_thd;
  /* iteration number n-1 is the last one; it belongs to node (n-1)%n_thd */
  if (n > 0 && ((n - 1) % n_thd) == id) {
    _ompsm_thread->ompc_last_flag = 1;
  }
}


/*
 * Static scheduling: block distribution.
 *
 * On entry *lb and *ub hold the bounds of the whole loop and *step its
 * increment.  Inside a (non-nested) parallel region, *lb and *ub are
 * overwritten with the sub-range assigned to the calling node; *step is
 * only read.  ompc_last_flag is first cleared and then set to 1 on a node
 * whose computed end reaches the original bound while its range is
 * non-empty.
 *
 * Outside a parallel region, or in a nested one, the bounds are left
 * untouched and ompc_last_flag is set to 1.  A zero step returns with the
 * bounds untouched and ompc_last_flag cleared.
 *
 * Three partitioning variants are selected at compile time:
 *   DIST_BLOCK3 - blocks of tasks/n_thd iterations, with the remainder
 *                 iterations handed out one each to nodes spaced
 *                 n_thd/rem apart;
 *   DIST_BLOCK2 - the first `rem' nodes receive one extra iteration;
 *   default     - every node gets the same block length, rounded to a
 *                 multiple of the step.
 */
void
_ompc_static_bsched (int *lb, int *ub, int *step)
{
  int n_thd, id;
  int b,e,ee,s,blk_s;


  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) {
    _ompsm_thread->ompc_last_flag = 1;
    return;
  }

  s = *step;
  b = *lb;
  ee = e = *ub;		/* ee keeps the original end bound for clipping */
  id = _ompc_node_id;
  n_thd = _ompc_n_node;
  _ompsm_thread->ompc_last_flag = 0;
#ifdef DIST_BLOCK3
  if (s > 0) {
    int rem, st;
    /* number of iterations, rounded up */
    int tasks = ((e-b) / s) + (((e-b)%s)?(1):(0));

    blk_s = tasks / n_thd;
    rem   = tasks % n_thd;
    if (rem == 0) {
      b += blk_s * id * s;
      e  = b + blk_s * s;
    } else {
      /* spacing between the nodes that take one extra iteration */
      st  = n_thd / rem;
      b  += blk_s * id * s;
      /* shift the start past the extra iterations given to lower nodes */
      if (id/st < rem) {
	b += ((id+st-1)/st)*s;
      } else {
	b += s * rem;
      }
      e  = b + blk_s * s;
      if ((id % st == 0) && (id < rem*st)) {
	e += s;
      }
    }
    /* clip to the loop end; a non-empty clipped range is the last one */
    if (e >= ee) {
      e = ee;
      if (ee > b) {
	_ompsm_thread->ompc_last_flag = 1;
      }
    }
  } else if (s < 0){
    int rem, st;
    int tasks;

    /* work with the magnitude of the step and subtract instead of add */
    s = -s;
    tasks = ((-(e-b)) / s) + (((-(e-b))%s)?(1):(0));

    blk_s = tasks / n_thd;
    rem   = tasks % n_thd;
    if (rem == 0) {
      b -= blk_s * id * s;
      e  = b - blk_s * s;
    } else {
      st  = n_thd / rem;
      b  -= blk_s * id * s;
      if (id/st < rem) {
	b -= ((id+st-1)/st)*s;
      } else {
	b -= s * rem;
      }
      e  = b - blk_s * s;
      if ((id % st == 0) && (id < rem*st)) {
	e -= s;
      }
    }
    if (e <= ee) {
      e = ee;
      if (ee < b) {
	_ompsm_thread->ompc_last_flag = 1;
      }
    }
  } else {
    /* zero step: leave *lb and *ub as they were */
    return;
  }
#else
#ifdef DIST_BLOCK2
  if (s > 0) {
    /* number of iterations, rounded up */
    int tasks = ((e-b) / s) + (((e-b)%s)?(1):(0));
    int rem;

    blk_s = tasks / n_thd;
    rem   = tasks % n_thd;

    /* nodes below rem take blk_s+1 iterations, the others blk_s */
    if (id < rem) {
      b += (blk_s + 1) * id * s;
      e  = b + (blk_s + 1) * s;
    } else {
      b += (blk_s * id + rem) * s;
      e  = b + blk_s * s;
    }
    if (e >= ee) {
      e = ee;
      if (ee > b) {
	_ompsm_thread->ompc_last_flag = 1;
      }
    }
  } else if (s < 0) {
    int tasks;
    int rem;

    /* work with the magnitude of the step and subtract instead of add */
    s = -s;
    tasks = ((-(e-b)) / s) + (((-(e-b))%s)?(1):(0));
    
    blk_s = tasks / n_thd;
    rem   = tasks % n_thd;
 
    if (id < rem) {
      b -= (blk_s + 1) * id * s;
      e  = b - (blk_s + 1) * s;
    } else {
      b -= (blk_s * id + rem) * s;
      e  = b - blk_s * s;
    }
    if (e <= ee) {
      e = ee;
      if (ee < b) {
	_ompsm_thread->ompc_last_flag = 1;
      }
    }
  } else {
    /* zero step: leave *lb and *ub as they were */
    return;
  }
#else
  if (s > 0) {
    /* per-node span in index units, then rounded up to a multiple of s */
    blk_s = (e-b+n_thd-1)/n_thd;
    blk_s = ((blk_s+s-1)/s)*s;
    b += blk_s*id;
    e = b + blk_s;
    if (e >= ee) {
      e = ee;
      if (ee > b) {
	_ompsm_thread->ompc_last_flag = 1;
      }
    }
  } else if (s < 0) {
    /* same computation mirrored for a descending loop; blk_s is negative */
    blk_s = (e-b-n_thd+1)/n_thd;
    blk_s = ((blk_s+s+1)/s)*s;
    b += blk_s*id;
    e = b + blk_s;
    if (e <= ee) {
      e = ee;
      if(ee < b) {
	_ompsm_thread->ompc_last_flag = 1;
      }
    }
  } else {
    /* zero step: leave *lb and *ub as they were */
    return;
  }
#endif
#endif

  *lb = b;
  *ub = e;
}


/*
 * static schedule
 *
 * Prepare the calling thread for a schedule(static, chunk_size) loop over
 * [lb, up) with the given step.  Chunks are then fetched one at a time
 * with _ompc_static_sched_next.
 *
 * Outside a parallel region, or in a nested one, only the start index and
 * the end bound are recorded, so the whole range is handed out as a single
 * chunk.
 *
 * Otherwise chunk_size must be positive (a fatal error is raised if not).
 * The per-thread state is set up so that node k starts at
 * lb + k*chunk_size*step and advances by chunk_size*step*n_node per chunk.
 */
void 
_ompc_static_sched_init (int lb, int up, int step, int chunk_size)
{
  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) {
    _ompsm_thread->ompc_loop_sched_index = lb;
    _ompsm_thread->ompc_loop_end = up;
    return;
  }

  if (chunk_size <= 0) {
    fprintf(stderr, "chunk size is non-positive\n");
    _ompc_fatal("_ompc_static_sched_init");
  }

  /* from here on chunk_size is measured in index units, and is signed */
  chunk_size *= step;
  _ompsm_thread->ompc_loop_sched_index = lb+chunk_size*_ompc_node_id;
  _ompsm_thread->ompc_loop_chunk_size = chunk_size;
  _ompsm_thread->ompc_loop_stride = chunk_size*_ompc_n_node;
  _ompsm_thread->ompc_loop_end = up;
  _ompsm_thread->ompc_last_flag = 0;
}

/*
 * Fetch the next chunk of a static-scheduled loop.
 *
 * Stores the chunk in *lb / *ub and returns TRUE, or returns FALSE when the
 * calling thread has no more chunks.  ompc_last_flag is set when the chunk
 * handed out is clipped to the loop end (and always when running
 * sequentially).
 */
int 
_ompc_static_sched_next(int *lb, int *ub)
{
  int first, last;

  first = _ompsm_thread->ompc_loop_sched_index;

  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) {
    /* sequential: the remaining range is a single chunk */
    last = _ompsm_thread->ompc_loop_end;
    _ompsm_thread->ompc_last_flag = 1;
    if (first != last) {
      *lb = first;
      *ub = last;
      _ompsm_thread->ompc_loop_sched_index = last;
      return TRUE;
    }
    return FALSE;
  }

  /* advance to this thread's next chunk before inspecting the current one */
  last = first + _ompsm_thread->ompc_loop_chunk_size;
  _ompsm_thread->ompc_loop_sched_index += _ompsm_thread->ompc_loop_stride;

  if (_ompsm_thread->ompc_loop_chunk_size > 0) {
    /* ascending loop */
    if (first >= _ompsm_thread->ompc_loop_end) {
      return FALSE;
    }
    if (last >= _ompsm_thread->ompc_loop_end) {
      last = _ompsm_thread->ompc_loop_end;
      _ompsm_thread->ompc_last_flag = 1;
    }
  } else {
    /* descending loop */
    if (first <= _ompsm_thread->ompc_loop_end) {
      return FALSE;
    }
    if (last <= _ompsm_thread->ompc_loop_end) {
      last = _ompsm_thread->ompc_loop_end;
      _ompsm_thread->ompc_last_flag = 1;
    }
  }

  *lb = first;
  *ub = last;
  return TRUE;
}

/*
 * affinity scheduling
 *
 * Prepare the calling thread for an affinity-scheduled loop over [lb, up)
 * with the given step.  Blocks of blksiz indices (after adding `offset')
 * are assigned to nodes in node-id order; for MAP_BLOCK, blksiz is first
 * divided (rounding up) by the node count.  The `scale' argument is not
 * used by this function.
 *
 * Outside a parallel region, or in a nested one, only the start index and
 * the end bound are recorded.
 */
void
_ompc_affinity_sched_init(int lb, int up, int step,
			  int mode, int blksiz, int scale, int offset)
{
  int span, nodes, layer, shift, base;

  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) {
    _ompsm_thread->ompc_loop_sched_index = lb;
    _ompsm_thread->ompc_loop_end = up;
    return;
  }

  if (blksiz <= 0) {
    fprintf(stderr, "blksize size is non-positive, blksize=%d\n",blksiz);
    _ompc_fatal("_ompc_affinity_sched_init");
  }

  nodes = _ompc_n_node;

  if (mode == MAP_BLOCK) {
    blksiz = (blksiz + nodes - 1) / nodes;
  } else if (mode == MAP_NONE) {
    _ompc_fatal ("bad mapping mode");
  }
  /* MAP_CYCLIC and any other mode keep blksiz as given */

  /* one "layer" holds one block for every node */
  span = blksiz * nodes;

  base = lb + offset;
  if (base < 0) {
    /* shift a negative base up by whole layers before dividing */
    shift = (-(base - span + 1) / span);
    layer = ((base + shift * span) / span);
  } else {
    shift = 0;
    layer = base / span;
  }
  _ompsm_thread->ompc_loop_sched_index =
    layer * span + blksiz * _ompc_node_id - offset - shift * span;

  if (step > 0) {
    _ompsm_thread->ompc_loop_chunk_size = blksiz;
    _ompsm_thread->ompc_loop_stride = span;
  } else {
    /* descending loop: start from the top index of the block */
    _ompsm_thread->ompc_loop_sched_index += blksiz - 1;
    _ompsm_thread->ompc_loop_chunk_size = -blksiz;
    _ompsm_thread->ompc_loop_stride = -span;
  }

  _ompsm_thread->ompc_loop_start = lb;
  _ompsm_thread->ompc_loop_step = step;
  _ompsm_thread->ompc_loop_end = up;
  _ompsm_thread->ompc_last_flag = 0;
}


/*
 * Fetch the next chunk of an affinity-scheduled loop for the calling node.
 *
 * Stores the chunk in *lb / *ub and returns TRUE, or returns FALSE when the
 * (adjusted) start of the node's next block lies at or beyond the loop end.
 * ompc_last_flag is set when the chunk is clipped to the loop end, and
 * always when running outside a parallel region or in a nested one.
 *
 * The state read here (chunk size, step, start, end, stride, current
 * index) is the one stored by _ompc_affinity_sched_init.
 */
int 
_ompc_affinity_sched_next(int *lb, int *ub)
{
  int b,e,loop_start,loop_end,c,s,stride;


  b = _ompsm_thread->ompc_loop_sched_index;
  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) {
    /* sequential: hand out the remaining range as one chunk */
    e = _ompsm_thread->ompc_loop_end;
    _ompsm_thread->ompc_last_flag = 1;
    if (b == e) {
      return FALSE;
    }
    *lb = b;
    *ub = e;
    _ompsm_thread->ompc_loop_sched_index = e;
    return TRUE;
  }

  c = _ompsm_thread->ompc_loop_chunk_size;
  s = _ompsm_thread->ompc_loop_step;
  loop_start = _ompsm_thread->ompc_loop_start;
  loop_end = _ompsm_thread->ompc_loop_end;
  stride = _ompsm_thread->ompc_loop_stride;

  e = b+c;
  if (c > 0) {
    /* ascending loop: the block may begin at or before the loop start */
    if (b <= loop_start) {
      if (loop_start < e) {
	/* block straddles the start: begin at the loop start */
	b = loop_start;
      } else {
	/* block lies entirely before the start: move on to the next one */
	_ompsm_thread->ompc_loop_sched_index += stride;
	b += stride;
	e += stride;
      } 
    }

    /* adjust start */
    /* round b up to the next index reachable from loop_start in steps of s */
    if (s != 1) {
      b = loop_start + ((b - loop_start+s-1)/s)*s;
    }
  } else {
    /* descending loop: mirror image of the case above */
    if (b >= loop_start) {
      if (loop_start > e) {
	b = loop_start;
      } else {
	_ompsm_thread->ompc_loop_sched_index += stride;
	b += stride;
	e += stride;
      } 
    }
    /* adjust start */
    if (s != -1) {
      b = loop_start + ((b - loop_start+s+1)/s)*s;
    }
  }

  /* set next iteration */
  _ompsm_thread->ompc_loop_sched_index += stride;

  /* check the last thread */
  if (c > 0) {
    if (b >= loop_end) {
      return FALSE; 
    }
    if (e >= loop_end) {
      e = loop_end;
      _ompsm_thread->ompc_last_flag = 1;
    }
  } else {
    if (b <= loop_end) {
      return FALSE;
    }
    if (e <= loop_end) {
      e = loop_end;
      _ompsm_thread->ompc_last_flag = 1;
    }
  }
  *lb = b;
  *ub = e;
  if (_ompc_debug_flag) {
    printf("affinity id=%d, b=%d, e=%d\n",_ompc_node_id,b,e);
  }

  return TRUE;
}


/* 
 * dynamic schedule
 *
 * Prepare a schedule(dynamic, chunk_size) loop over [lb, up) with the
 * given step.  Chunks are then claimed with _ompc_dynamic_sched_next
 * (or, for guided loops, _ompc_guided_sched_next).
 *
 * Outside a parallel region, or in a nested one, only the start index and
 * the end bound are recorded.
 *
 * Otherwise chunk_size must be positive (a fatal error is raised if not).
 * The per-thread chunk size is stored in index units (chunk_size*step) and
 * the shared __G__->dynamic_index is reset to lb between two barriers.
 */
void
_ompc_dynamic_sched_init(int lb, int up, int step, int chunk_size)
{
  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) { /* not parallel */
    _ompsm_thread->ompc_loop_sched_index = lb;
    _ompsm_thread->ompc_loop_end = up;
    return; /* stride is not used */
  }

  OMPSM_LIB_IN ();
  if (chunk_size <= 0) {
    fprintf(stderr, "chunk size is non-positive\n");
    _ompc_fatal("_ompc_dynamic_sched_init");
  }
  _ompsm_thread->ompc_loop_chunk_size = chunk_size*step;
  _ompsm_thread->ompc_loop_end = up;
  _ompsm_thread->ompc_loop_sched_index = lb;
  _ompsm_thread->ompc_last_flag = 0;

  /* every node stores the same value; the barriers fence the reset */
  OMPSM_BARRIER ();
  __G__->dynamic_index = lb;
  OMPSM_BARRIER ();

  OMPSM_LIB_OUT ();
}


/*
 * Claim the next chunk of a dynamic-scheduled loop.  Forwards to
 * _ompc_dynamic_sched_next_chunk with guided == FALSE and returns its
 * result.
 */
int
_ompc_dynamic_sched_next(int *lb, int *ub)
{
  return _ompc_dynamic_sched_next_chunk(lb,ub,FALSE);
}


/*
 * Claim the next chunk of a dynamic or guided loop.
 *
 *   lb, ub  - receive the bounds of the claimed chunk
 *   guided  - non-zero selects the guided chunk-size rule
 *
 * Returns TRUE when a chunk was stored in *lb / *ub, FALSE when there is
 * nothing left for the caller.  ompc_last_flag is set when the chunk is
 * clipped to the loop end, and always when running outside a parallel
 * region or in a nested one.
 *
 * The shared counter __G__->dynamic_index is read and advanced under
 * OMPSM_LOCK0.  In guided mode the increment is the remaining range
 * divided by the node count, rounded via ((l+c)/c)*c, and never smaller in
 * magnitude than the chunk size c.
 *
 * This function has a static forward declaration near the top of the file.
 */
int
_ompc_dynamic_sched_next_chunk(int *lb,int *ub,int guided)
{
  int exit_flag;
  int b,e,l,c;
  int rv = TRUE;


  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) {  /* not in parallel */
    b = _ompsm_thread->ompc_loop_sched_index;
    e = _ompsm_thread->ompc_loop_end;
    _ompsm_thread->ompc_last_flag = 1;
    if(b == e) {
      return FALSE;
    }
    *lb = b;
    *ub = e;
    _ompsm_thread->ompc_loop_sched_index = e;
    return TRUE;
  } 

  OMPSM_LIB_IN ();

  /* signed chunk size in index units, as stored by the init function */
  c = _ompsm_thread->ompc_loop_chunk_size;

  /* get my chunk, set b and e */
  if (guided) {
    OMPSM_LOCK0 ();
    OMPSM_FLUSH_LOCK ();
    OMPSM_REFRESH_VAR(&__G__->dynamic_index, sizeof(__G__->dynamic_index));
    b = __G__->dynamic_index;
    /* share of the remaining range, rounded to a multiple of c */
    l = (_ompsm_thread->ompc_loop_end - b)/_ompc_n_node;
    l = ((l+c)/c)*c;
    /* never hand out less than one chunk (in the loop's direction) */
    if (c > 0) {
      if (c > l) {
	l = c;
      }
    } else {
      if (c < l) {
	l = c;
      }
    }
    e = b + l;
    __G__->dynamic_index = e;
    OMPSM_FLUSH_VAR(&__G__->dynamic_index, sizeof(__G__->dynamic_index));
    OMPSM_FLUSH_UNLOCK ();
    OMPSM_UNLOCK0();
  } else {
    OMPSM_LOCK0();
    OMPSM_FLUSH_LOCK ();
    OMPSM_REFRESH_VAR(&__G__->dynamic_index, sizeof(__G__->dynamic_index));
    b = __G__->dynamic_index;
    e = b + c;
    __G__->dynamic_index = e;
    OMPSM_FLUSH_VAR(&__G__->dynamic_index, sizeof(__G__->dynamic_index));
    OMPSM_FLUSH_UNLOCK ();
    OMPSM_UNLOCK0();
  }

  /*
   * First test: uses the thread's own ompc_loop_sched_index, i.e. the end
   * of the chunk this thread claimed previously (or lb on the first call),
   * not the freshly claimed b.
   */
  exit_flag = FALSE;
  if (c > 0) {
    if (_ompsm_thread->ompc_loop_sched_index >= _ompsm_thread->ompc_loop_end) {
      rv = FALSE;
      goto ret;
    }
    if (e >= _ompsm_thread->ompc_loop_end) {
      exit_flag = TRUE;
    }
  } else {
    if (_ompsm_thread->ompc_loop_sched_index <= _ompsm_thread->ompc_loop_end) {
      rv = FALSE;
      goto ret;
    }
    if (e <= _ompsm_thread->ompc_loop_end) {
      exit_flag = TRUE;
    }
  }

  /* the claimed chunk reaches or passes the loop end */
  if (exit_flag) {
    OMPSM_LOOP_OUT ();
  }
  _ompsm_thread->ompc_loop_sched_index = e;

  /* adjust the last iteration */
  if (c > 0) {
    if (b >= _ompsm_thread->ompc_loop_end) {
      rv = FALSE;
      goto ret;
    }
    if (e >= _ompsm_thread->ompc_loop_end) {
      e = _ompsm_thread->ompc_loop_end;
      _ompsm_thread->ompc_last_flag = 1;
    }
  } else {
    if (b <= _ompsm_thread->ompc_loop_end) {
      rv = FALSE;
      goto ret;
    }
    if (e <= _ompsm_thread->ompc_loop_end) {
      e = _ompsm_thread->ompc_loop_end;
      _ompsm_thread->ompc_last_flag = 1;
    }
  }
  *lb = b;
  *ub = e;

  /* single exit so that OMPSM_LIB_OUT always pairs with OMPSM_LIB_IN */
 ret:
  OMPSM_LIB_OUT ();

  return rv;
}


/* 
 * guided schedule
 *
 * Initialization for a guided loop; forwards all arguments unchanged to
 * _ompc_dynamic_sched_init.
 */
void 
_ompc_guided_sched_init (int lb, int up, int step, int chunk_size)
{
  /* same as _ompc_dynamic_sched_init */
  _ompc_dynamic_sched_init (lb, up, step, chunk_size);
}


/*
 * Claim the next chunk of a guided loop.  Forwards to
 * _ompc_dynamic_sched_next_chunk with guided == TRUE and returns its
 * result.
 */
int
_ompc_guided_sched_next (int *lb, int *ub)
{
  return _ompc_dynamic_sched_next_chunk (lb, ub, TRUE);
}


/* 
 * runtime schedule 
 *
 * Parse an OMP_SCHEDULE-style string of the form
 *     kind [, chunk]
 * where kind is "static", "dynamic" or "guided" and chunk is a positive
 * decimal integer.  Whitespace is allowed before the kind and around the
 * comma.  The results are stored in _ompc_runtime_sched_kind and
 * _ompc_runtime_chunk_size.
 *
 * A NULL, empty or all-blank string leaves both settings untouched.
 * Anything other than end-of-string or ",<digits>" after the kind, or a
 * chunk that is not positive, is reported through _ompc_fatal.
 *
 * Characters are converted to unsigned char before being handed to the
 * <ctype.h> classifiers: passing a negative plain char is undefined
 * behavior.
 */
void 
_ompc_set_runtime_schedule (char *s)
{
  char *cp;

  if (s == NULL) {
    return;
  }

  cp = s;
  while (isspace((unsigned char)*cp)) {
    cp++;
  }
  if (*cp == 0) {
    return;
  }
  if (strncmp(cp,"static",6) == 0) {
    cp += 6;
    _ompc_runtime_sched_kind = SCHED_STATIC;
  } else if (strncmp(cp,"dynamic",7) == 0) {
    cp += 7;
    _ompc_runtime_sched_kind = SCHED_DYNAMIC;
  } else if (strncmp(cp,"guided",6) == 0) {
    cp += 6;
    _ompc_runtime_sched_kind = SCHED_GUIDED;
  }
  /* an unrecognized kind leaves cp in place and fails the ',' test below */
  while (isspace((unsigned char)*cp)) {
    cp++;
  }
  if (*cp == 0) {
    return;
  }
  if (*cp != ',') {
    goto err;
  }
  cp++;
  while (isspace((unsigned char)*cp)) {
    cp++;
  }
  if (!isdigit((unsigned char)*cp)) {
    goto err;
  }
  sscanf(cp,"%d",&_ompc_runtime_chunk_size);
  if (_ompc_runtime_chunk_size <= 0) {
    _ompc_runtime_sched_kind = SCHED_NONE;
    goto err;
  }

  return;

 err:
  fprintf(stderr,"OMP_SCHEDULE ='%s'",s);
  _ompc_fatal("bad OMP_SCHEDULE");
}


/*
 * Initialize a schedule(runtime) loop over [lb, up) with the given step,
 * using the kind and chunk size recorded in _ompc_runtime_sched_kind and
 * _ompc_runtime_chunk_size.
 *
 * Dynamic and guided loops default to a chunk size of 1.  Every other kind
 * is treated as static; its default chunk size is the range divided
 * (rounding up) by step times the number of threads, with a minimum of 1.
 */
void 
_ompc_runtime_sched_init(int lb, int up, int step)
{
  int chunk_size = _ompc_runtime_chunk_size;
  int n_thd;

  if (_ompc_runtime_sched_kind == SCHED_DYNAMIC
      || _ompc_runtime_sched_kind == SCHED_GUIDED) {
    if (chunk_size <= 0) {
      chunk_size = 1;
    }
    _ompc_dynamic_sched_init (lb, up, step, chunk_size);
    return;
  }

  /* SCHED_STATIC, SCHED_NONE and anything else */
  n_thd = ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) ? 1 : _ompc_n_node;

  if (chunk_size <= 0) {
    int range = up - lb;
    int per_round = step * n_thd;

    chunk_size = range / per_round;
    if (range % per_round) {
      chunk_size += 1;
    }
    if (chunk_size <= 0) {
      chunk_size = 1;
    }
  }
  _ompc_static_sched_init(lb, up, step, chunk_size);
}


/*
 * Fetch the next chunk of a schedule(runtime) loop by dispatching on
 * _ompc_runtime_sched_kind.  Kinds other than dynamic and guided use the
 * static scheduler.
 */
int
_ompc_runtime_sched_next(int *lb, int *ub)
{
  if (_ompc_runtime_sched_kind == SCHED_DYNAMIC) {
    return _ompc_dynamic_sched_next_chunk(lb, ub, FALSE);
  }
  if (_ompc_runtime_sched_kind == SCHED_GUIDED) {
    return _ompc_dynamic_sched_next_chunk(lb, ub, TRUE);
  }
  /* SCHED_STATIC, SCHED_NONE and anything else */
  return _ompc_static_sched_next(lb, ub);
}


/* 
 * ordered
 *
 * Record i as the calling thread's current loop id.  _ompc_ordered_begin
 * compares this value against the shared ordered_id.
 */
void 
_ompc_set_loop_id(int i)
{
  _ompsm_thread->ompc_loop_id = i;
}


/*
 * Set up the shared state for an ordered loop: the id that may enter the
 * ordered region first (lb) and the increment applied after each region
 * (step).  The stores are bracketed by barriers.  Nothing is done outside
 * a parallel region or in a nested one.
 */
void 
_ompc_init_ordered(int lb,int step)
{
  if (!((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL))) {
    OMPSM_LIB_IN ();

    OMPSM_BARRIER();
    __G__->ordered_id = lb;
    __G__->ordered_step = step;
    OMPSM_BARRIER();

    OMPSM_LIB_OUT ();
  }
}


/*
 * Enter an ordered region.
 *
 * Repeatedly takes the L_ORDERED lock and re-reads the shared ordered_id
 * until it equals the calling thread's ompc_loop_id.  On a mismatch the
 * lock is released and the loop retries.  On a match the loop is left with
 * L_ORDERED still held; _ompc_ordered_end releases it.
 *
 * Returns immediately outside a parallel region or in a nested one.
 */
void
_ompc_ordered_begin ()
{
  if((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) { /* sequential */
    return;
  }

  OMPSM_LIB_IN ();

  for (;;) {
    OMPSM_LOCK (L_ORDERED);
    OMPSM_FLUSH_LOCK ();
    OMPSM_REFRESH_VAR (&__G__->ordered_id, sizeof(__G__->ordered_id));
    OMPSM_FLUSH_UNLOCK ();
    if (_ompsm_thread->ompc_loop_id == __G__->ordered_id) {
      /* our turn: leave with L_ORDERED held */
      break;
    }
    OMPSM_UNLOCK (L_ORDERED);
  }
  /* refresh everything before running the ordered body */
  OMPSM_FLUSH_LOCK ();
  OMPSM_REFRESH_ALL ();
  OMPSM_FLUSH_UNLOCK ();

  OMPSM_LIB_OUT ();
}


/*
 * Leave an ordered region.
 *
 * Advances the shared ordered_id by ordered_step, flushes, and releases
 * L_ORDERED.  The lock is not taken here: it is the one left held by
 * _ompc_ordered_begin.
 *
 * Returns immediately outside a parallel region or in a nested one.
 */
void 
_ompc_ordered_end ()
{
  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL))	{ /* sequential */
    return;
  }

  OMPSM_LIB_IN ();
  OMPSM_FLUSH_LOCK ();
  OMPSM_REFRESH_VAR (&__G__->ordered_id, sizeof(__G__->ordered_id));
  /* hand the turn to the next iteration */
  __G__->ordered_id += __G__->ordered_step;
  OMPSM_FLUSH_VAR(&__G__->ordered_id, sizeof(__G__->ordered_id));
  OMPSM_FLUSH_ALL ();
  OMPSM_FLUSH_UNLOCK ();
  OMPSM_UNLOCK (L_ORDERED);

  OMPSM_LIB_OUT ();
}


/*
 * sections directives. section_id is allocated in round-robin manner.
 *
 * Start a sections construct with n_sections sections.  The calling
 * thread's first section index is its node id (0 outside a parallel region
 * or in a nested one); the index of the final section is remembered so
 * that _ompc_section_id can raise ompc_last_flag.
 */
void
_ompc_section_init(int n_sections)
{
  int first_section = 0;

  if (!((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL))) {
    first_section = _ompc_node_id;
  }

  _ompsm_thread->ompc_section_indx = first_section;
  _ompsm_thread->ompc_last_section_indx = n_sections - 1;
  _ompsm_thread->ompc_last_flag = 0;
}


/*
 * Return the index of the next section for the calling thread and advance
 * its section index: by 1 outside a parallel region (or in a nested one),
 * by the node count otherwise.  ompc_last_flag is set when the returned
 * index is that of the final section.
 */
int
_ompc_section_id ()
{
  const int current = _ompsm_thread->ompc_section_indx;
  const int advance =
    ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) ? 1 : _ompc_n_node;

  _ompsm_thread->ompc_section_indx += advance;

  if (current == _ompsm_thread->ompc_last_section_indx) {
    _ompsm_thread->ompc_last_flag = 1;
  }
  return current;
}


/*
 * Non-zero when the calling thread's ompc_last_flag is set, or when
 * running outside a parallel region or in a nested one.
 */
int
_ompc_is_last()
{
  return (_ompsm_thread->ompc_last_flag || (NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL));
}


/*
 * single construct
 *
 * Returns non-zero when the calling thread should execute the single
 * block.  Outside a parallel region, or in a nested one, that is always
 * the case.  Otherwise the decision is taken from the value returned by
 * OMPSM_COUNT_LOCK0: the thread that observes 0 is selected.
 */
int
_ompc_do_single()
{
  int arrival;

  if (!(NOT_IN_PARALLEL) && !(IN_NESTED_PARALLEL)) {
    OMPSM_LIB_IN ();

    arrival = OMPSM_COUNT_LOCK0();
    OMPSM_UNLOCK0();

    OMPSM_LIB_OUT ();

    return arrival == 0;
  }

  return 1; /* not in parallel */
}


/*
 * Returns the value of the IS_MASTER_THREAD macro for the calling thread.
 */
int
_ompc_is_master()
{
  return IS_MASTER_THREAD;
}


/*
 * Enter a critical section.
 *
 * p is the address of a per-critical-section slot holding a pointer to
 * the lock.  When the slot is still NULL, the shared list
 * __G__->critical_locks is searched under OMPSM_LOCK0 for an entry keyed
 * by p; if none exists a new entry is allocated with OMPSM_ALLOC, linked
 * at the head of the list and its lock initialized.  The slot is then
 * pointed at that entry's lock.  Finally the lock is acquired and all
 * data is refreshed.
 *
 * Does nothing outside a parallel region.  A failed allocation is
 * reported through _ompc_fatal.
 */
void
_ompc_enter_critical (_ompc_lock_t **p)
{
  struct critical_lock_list *cp;

  if (NOT_IN_PARALLEL) {
    return;
  }

  OMPSM_LIB_IN ();

  if (*p == NULL) {
    OMPSM_LOCK0 ();
    OMPSM_FLUSH_LOCK ();
    OMPSM_REFRESH_VAR (&__G__->critical_locks, sizeof(__G__->critical_locks));
    for (cp = __G__->critical_locks; cp != NULL; cp = cp->next) {
      OMPSM_REFRESH_VAR (cp, sizeof(*cp));
      if (cp->addr == p) {
	break;
      }
    }
    if (cp == NULL) {
      cp = (struct critical_lock_list *) OMPSM_ALLOC (sizeof(*cp));
      /* validate the allocation before the new entry is touched */
      if (cp == NULL) {
	_ompc_fatal ("_ompc_enter_critical : can not allocate global memory");
      }
      OMPSM_REFRESH_VAR (cp, sizeof(*cp));
      cp->next = __G__->critical_locks;
      cp->addr = p;
      OMPSM_FLUSH_VAR (cp, sizeof(*cp));

      __G__->critical_locks = cp;
      OMPSM_FLUSH_VAR (&__G__->critical_locks, sizeof(__G__->critical_locks));

      OMPSM_INIT_SLOCK (&(cp->lock));
      OMPSM_FLUSH_VAR (&(cp->lock), sizeof(cp->lock));
    }
    *p = &(cp->lock);
    OMPSM_FLUSH_UNLOCK ();
    OMPSM_UNLOCK0 ();
  }
  OMPSM_LOCK_SLOCK ((_ompc_lock_t volatile *)*p);
  OMPSM_FLUSH_LOCK ();
  OMPSM_REFRESH_ALL ();
  OMPSM_FLUSH_UNLOCK ();

  OMPSM_LIB_OUT ();
}


/*
 * Leave a critical section: flush all data, then release the lock that
 * *p points to.  Does nothing outside a parallel region.
 */
void
_ompc_exit_critical (_ompc_lock_t **p)
{
  if (!(NOT_IN_PARALLEL)) {
    OMPSM_LIB_IN ();

    OMPSM_FLUSH_LOCK ();
    OMPSM_FLUSH_ALL ();
    OMPSM_FLUSH_UNLOCK ();
    OMPSM_UNLOCK_SLOCK (*p);

    OMPSM_LIB_OUT ();
  }
}


/*
 * Begin an atomic update: acquire L_ATOMIC, then refresh all data.
 * Does nothing outside a parallel region.
 */
void
_ompc_atomic_lock ()
{
  if (!(NOT_IN_PARALLEL)) {
    OMPSM_LIB_IN ();
    OMPSM_LOCK (L_ATOMIC);
    OMPSM_FLUSH_LOCK ();
    OMPSM_REFRESH_ALL ();
    OMPSM_FLUSH_UNLOCK ();
    OMPSM_LIB_OUT ();
  }
}


/*
 * End an atomic update: flush all data, then release L_ATOMIC.
 * Does nothing outside a parallel region.
 */
void
_ompc_atomic_unlock ()
{
  if (!(NOT_IN_PARALLEL)) {
    OMPSM_LIB_IN ();
    OMPSM_FLUSH_LOCK ();
    OMPSM_FLUSH_ALL ();
    OMPSM_FLUSH_UNLOCK ();
    OMPSM_UNLOCK (L_ATOMIC);
    OMPSM_LIB_OUT ();
  }
}


/*
 * Flush nbyte bytes starting at dst via OMPSM_FLUSH.
 * Does nothing outside a parallel region.
 */
void
_ompc_flush (char *dst,int nbyte)
{
  if (!(NOT_IN_PARALLEL)) {
    OMPSM_LIB_IN ();
    OMPSM_FLUSH_LOCK ();
    OMPSM_FLUSH (dst, nbyte);
    OMPSM_FLUSH_UNLOCK ();
    OMPSM_LIB_OUT ();
  }
}


/*
 * Body of _ompc_reduction for integral element types.
 *   type_t - C type of the reduced variable
 *   t      - any_type member used to hold a value of that type
 * Relies on in_p, out_p, op, n_node and vals from the enclosing function.
 *
 * Each node stores *in_p in its own vals[] slot.  After a barrier (skipped
 * when n_node is 1) the master thread folds the first n_node slots into
 * the current value of *out_p according to op and writes the result back;
 * a second barrier follows.  PLUS and MINUS both combine by addition.  An
 * unknown op is reported through _ompc_fatal.
 */
#define DO_REDUCTION_INTEGRAL(type_t,t) {\
  vals[_ompc_node_id].t = *((type_t *)in_p); \
  if (n_node != 1) { \
    OMPSM_BARRIER(); \
  } \
  if(IS_MASTER_THREAD){ \
      any_type v; int i; \
      v.t = *((type_t *)out_p); \
      switch(op){ \
      case OMPC_REDUCTION_PLUS: \
      case OMPC_REDUCTION_MINUS: \
	for(i=0;i<n_node;i++) v.t += vals[i].t;\
	break; \
      case OMPC_REDUCTION_MUL: \
	for(i=0;i<n_node;i++) v.t *= vals[i].t;\
	break; \
      case OMPC_REDUCTION_BITAND: \
	for(i = 0; i < n_node; i++) v.t &= vals[i].t;\
	break; \
      case OMPC_REDUCTION_BITOR: \
	for(i = 0; i < n_node; i++) v.t |= vals[i].t;\
	break; \
      case OMPC_REDUCTION_BITXOR: \
	for(i = 0; i < n_node; i++) v.t ^= vals[i].t;\
	break; \
      case OMPC_REDUCTION_LOGAND: \
	if(!v.t) break; \
	for(i = 0; i < n_node; i++) \
          if(!vals[i].t) { v.t = 0; break; } \
	break; \
      case OMPC_REDUCTION_LOGOR: \
	if(v.t) break; \
	for(i = 0; i < n_node; i++) \
          if( vals[i].t) { v.t = 1; break; } \
	break; \
      case OMPC_REDUCTION_MIN: \
	for(i = 0; i < n_node; i++) \
         if(v.t>vals[i].t) v.t=vals[i].t;\
	break; \
      case OMPC_REDUCTION_MAX: \
	for(i = 0; i < n_node; i++) \
         if(v.t<vals[i].t) v.t=vals[i].t;\
	break; \
      default: \
	  _ompc_fatal("_ompc_reduction: bad op\n"); \
      } \
      *((type_t *)out_p) = v.t; \
   } \
  if (n_node != 1) { \
    OMPSM_BARRIER(); \
  } \
}

/*
 * Body of _ompc_reduction for floating-point element types.
 *   type_t - C type of the reduced variable
 *   t      - any_type member used to hold a value of that type
 * Relies on in_p, out_p, op, n_node and vals from the enclosing function.
 *
 * Same structure as the integral variant (store own value, barrier,
 * master folds the first n_node slots into *out_p, barrier) but without
 * the bitwise operators, which therefore fall into the fatal default case.
 */
#define DO_REDUCTION_FLOAT(type_t,t) { \
  vals[_ompc_node_id].t = *((type_t *)in_p); \
  if (n_node != 1) { \
    OMPSM_BARRIER(); \
  } \
  if(IS_MASTER_THREAD){ \
      any_type v; int i; \
      v.t = *((type_t *)out_p); \
      switch(op){ \
      case OMPC_REDUCTION_PLUS: \
      case OMPC_REDUCTION_MINUS: \
	for(i = 0; i < n_node; i++) v.t += vals[i].t;\
	break; \
      case OMPC_REDUCTION_MUL: \
	for(i = 0; i < n_node; i++) v.t *= vals[i].t;\
	break; \
      case OMPC_REDUCTION_LOGAND: \
	if(!v.t) break; \
	for(i = 0; i < n_node; i++) \
          if(!vals[i].t) { v.t = 0; break; } \
	break; \
      case OMPC_REDUCTION_LOGOR: \
	if(v.t) break; \
	for(i = 0; i < n_node; i++) \
          if( vals[i].t) { v.t = 1; break; } \
	break; \
      case OMPC_REDUCTION_MIN: \
	for(i = 0; i < n_node; i++) \
         if(v.t>vals[i].t) v.t=vals[i].t;\
	break; \
      case OMPC_REDUCTION_MAX: \
	for(i = 0; i < n_node; i++) \
         if(v.t<vals[i].t) v.t=vals[i].t;\
	break; \
      default: \
	  _ompc_fatal("_ompc_reduction: bad op\n"); \
      } \
      *((type_t *)out_p) = v.t; \
   } \
  if (n_node != 1) { \
    OMPSM_BARRIER(); \
  } \
}

/*
 * Body of _ompc_reduction for complex element types with .re/.im members.
 *   type_t - C type of the reduced variable
 *   t      - any_type member used to hold a value of that type
 * Relies on in_p, out_p, op, n_node and vals from the enclosing function.
 *
 * Same structure as the other variants (store own value, barrier, master
 * folds the first n_node slots into *out_p, barrier).  Only addition
 * (PLUS/MINUS) and multiplication are supported; the product goes through
 * the temporary vv so both parts are computed from the old v.
 */
#define DO_REDUCTION_COMPLEX(type_t,t) { \
  vals[_ompc_node_id].t = *((type_t *)in_p); \
  if (n_node != 1) { \
    OMPSM_BARRIER(); \
  } \
  if(IS_MASTER_THREAD){ \
      any_type v,vv; int i; \
      v.t = *((type_t *)out_p); \
      switch(op){ \
      case OMPC_REDUCTION_PLUS: \
      case OMPC_REDUCTION_MINUS: \
	for(i = 0; i < n_node; i++)\
	  { v.t.re += vals[i].t.re; v.t.im += vals[i].t.im;} \
	break; \
      case OMPC_REDUCTION_MUL: \
	for(i = 0; i < n_node; i++){ \
 	    vv.t.re = v.t.re*vals[i].t.re-v.t.im*vals[i].t.im; \
	    vv.t.im = v.t.re*vals[i].t.im+v.t.im*vals[i].t.re;\
            v.t.re = vv.t.re; v.t.im = vv.t.im; }\
	break; \
      default: \
	  _ompc_fatal("_ompc_reduction: bad op\n"); \
      } \
      *((type_t *)out_p) = v.t; \
   } \
  if (n_node != 1) { \
    OMPSM_BARRIER(); \
  } \
 }
				   

/*
 * Combine the per-thread partial result *in_p into the shared result
 * *out_p.
 *
 *   in_p  - the calling thread's partial value
 *   out_p - the reduction variable; updated by the master thread
 *   type  - OMPC_REDUCTION_<type> selector for the element type
 *   op    - OMPC_REDUCTION_<op> selector for the operator
 *
 * The work is done by the DO_REDUCTION_* macros, which use the shared
 * array __G__->reduction_vals as one slot per node.  Outside a parallel
 * region, or in a nested one, n_node is 1 and the barriers inside the
 * macros are skipped.  long double and unknown types are reported through
 * _ompc_fatal.
 *
 * NOTE(review): the unsigned long long case stores values through the `ll'
 * member, the same one used for signed long long; if that member is
 * signed, MIN/MAX compare as signed -- confirm against the any_type
 * definition.
 */
void
_ompc_reduction(void *in_p,void *out_p,int type, int op)
{
  int n_node;
  any_type *vals;

  vals = __G__->reduction_vals;
  if((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)){
    n_node = 1;
  } else {
    OMPSM_LIB_IN ();
    n_node = _ompc_n_node;
  }
    
  switch(type) {
  case OMPC_REDUCTION_CHAR:
    DO_REDUCTION_INTEGRAL(char,c);
    break;
  case OMPC_REDUCTION_UNSIGNED_CHAR:
    DO_REDUCTION_INTEGRAL(unsigned char,uc);
    break;

  case OMPC_REDUCTION_SHORT:
    DO_REDUCTION_INTEGRAL(short,s);
    break;
  case OMPC_REDUCTION_UNSIGNED_SHORT:
    DO_REDUCTION_INTEGRAL(unsigned short,us);
    break;

  case OMPC_REDUCTION_SIGNED:
  case OMPC_REDUCTION_INT:
    DO_REDUCTION_INTEGRAL(int, i);
    break;
  case OMPC_REDUCTION_UNSIGNED_INT:
    DO_REDUCTION_INTEGRAL(unsigned int, ui);
    break;

  case OMPC_REDUCTION_LONG:
    DO_REDUCTION_INTEGRAL(long,l);
    break;
  case OMPC_REDUCTION_UNSIGNED_LONG:
    DO_REDUCTION_INTEGRAL(unsigned long,ul);
    break;

  case OMPC_REDUCTION_LONGLONG:
    DO_REDUCTION_INTEGRAL(long long, ll);
    break;

  case OMPC_REDUCTION_UNSIGNED_LONGLONG:
    DO_REDUCTION_INTEGRAL(unsigned long long, ll);
    break;

  case OMPC_REDUCTION_FLOAT:
    DO_REDUCTION_FLOAT(float,f);
    break;

  case OMPC_REDUCTION_DOUBLE:
    DO_REDUCTION_FLOAT(double,d);
    break;

  case OMPC_REDUCTION_COMPLEX_FLOAT:
    DO_REDUCTION_COMPLEX(ompc_complex_float,cf);
    break;
	
  case OMPC_REDUCTION_COMPLEX_DOUBLE:
    DO_REDUCTION_COMPLEX(ompc_complex_double,cd);
    break;

  case OMPC_REDUCTION_LONG_DOUBLE:
  default:
    _ompc_fatal("_ompc_reduction: bad type");
  }

  /* pairs with the OMPSM_LIB_IN taken above on the parallel path */
  if((IN_PARALLEL) && !(IN_NESTED_PARALLEL)){
    OMPSM_LIB_OUT ();
  }
}


/*
 * Store the identity element of a complex reduction in *in_p:
 * 0+0i for PLUS and MINUS, 1+0i for MUL.  Only the two complex types are
 * handled; any other type/op combination is reported through _ompc_fatal.
 */
void 
_ompc_reduction_init(void *in_p, int type, int op)
{
  any_type *slot = (any_type *)in_p;
  const int is_sum = (op == OMPC_REDUCTION_PLUS || op == OMPC_REDUCTION_MINUS);
  const int is_product = (op == OMPC_REDUCTION_MUL);

  if (type == OMPC_REDUCTION_COMPLEX_FLOAT) {
    if (is_sum) {
      slot->cf.re = 0.0;
      slot->cf.im = 0.0;
      return;
    }
    if (is_product) {
      slot->cf.re = 1.0;
      slot->cf.im = 0.0;
      return;
    }
  } else if (type == OMPC_REDUCTION_COMPLEX_DOUBLE) {
    if (is_sum) {
      slot->cd.re = 0.0;
      slot->cd.im = 0.0;
      return;
    }
    if (is_product) {
      slot->cd.re = 1.0;
      slot->cd.im = 0.0;
      return;
    }
  }

  _ompc_fatal("_ompc_reduction_init: bad op\n");
}


/*
 * Copy `size' bytes of threadprivate data from src to dst.
 *
 * Outside a parallel region, or in a nested one, this is a plain local
 * copy; otherwise the copy is delegated to OMPSM_BCAST_THDPRV.
 *
 * The local copy uses memmove, which like the legacy bcopy it replaces
 * tolerates overlapping regions.  A non-positive size copies nothing.
 */
void 
_ompc_bcast_thdprv(void *dst, void *src, int size)
{
  if ((NOT_IN_PARALLEL) || (IN_NESTED_PARALLEL)) {
    if (size > 0) {
      memmove (dst, src, (size_t)size);
    }
  } else {
    OMPSM_LIB_IN ();
    OMPSM_BCAST_THDPRV(dst, src, size);
    OMPSM_LIB_OUT ();
  }
}


/*
 * Copy `size' bytes from src to dst.
 *
 * Uses memmove, which like the legacy bcopy it replaces tolerates
 * overlapping regions.  A non-positive size copies nothing.
 */
void 
_ompc_bcast_copy(void *dst, void *src, int size)
{
  if (size > 0) {
    memmove (dst, src, (size_t)size);
  }
}


/*
 * lastprivate copy-out: after a barrier (taken only inside a non-nested
 * parallel region), the thread for which _ompc_is_last() is true copies
 * `size' bytes from its private copy pptr to the shared variable gptr.
 *
 * The copy uses memmove, which like the legacy bcopy it replaces tolerates
 * overlapping regions.  A non-positive size copies nothing.
 */
void
_ompc_last_copy (void *gptr, void *pptr, int size)
{
  if ((IN_PARALLEL) && !(IN_NESTED_PARALLEL)) {
    OMPSM_LIB_IN ();
    OMPSM_BARRIER ();
    OMPSM_LIB_OUT ();
  }
  if (_ompc_is_last ()) {
    if (size > 0) {
      memmove (gptr, pptr, (size_t)size);
    }
  }
}
