program mpitest
  !
  ! This program is a MPI version of DQMC code.
  ! It first performs the warmup sweep on one node 
  ! and then sends out the Hubbard-Statonivich field to others.
  ! After the measurement sweep (performed in parallel), 
  ! the root processor gathers all the measurements and makes
  ! statisitcs.
  !
  use DQMC_HUBBARD
  use DQMC_PHY0
  use DQMC_2DPERL
  implicit none
  include 'mpif.h'

  
  ! Parameters
  
  integer,      parameter  :: nx = 16, ny = 16, N = nx*ny
  real(wp),     parameter  :: t     = 1.0_wp
  real(wp),     parameter  :: dtau  = 0.125_wp  
  real(wp),     parameter  :: U     = TWO
  real(wp),     parameter  :: mu    = ZERO
  integer,      parameter  :: L     = 96
  integer,      parameter  :: HSF_IPT = -1
  integer,      parameter  :: nWarm = 1000, nPass = 5000
  integer,      parameter  :: nmeas = 12, nBin = 1, nHist = 50
  integer,      parameter  :: seed  = 0, nOrth = 12, nWrap = 12
  real(wp),     parameter  :: errrate = 0.001_wp, difflim = 0.001_wp

  ! Variables
  
  type(hubbard) :: Hub
  real(wp)      :: t1, t2
  integer       :: ntask, rank, ierr, i, nIter, cnt, source, rc
  integer       :: mat, p0_basic, p0_array

  ! Executable

  ! Initialize MPI
  call MPI_INIT(ierr)
  if (ierr .ne. MPI_SUCCESS) then
     print *,'Error starting MPI program. Terminating.'
     call MPI_ABORT(MPI_COMM_WORLD, rc, ierr)
  end if

  ! start timing
  t1 = MPI_WTIME()

  ! Get MPI parameters
  call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, ntask, ierr)
  source = 0

  ! Set new data typ
  cnt = Hub%n*Hub%L
  call MPI_TYPE_CONTIGUOUS(cnt, MPI_INTEGER, mat, ierr)
  call MPI_TYPE_COMMIT(mat, ierr)
  call MPI_TYPE_CONTIGUOUS(P0_N, MPI_DOUBLE_PRECISION, p0_basic, ierr)
  call MPI_TYPE_COMMIT(p0_basic, ierr)
  call MPI_TYPE_CONTIGUOUS(Hub%P0%nClass, MPI_DOUBLE_PRECISION, p0_array, ierr)
  call MPI_TYPE_COMMIT(p0_array, ierr)

  ! Initialize the Hubbard model
  call DQMC_Default(Hub)
  call DQMC_Init_2DPerl(nx, ny, Hub%S, IMP_TRIANGLE)  
  
  if (rank .eq. 0) then
     ! The root processor allocates nTask bins.
     call DQMC_SetParam(Hub, U, t, mu, L, dtau, HSF_IPT, nWarm, &
          nPass, nMeas, nTask, nHist, seed, nOrth, nWrap, errrate, difflim)

     ! warm up step on the first node
     call DQMC_SetBnV(Hub)

     do i = 1, nWarm
        ! The second parameter means no measurement should be made.
        call DQMC_Sweep(Hub, NO_MEAS0)
     end do
  else
     ! The other processor allocates 1 bin.
     call DQMC_SetParam(Hub, U, t, mu, L, dtau, HSF_IPT, nWarm, &
          nPass, nMeas, nBin, nHist, seed, nOrth, nWrap, errrate, difflim)
     call DQMC_SetBnV(Hub)
  end if

  ! Broadcast the HSF to other node
  call  MPI_BCAST (Hub%HSF, 1, mat, source, MPI_COMM_WORLD, ierr)

  ! Measruement sweep
  ! Number of iteration = total ieration / available processors
  nIter = nPass / nTask
  
  ! Change random seed for each machines
  Hub%seed = mod(abs(Hub%seed)*(rank+1), 4095)
  if (mod(Hub%seed(4),2) .eq. 0) then
     Hub%seed(4) = Hub%seed(4) + 1
  end if
  
  do i = 1, nIter
     call DQMC_Sweep(Hub, Hub%nMeas0)
  end do
  call DQMC_Phy0_Avg(Hub%P0, Hub%u)  

  ! Gather data
  call MPI_GATHER(Hub%P0%S(:,1), 1, p0_basic, Hub%P0%S(:,1:nTask), &
       1, p0_basic, source, MPI_COMM_WORLD, ierr)
  call MPI_GATHER(Hub%P0%G_fun(:,1), 1, p0_array, Hub%P0%G_fun(:,1:nTask), &
       1, p0_array, source, MPI_COMM_WORLD, ierr)
  call MPI_GATHER(Hub%P0%SpinXX(:,1), 1, p0_array, Hub%P0%SpinXX(:,1:nTask), &
       1, p0_array, source, MPI_COMM_WORLD, ierr)
  call MPI_GATHER(Hub%P0%SpinZZ(:,1), 1, p0_array, Hub%P0%SpinZZ(:,1:nTask), &
       1, p0_array, source, MPI_COMM_WORLD, ierr)
  call MPI_GATHER(Hub%P0%Den0(:,1), 1, p0_array, Hub%P0%Den0(:,1:nTask), &
       1, p0_array, source, MPI_COMM_WORLD, ierr)
  call MPI_GATHER(Hub%P0%Den1(:,1), 1, p0_array, Hub%P0%Den1(:,1:nTask), &
       1, p0_array, source, MPI_COMM_WORLD, ierr)

  if (rank .eq. 0) then
     ! Get average result
     call DQMC_Phy0_GetErr(Hub%P0)

     ! print out results
     call DQMC_Phy0_Print(Hub%P0, Hub%S, STDOUT)
     write(STDOUT, FMT_DBLINE) 

     ! end timing
     t2 = MPI_WTIME()
     print *, "etime=", t2-t1
  end if

  call MPI_TYPE_FREE(mat, ierr)
  call MPI_TYPE_FREE(p0_basic, ierr)
  call MPI_TYPE_FREE(p0_array, ierr)
  call MPI_FINALIZE(ierr)
  
end program mpitest
