
c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine  add

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c addition of update to the vector u
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k, m

       do m = 1, 5
          do  k = 1, grid_points(3)-2
             do  j = 1, grid_points(2)-2
                do  i = 1, grid_points(1)-2
                   u(i,j,k,m) = u(i,j,k,m) + rhs(i,j,k,m)
                end do
             end do
          end do
       end do

       return
       end

c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine  adi

c---------------------------------------------------------------------
c---------------------------------------------------------------------

       call compute_rhs

       call txinvr

       call x_solve

       call y_solve

       call z_solve

       call add

       return
       end


c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine error_norm(rms)

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c this function computes the norm of the difference between the
c computed solution and the exact solution
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k, m, d
       double precision xi, eta, zeta, u_exact(5), rms(5), add

       do   m = 1, 5 
          rms(m) = 0.0d0
       end do

       do   k = 0, grid_points(3)-1
          zeta = dble(k) * dnzm1
          do   j = 0, grid_points(2)-1
             eta = dble(j) * dnym1
             do   i = 0, grid_points(1)-1
                xi = dble(i) * dnxm1
                call exact_solution(xi, eta, zeta, u_exact)

                do   m = 1, 5
                   add = u(i,j,k,m)-u_exact(m)
                   rms(m) = rms(m) + add*add
                end do
             end do
          end do
       end do

       do    m = 1, 5
          do    d = 1, 3
             rms(m) = rms(m) / dble(grid_points(d)-2)
          end do
          rms(m) = dsqrt(rms(m))
       end do

       return
       end



       subroutine rhs_norm(rms)

       include 'header.h'

       integer i, j, k, d, m
       double precision rms(5), add

       do    m = 1, 5
          rms(m) = 0.0d0
       end do

       do   k = 0, grid_points(3)-2
          do   j = 0, grid_points(2)-2
             do   i = 0, grid_points(1)-2
                do   m = 1, 5
                   add = rhs(i,j,k,m)
                   rms(m) = rms(m) + add*add
                end do
             end do
          end do
       end do


       do   m = 1, 5
          do   d = 1, 3
             rms(m) = rms(m) / dble(grid_points(d)-2)
          end do
          rms(m) = dsqrt(rms(m))
       end do

       return
       end



c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine exact_rhs

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c compute the right hand side based on exact solution
c---------------------------------------------------------------------

       include 'header.h'

       double precision dtemp(5), xi, eta, zeta, dtpp
       integer          c, m, i, j, k, ip1, im1, jp1, 
     >                  jm1, km1, kp1

       c = 1

c---------------------------------------------------------------------
c      initialize                                  
c---------------------------------------------------------------------
       do   m = 1, 5
          do   k= 0, grid_points(3)-1
             do   j = 0, grid_points(2)-1
                do   i = 0, grid_points(1)-1
                   forcing(i,j,k,m) = 0.0d0
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      xi-direction flux differences                      
c---------------------------------------------------------------------
       do   k = 1, grid_points(3)-2
          zeta = dble(k) * dnzm1
          do   j = 1, grid_points(2)-2
             eta = dble(j) * dnym1

             do  i=0, grid_points(1)-1
                xi = dble(i) * dnxm1

                call exact_solution(xi, eta, zeta, dtemp)
                do  m = 1, 5
                   ue(i,m) = dtemp(m)
                end do

                dtpp = 1.0d0 / dtemp(1)

                do  m = 2, 5
                   buf(i,m) = dtpp * dtemp(m)
                end do

                cuf(i)   = buf(i,2) * buf(i,2)
                buf(i,1) = cuf(i) + buf(i,3) * buf(i,3) + 
     >                     buf(i,4) * buf(i,4) 
                q(i) = 0.5d0*(buf(i,2)*ue(i,2) + buf(i,3)*ue(i,3) +
     >                        buf(i,4)*ue(i,4))

             end do
 
             do  i = 1, grid_points(1)-2
                im1 = i-1
                ip1 = i+1

                forcing(i,j,k,1) = forcing(i,j,k,1) -
     >                 tx2*( ue(ip1,2)-ue(im1,2) )+
     >                 dx1tx1*(ue(ip1,1)-2.0d0*ue(i,1)+ue(im1,1))

                forcing(i,j,k,2) = forcing(i,j,k,2) - tx2 * (
     >                (ue(ip1,2)*buf(ip1,2)+c2*(ue(ip1,5)-q(ip1)))-
     >                (ue(im1,2)*buf(im1,2)+c2*(ue(im1,5)-q(im1))))+
     >                 xxcon1*(buf(ip1,2)-2.0d0*buf(i,2)+buf(im1,2))+
     >                 dx2tx1*( ue(ip1,2)-2.0d0* ue(i,2)+ue(im1,2))

                forcing(i,j,k,3) = forcing(i,j,k,3) - tx2 * (
     >                 ue(ip1,3)*buf(ip1,2)-ue(im1,3)*buf(im1,2))+
     >                 xxcon2*(buf(ip1,3)-2.0d0*buf(i,3)+buf(im1,3))+
     >                 dx3tx1*( ue(ip1,3)-2.0d0*ue(i,3) +ue(im1,3))
                  
                forcing(i,j,k,4) = forcing(i,j,k,4) - tx2*(
     >                 ue(ip1,4)*buf(ip1,2)-ue(im1,4)*buf(im1,2))+
     >                 xxcon2*(buf(ip1,4)-2.0d0*buf(i,4)+buf(im1,4))+
     >                 dx4tx1*( ue(ip1,4)-2.0d0* ue(i,4)+ ue(im1,4))

                forcing(i,j,k,5) = forcing(i,j,k,5) - tx2*(
     >                 buf(ip1,2)*(c1*ue(ip1,5)-c2*q(ip1))-
     >                 buf(im1,2)*(c1*ue(im1,5)-c2*q(im1)))+
     >                 0.5d0*xxcon3*(buf(ip1,1)-2.0d0*buf(i,1)+
     >                               buf(im1,1))+
     >                 xxcon4*(cuf(ip1)-2.0d0*cuf(i)+cuf(im1))+
     >                 xxcon5*(buf(ip1,5)-2.0d0*buf(i,5)+buf(im1,5))+
     >                 dx5tx1*( ue(ip1,5)-2.0d0* ue(i,5)+ ue(im1,5))
             end do

c---------------------------------------------------------------------
c            Fourth-order dissipation                         
c---------------------------------------------------------------------
             do   m = 1, 5
                i = 1
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                    (5.0d0*ue(i,m) - 4.0d0*ue(i+1,m) +ue(i+2,m))
                i = 2
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (-4.0d0*ue(i-1,m) + 6.0d0*ue(i,m) -
     >                     4.0d0*ue(i+1,m) +       ue(i+2,m))
             end do

             do   m = 1, 5
                do  i = 3, grid_points(1)-4
                   forcing(i,j,k,m) = forcing(i,j,k,m) - dssp*
     >                   (ue(i-2,m) - 4.0d0*ue(i-1,m) +
     >                    6.0d0*ue(i,m) - 4.0d0*ue(i+1,m) + ue(i+2,m))
                end do
             end do

             do   m = 1, 5
                i = grid_points(1)-3
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (ue(i-2,m) - 4.0d0*ue(i-1,m) +
     >                    6.0d0*ue(i,m) - 4.0d0*ue(i+1,m))
                i = grid_points(1)-2
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (ue(i-2,m) - 4.0d0*ue(i-1,m) + 5.0d0*ue(i,m))
             end do

          end do
       end do

c---------------------------------------------------------------------
c  eta-direction flux differences             
c---------------------------------------------------------------------
       do   k = 1, grid_points(3)-2          
          zeta = dble(k) * dnzm1
          do   i=1, grid_points(1)-2
             xi = dble(i) * dnxm1

             do  j=0, grid_points(2)-1
                eta = dble(j) * dnym1

                call exact_solution(xi, eta, zeta, dtemp)
                do   m = 1, 5 
                   ue(j,m) = dtemp(m)
                end do
                dtpp = 1.0d0/dtemp(1)

                do  m = 2, 5
                   buf(j,m) = dtpp * dtemp(m)
                end do

                cuf(j)   = buf(j,3) * buf(j,3)
                buf(j,1) = cuf(j) + buf(j,2) * buf(j,2) + 
     >                     buf(j,4) * buf(j,4)
                q(j) = 0.5d0*(buf(j,2)*ue(j,2) + buf(j,3)*ue(j,3) +
     >                        buf(j,4)*ue(j,4))
             end do

             do  j = 1, grid_points(2)-2
                jm1 = j-1
                jp1 = j+1
                  
                forcing(i,j,k,1) = forcing(i,j,k,1) -
     >                ty2*( ue(jp1,3)-ue(jm1,3) )+
     >                dy1ty1*(ue(jp1,1)-2.0d0*ue(j,1)+ue(jm1,1))

                forcing(i,j,k,2) = forcing(i,j,k,2) - ty2*(
     >                ue(jp1,2)*buf(jp1,3)-ue(jm1,2)*buf(jm1,3))+
     >                yycon2*(buf(jp1,2)-2.0d0*buf(j,2)+buf(jm1,2))+
     >                dy2ty1*( ue(jp1,2)-2.0* ue(j,2)+ ue(jm1,2))

                forcing(i,j,k,3) = forcing(i,j,k,3) - ty2*(
     >                (ue(jp1,3)*buf(jp1,3)+c2*(ue(jp1,5)-q(jp1)))-
     >                (ue(jm1,3)*buf(jm1,3)+c2*(ue(jm1,5)-q(jm1))))+
     >                yycon1*(buf(jp1,3)-2.0d0*buf(j,3)+buf(jm1,3))+
     >                dy3ty1*( ue(jp1,3)-2.0d0*ue(j,3) +ue(jm1,3))

                forcing(i,j,k,4) = forcing(i,j,k,4) - ty2*(
     >                ue(jp1,4)*buf(jp1,3)-ue(jm1,4)*buf(jm1,3))+
     >                yycon2*(buf(jp1,4)-2.0d0*buf(j,4)+buf(jm1,4))+
     >                dy4ty1*( ue(jp1,4)-2.0d0*ue(j,4)+ ue(jm1,4))

                forcing(i,j,k,5) = forcing(i,j,k,5) - ty2*(
     >                buf(jp1,3)*(c1*ue(jp1,5)-c2*q(jp1))-
     >                buf(jm1,3)*(c1*ue(jm1,5)-c2*q(jm1)))+
     >                0.5d0*yycon3*(buf(jp1,1)-2.0d0*buf(j,1)+
     >                              buf(jm1,1))+
     >                yycon4*(cuf(jp1)-2.0d0*cuf(j)+cuf(jm1))+
     >                yycon5*(buf(jp1,5)-2.0d0*buf(j,5)+buf(jm1,5))+
     >                dy5ty1*(ue(jp1,5)-2.0d0*ue(j,5)+ue(jm1,5))
             end do

c---------------------------------------------------------------------
c            Fourth-order dissipation                      
c---------------------------------------------------------------------
             do   m = 1, 5
                j = 1
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                    (5.0d0*ue(j,m) - 4.0d0*ue(j+1,m) +ue(j+2,m))
                j = 2
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (-4.0d0*ue(j-1,m) + 6.0d0*ue(j,m) -
     >                     4.0d0*ue(j+1,m) +       ue(j+2,m))
             end do

             do   m = 1, 5
                do  j = 3, grid_points(2)-4
                   forcing(i,j,k,m) = forcing(i,j,k,m) - dssp*
     >                   (ue(j-2,m) - 4.0d0*ue(j-1,m) +
     >                    6.0d0*ue(j,m) - 4.0d0*ue(j+1,m) + ue(j+2,m))
                end do
             end do

             do   m = 1, 5
                j = grid_points(2)-3
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (ue(j-2,m) - 4.0d0*ue(j-1,m) +
     >                    6.0d0*ue(j,m) - 4.0d0*ue(j+1,m))
                j = grid_points(2)-2
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (ue(j-2,m) - 4.0d0*ue(j-1,m) + 5.0d0*ue(j,m))

             end do

          end do
       end do

c---------------------------------------------------------------------
c      zeta-direction flux differences                      
c---------------------------------------------------------------------
       do  j=1, grid_points(2)-2
          eta = dble(j) * dnym1
          do   i = 1, grid_points(1)-2
             xi = dble(i) * dnxm1

             do k=0, grid_points(3)-1
                zeta = dble(k) * dnzm1

                call exact_solution(xi, eta, zeta, dtemp)
                do   m = 1, 5
                   ue(k,m) = dtemp(m)
                end do

                dtpp = 1.0d0/dtemp(1)

                do   m = 2, 5
                   buf(k,m) = dtpp * dtemp(m)
                end do

                cuf(k)   = buf(k,4) * buf(k,4)
                buf(k,1) = cuf(k) + buf(k,2) * buf(k,2) + 
     >                     buf(k,3) * buf(k,3)
                q(k) = 0.5d0*(buf(k,2)*ue(k,2) + buf(k,3)*ue(k,3) +
     >                        buf(k,4)*ue(k,4))
             end do

             do    k=1, grid_points(3)-2
                km1 = k-1
                kp1 = k+1
                  
                forcing(i,j,k,1) = forcing(i,j,k,1) -
     >                 tz2*( ue(kp1,4)-ue(km1,4) )+
     >                 dz1tz1*(ue(kp1,1)-2.0d0*ue(k,1)+ue(km1,1))

                forcing(i,j,k,2) = forcing(i,j,k,2) - tz2 * (
     >                 ue(kp1,2)*buf(kp1,4)-ue(km1,2)*buf(km1,4))+
     >                 zzcon2*(buf(kp1,2)-2.0d0*buf(k,2)+buf(km1,2))+
     >                 dz2tz1*( ue(kp1,2)-2.0d0* ue(k,2)+ ue(km1,2))

                forcing(i,j,k,3) = forcing(i,j,k,3) - tz2 * (
     >                 ue(kp1,3)*buf(kp1,4)-ue(km1,3)*buf(km1,4))+
     >                 zzcon2*(buf(kp1,3)-2.0d0*buf(k,3)+buf(km1,3))+
     >                 dz3tz1*(ue(kp1,3)-2.0d0*ue(k,3)+ue(km1,3))

                forcing(i,j,k,4) = forcing(i,j,k,4) - tz2 * (
     >                (ue(kp1,4)*buf(kp1,4)+c2*(ue(kp1,5)-q(kp1)))-
     >                (ue(km1,4)*buf(km1,4)+c2*(ue(km1,5)-q(km1))))+
     >                zzcon1*(buf(kp1,4)-2.0d0*buf(k,4)+buf(km1,4))+
     >                dz4tz1*( ue(kp1,4)-2.0d0*ue(k,4) +ue(km1,4))

                forcing(i,j,k,5) = forcing(i,j,k,5) - tz2 * (
     >                 buf(kp1,4)*(c1*ue(kp1,5)-c2*q(kp1))-
     >                 buf(km1,4)*(c1*ue(km1,5)-c2*q(km1)))+
     >                 0.5d0*zzcon3*(buf(kp1,1)-2.0d0*buf(k,1)
     >                              +buf(km1,1))+
     >                 zzcon4*(cuf(kp1)-2.0d0*cuf(k)+cuf(km1))+
     >                 zzcon5*(buf(kp1,5)-2.0d0*buf(k,5)+buf(km1,5))+
     >                 dz5tz1*( ue(kp1,5)-2.0d0*ue(k,5)+ ue(km1,5))
             end do

c---------------------------------------------------------------------
c            Fourth-order dissipation                        
c---------------------------------------------------------------------
             do   m = 1, 5
                k = 1
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                    (5.0d0*ue(k,m) - 4.0d0*ue(k+1,m) +ue(k+2,m))
                k = 2
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (-4.0d0*ue(k-1,m) + 6.0d0*ue(k,m) -
     >                     4.0d0*ue(k+1,m) +       ue(k+2,m))
             end do

             do   m = 1, 5
                do  k = 3, grid_points(3)-4
                   forcing(i,j,k,m) = forcing(i,j,k,m) - dssp*
     >                   (ue(k-2,m) - 4.0d0*ue(k-1,m) +
     >                    6.0d0*ue(k,m) - 4.0d0*ue(k+1,m) + ue(k+2,m))
                end do
             end do

             do    m = 1, 5
                k = grid_points(3)-3
                forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (ue(k-2,m) - 4.0d0*ue(k-1,m) +
     >                    6.0d0*ue(k,m) - 4.0d0*ue(k+1,m))
                   k = grid_points(3)-2
                   forcing(i,j,k,m) = forcing(i,j,k,m) - dssp *
     >                   (ue(k-2,m) - 4.0d0*ue(k-1,m) + 5.0d0*ue(k,m))
                end do

          end do
       end do

c---------------------------------------------------------------------
c now change the sign of the forcing function, 
c---------------------------------------------------------------------
       do   m = 1, 5
          do   k = 1, grid_points(3)-2
             do   j = 1, grid_points(2)-2
                do   i = 1, grid_points(1)-2
                   forcing(i,j,k,m) = -1.d0 * forcing(i,j,k,m)
                end do
             end do
          end do
       end do

       return
       end






c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine exact_solution(xi,eta,zeta,dtemp)

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c this function returns the exact solution at point xi, eta, zeta  
c---------------------------------------------------------------------

       include 'header.h'

       double precision  xi, eta, zeta, dtemp(5)
       integer m

       do  m = 1, 5
          dtemp(m) =  ce(m,1) +
     >    xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
     >    eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
     >    zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) + 
     >    zeta*ce(m,13))))
       end do

       return
       end



c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine  initialize

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c This subroutine initializes the field variable u using 
c tri-linear transfinite interpolation of the boundary values     
c---------------------------------------------------------------------

       include 'header.h'
  
       integer i, j, k, m, ix, iy, iz
       double precision  xi, eta, zeta, Pface(5,3,2), Pxi, Peta, 
     >                   Pzeta, temp(5)

c---------------------------------------------------------------------
c  Later (in compute_rhs) we compute 1/u for every element. A few of 
c  the corner elements are not used, but it convenient (and faster) 
c  to compute the whole thing with a simple loop. Make sure those 
c  values are nonzero by initializing the whole thing here. 
c---------------------------------------------------------------------

      do k = 0, IMAX-1
         do j = 0, IMAX-1
            do i = 0, IMAX-1
               u(i, j, k, 1) = 1.0
               u(i, j, k, 2) = 0.0
               u(i, j, k, 3) = 0.0
               u(i, j, k, 4) = 0.0
               u(i, j, k, 5) = 1.0
            end do
         end do
      end do

c---------------------------------------------------------------------
c first store the "interpolated" values everywhere on the grid    
c---------------------------------------------------------------------

          do  k = 0, grid_points(3)-1
             zeta = dble(k) * dnzm1
             do  j = 0, grid_points(2)-1
                eta = dble(j) * dnym1
                do   i = 0, grid_points(1)-1
                   xi = dble(i) * dnxm1
                  
                   do ix = 1, 2
                      call exact_solution(dble(ix-1), eta, zeta, 
     >                                    Pface(1,1,ix))
                   end do

                   do    iy = 1, 2
                      call exact_solution(xi, dble(iy-1) , zeta, 
     >                                    Pface(1,2,iy))
                   end do

                   do    iz = 1, 2
                      call exact_solution(xi, eta, dble(iz-1),   
     >                                    Pface(1,3,iz))
                   end do

                   do   m = 1, 5
                      Pxi   = xi   * Pface(m,1,2) + 
     >                        (1.0d0-xi)   * Pface(m,1,1)
                      Peta  = eta  * Pface(m,2,2) + 
     >                        (1.0d0-eta)  * Pface(m,2,1)
                      Pzeta = zeta * Pface(m,3,2) + 
     >                        (1.0d0-zeta) * Pface(m,3,1)
 
                      u(i,j,k,m) = Pxi + Peta + Pzeta - 
     >                          Pxi*Peta - Pxi*Pzeta - Peta*Pzeta + 
     >                          Pxi*Peta*Pzeta

                   end do
                end do
             end do
          end do


c---------------------------------------------------------------------
c now store the exact values on the boundaries        
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c west face                                                  
c---------------------------------------------------------------------

       xi = 0.0d0
       i  = 0
       do  k = 0, grid_points(3)-1
          zeta = dble(k) * dnzm1
          do   j = 0, grid_points(2)-1
             eta = dble(j) * dnym1
             call exact_solution(xi, eta, zeta, temp)
             do   m = 1, 5
                u(i,j,k,m) = temp(m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c east face                                                      
c---------------------------------------------------------------------

       xi = 1.0d0
       i  = grid_points(1)-1
       do   k = 0, grid_points(3)-1
          zeta = dble(k) * dnzm1
          do   j = 0, grid_points(2)-1
             eta = dble(j) * dnym1
             call exact_solution(xi, eta, zeta, temp)
             do   m = 1, 5
                u(i,j,k,m) = temp(m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c south face                                                 
c---------------------------------------------------------------------

       eta = 0.0d0
       j   = 0
       do  k = 0, grid_points(3)-1
          zeta = dble(k) * dnzm1
          do   i = 0, grid_points(1)-1
             xi = dble(i) * dnxm1
             call exact_solution(xi, eta, zeta, temp)
             do   m = 1, 5
                u(i,j,k,m) = temp(m)
             end do
          end do
       end do


c---------------------------------------------------------------------
c north face                                    
c---------------------------------------------------------------------

       eta = 1.0d0
       j   = grid_points(2)-1
       do   k = 0, grid_points(3)-1
          zeta = dble(k) * dnzm1
          do   i = 0, grid_points(1)-1
             xi = dble(i) * dnxm1
             call exact_solution(xi, eta, zeta, temp)
             do   m = 1, 5
                u(i,j,k,m) = temp(m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c bottom face                                       
c---------------------------------------------------------------------

       zeta = 0.0d0
       k    = 0
       do   i =0, grid_points(1)-1
          xi = dble(i) *dnxm1
          do   j = 0, grid_points(2)-1
             eta = dble(j) * dnym1
             call exact_solution(xi, eta, zeta, temp)
             do   m = 1, 5
                u(i,j,k,m) = temp(m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c top face     
c---------------------------------------------------------------------

       zeta = 1.0d0
       k    = grid_points(3)-1
       do   i =0, grid_points(1)-1
          xi = dble(i) * dnxm1
          do   j = 0, grid_points(2)-1
             eta = dble(j) * dnym1
             call exact_solution(xi, eta, zeta, temp)
             do   m = 1, 5
                u(i,j,k,m) = temp(m)
             end do
          end do
       end do

       return
       end


       subroutine lhsinit

       include 'header.h'
       
       integer i, j, k, c, n

       c = 1

c---------------------------------------------------------------------
c     zap the whole left hand side for starters
c---------------------------------------------------------------------
       do  n = 1, 15
          do  k = 0, grid_points(3)-1
             do  j = 0, grid_points(2)-1
                do  i = 0, grid_points(1)-1
                   lhs(i,j,k,n) = 0.0d0
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      next, set all diagonal values to 1. This is overkill, but 
c      convenient
c---------------------------------------------------------------------
       do   n = 1, 3
          do   k = 0, grid_points(3)-1
             do   j = 0, grid_points(2)-1
                do   i = 0, grid_points(1)-1
                   lhs(i,j,k,5*n-2) = 1.0d0
                end do
             end do
          end do
       end do

 
       return
       end




c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine lhsx

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c This function computes the left hand side for the three x-factors  
c---------------------------------------------------------------------

       include 'header.h'

       double precision ru1
       integer          i, j, k


c---------------------------------------------------------------------
c      first fill the lhs for the u-eigenvalue                   
c---------------------------------------------------------------------
       do  k = 1, grid_points(3)-2
          do  j = 1, grid_points(2)-2
             do  i = 0, grid_points(1)-1
                ru1 = c3c4*rho_i(i,j,k)
                cv(i) = us(i,j,k)
                rhon(i) = dmax1(dx2+con43*ru1, 
     >                          dx5+c1c5*ru1,
     >                          dxmax+ru1,
     >                          dx1)
             end do

             do  i = 1, grid_points(1)-2
                lhs(i,j,k,1) =   0.0d0
                lhs(i,j,k,2) = - dttx2 * cv(i-1) - dttx1 * rhon(i-1)
                lhs(i,j,k,3) =   1.0d0 + c2dttx1 * rhon(i)
                lhs(i,j,k,4) =   dttx2 * cv(i+1) - dttx1 * rhon(i+1)
                lhs(i,j,k,5) =   0.0d0
             end do
          end do
       end do

c---------------------------------------------------------------------
c      add fourth order dissipation                             
c---------------------------------------------------------------------

       i = 1
       do   k = 1, grid_points(3)-2
          do   j = 1, grid_points(2)-2
             lhs(i,j,k,3) = lhs(i,j,k,3) + comz5
             lhs(i,j,k,4) = lhs(i,j,k,4) - comz4
             lhs(i,j,k,5) = lhs(i,j,k,5) + comz1
  
             lhs(i+1,j,k,2) = lhs(i+1,j,k,2) - comz4
             lhs(i+1,j,k,3) = lhs(i+1,j,k,3) + comz6
             lhs(i+1,j,k,4) = lhs(i+1,j,k,4) - comz4
             lhs(i+1,j,k,5) = lhs(i+1,j,k,5) + comz1
          end do
       end do

       do   k = 1, grid_points(3)-2
          do   j = 1, grid_points(2)-2
             do   i=3, grid_points(1)-4
                lhs(i,j,k,1) = lhs(i,j,k,1) + comz1
                lhs(i,j,k,2) = lhs(i,j,k,2) - comz4
                lhs(i,j,k,3) = lhs(i,j,k,3) + comz6
                lhs(i,j,k,4) = lhs(i,j,k,4) - comz4
                lhs(i,j,k,5) = lhs(i,j,k,5) + comz1
             end do
          end do
       end do


       i = grid_points(1)-3
       do   k = 1, grid_points(3)-2
          do   j = 1, grid_points(2)-2
             lhs(i,j,k,1) = lhs(i,j,k,1) + comz1
             lhs(i,j,k,2) = lhs(i,j,k,2) - comz4
             lhs(i,j,k,3) = lhs(i,j,k,3) + comz6
             lhs(i,j,k,4) = lhs(i,j,k,4) - comz4

             lhs(i+1,j,k,1) = lhs(i+1,j,k,1) + comz1
             lhs(i+1,j,k,2) = lhs(i+1,j,k,2) - comz4
             lhs(i+1,j,k,3) = lhs(i+1,j,k,3) + comz5
          end do
       end do

c---------------------------------------------------------------------
c      subsequently, fill the other factors (u+c), (u-c) by adding to 
c      the first  
c---------------------------------------------------------------------
       do   k = 1, grid_points(3)-2
          do   j = 1, grid_points(2)-2
             do   i = 1, grid_points(1)-2
                lhs(i,j,k,1+5)  = lhs(i,j,k,1)
                lhs(i,j,k,2+5)  = lhs(i,j,k,2) - 
     >                            dttx2 * speed(i-1,j,k)
                lhs(i,j,k,3+5)  = lhs(i,j,k,3)
                lhs(i,j,k,4+5)  = lhs(i,j,k,4) + 
     >                            dttx2 * speed(i+1,j,k)
                lhs(i,j,k,5+5) = lhs(i,j,k,5)
                lhs(i,j,k,1+10) = lhs(i,j,k,1)
                lhs(i,j,k,2+10) = lhs(i,j,k,2) + 
     >                            dttx2 * speed(i-1,j,k)
                lhs(i,j,k,3+10) = lhs(i,j,k,3)
                lhs(i,j,k,4+10) = lhs(i,j,k,4) - 
     >                            dttx2 * speed(i+1,j,k)
                lhs(i,j,k,5+10) = lhs(i,j,k,5)
             end do
          end do
       end do

       return
       end




c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine lhsy

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c This function computes the left hand side for the three y-factors   
c---------------------------------------------------------------------

       include 'header.h'

       double precision ru1
       integer          i, j, k


c---------------------------------------------------------------------
c      first fill the lhs for the u-eigenvalue         
c---------------------------------------------------------------------
       do  k = 1, grid_points(3)-2
          do  i = 1, grid_points(1)-2

             do  j = 1-1, grid_points(2)-1
                ru1 = c3c4*rho_i(i,j,k)
                cv(j) = vs(i,j,k)
                rhoq(j) = dmax1( dy3 + con43 * ru1,
     >                           dy5 + c1c5*ru1,
     >                           dymax + ru1,
     >                           dy1)
             end do
            
             do  j = 1, grid_points(2)-2
                lhs(i,j,k,1) =  0.0d0
                lhs(i,j,k,2) = -dtty2 * cv(j-1) - dtty1 * rhoq(j-1)
                lhs(i,j,k,3) =  1.0 + c2dtty1 * rhoq(j)
                lhs(i,j,k,4) =  dtty2 * cv(j+1) - dtty1 * rhoq(j+1)
                lhs(i,j,k,5) =  0.0d0
             end do
          end do
       end do

c---------------------------------------------------------------------
c      add fourth order dissipation                             
c---------------------------------------------------------------------

       j = 1
       do   k = 1, grid_points(3)-2
          do   i = 1, grid_points(1)-2

             lhs(i,j,k,3) = lhs(i,j,k,3) + comz5
             lhs(i,j,k,4) = lhs(i,j,k,4) - comz4
             lhs(i,j,k,5) = lhs(i,j,k,5) + comz1
       
             lhs(i,j+1,k,2) = lhs(i,j+1,k,2) - comz4
             lhs(i,j+1,k,3) = lhs(i,j+1,k,3) + comz6
             lhs(i,j+1,k,4) = lhs(i,j+1,k,4) - comz4
             lhs(i,j+1,k,5) = lhs(i,j+1,k,5) + comz1
          end do
       end do

       do   k = 1, grid_points(3)-2
          do   j=3, grid_points(2)-4
             do   i = 1, grid_points(1)-2

                lhs(i,j,k,1) = lhs(i,j,k,1) + comz1
                lhs(i,j,k,2) = lhs(i,j,k,2) - comz4
                lhs(i,j,k,3) = lhs(i,j,k,3) + comz6
                lhs(i,j,k,4) = lhs(i,j,k,4) - comz4
                lhs(i,j,k,5) = lhs(i,j,k,5) + comz1
             end do
          end do
       end do

       j = grid_points(2)-3
       do   k = 1, grid_points(3)-2
          do   i = 1, grid_points(1)-2
             lhs(i,j,k,1) = lhs(i,j,k,1) + comz1
             lhs(i,j,k,2) = lhs(i,j,k,2) - comz4
             lhs(i,j,k,3) = lhs(i,j,k,3) + comz6
             lhs(i,j,k,4) = lhs(i,j,k,4) - comz4

             lhs(i,j+1,k,1) = lhs(i,j+1,k,1) + comz1
             lhs(i,j+1,k,2) = lhs(i,j+1,k,2) - comz4
             lhs(i,j+1,k,3) = lhs(i,j+1,k,3) + comz5
          end do
       end do

c---------------------------------------------------------------------
c      subsequently, do the other two factors                    
c---------------------------------------------------------------------
       do    k = 1, grid_points(3)-2
          do    j = 1, grid_points(2)-2
             do    i = 1, grid_points(1)-2
                lhs(i,j,k,1+5)  = lhs(i,j,k,1)
                lhs(i,j,k,2+5)  = lhs(i,j,k,2) - 
     >                            dtty2 * speed(i,j-1,k)
                lhs(i,j,k,3+5)  = lhs(i,j,k,3)
                lhs(i,j,k,4+5)  = lhs(i,j,k,4) + 
     >                            dtty2 * speed(i,j+1,k)
                lhs(i,j,k,5+5) = lhs(i,j,k,5)
                lhs(i,j,k,1+10) = lhs(i,j,k,1)
                lhs(i,j,k,2+10) = lhs(i,j,k,2) + 
     >                            dtty2 * speed(i,j-1,k)
                lhs(i,j,k,3+10) = lhs(i,j,k,3)
                lhs(i,j,k,4+10) = lhs(i,j,k,4) - 
     >                            dtty2 * speed(i,j+1,k)
                lhs(i,j,k,5+10) = lhs(i,j,k,5)
             end do
          end do
       end do

       return
       end




c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine lhsz

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c This function computes the left hand side for the three z-factors   
c---------------------------------------------------------------------

       include 'header.h'

       double precision ru1
       integer i, j, k

c---------------------------------------------------------------------
c first fill the lhs for the u-eigenvalue                          
c---------------------------------------------------------------------
       do   j = 1, grid_points(2)-2
          do   i = 1, grid_points(1)-2

             do   k = 1-1, grid_points(3)-1
                ru1 = c3c4*rho_i(i,j,k)
                cv(k) = ws(i,j,k)
                rhos(k) = dmax1(dz4 + con43 * ru1,
     >                          dz5 + c1c5 * ru1,
     >                          dzmax + ru1,
     >                          dz1)
             end do

             do   k =  1, grid_points(3)-2
                lhs(i,j,k,1) =  0.0d0
                lhs(i,j,k,2) = -dttz2 * cv(k-1) - dttz1 * rhos(k-1)
                lhs(i,j,k,3) =  1.0 + c2dttz1 * rhos(k)
                lhs(i,j,k,4) =  dttz2 * cv(k+1) - dttz1 * rhos(k+1)
                lhs(i,j,k,5) =  0.0d0
             end do
          end do
       end do

c---------------------------------------------------------------------
c      add fourth order dissipation                                  
c---------------------------------------------------------------------

       k = 1
       do    j = 1, grid_points(2)-2
          do    i = 1, grid_points(1)-2
             lhs(i,j,k,3) = lhs(i,j,k,3) + comz5
             lhs(i,j,k,4) = lhs(i,j,k,4) - comz4
             lhs(i,j,k,5) = lhs(i,j,k,5) + comz1

             lhs(i,j,k+1,2) = lhs(i,j,k+1,2) - comz4
             lhs(i,j,k+1,3) = lhs(i,j,k+1,3) + comz6
             lhs(i,j,k+1,4) = lhs(i,j,k+1,4) - comz4
             lhs(i,j,k+1,5) = lhs(i,j,k+1,5) + comz1
          end do
       end do

       do    k = 3, grid_points(3)-4
          do    j = 1, grid_points(2)-2
             do    i = 1, grid_points(1)-2
                lhs(i,j,k,1) = lhs(i,j,k,1) + comz1
                lhs(i,j,k,2) = lhs(i,j,k,2) - comz4
                lhs(i,j,k,3) = lhs(i,j,k,3) + comz6
                lhs(i,j,k,4) = lhs(i,j,k,4) - comz4
                lhs(i,j,k,5) = lhs(i,j,k,5) + comz1
             end do
          end do
       end do

       k = grid_points(3)-3 
       do    j = 1, grid_points(2)-2
          do    i = 1, grid_points(1)-2
             lhs(i,j,k,1) = lhs(i,j,k,1) + comz1
             lhs(i,j,k,2) = lhs(i,j,k,2) - comz4
             lhs(i,j,k,3) = lhs(i,j,k,3) + comz6
             lhs(i,j,k,4) = lhs(i,j,k,4) - comz4

             lhs(i,j,k+1,1) = lhs(i,j,k+1,1) + comz1
             lhs(i,j,k+1,2) = lhs(i,j,k+1,2) - comz4
             lhs(i,j,k+1,3) = lhs(i,j,k+1,3) + comz5
          end do
       end do


c---------------------------------------------------------------------
c      subsequently, fill the other factors (u+c), (u-c) 
c---------------------------------------------------------------------
       do    k = 1, grid_points(3)-2
          do    j = 1, grid_points(2)-2
             do    i = 1, grid_points(1)-2
                lhs(i,j,k,1+5)  = lhs(i,j,k,1)
                lhs(i,j,k,2+5)  = lhs(i,j,k,2) - 
     >                            dttz2 * speed(i,j,k-1)
                lhs(i,j,k,3+5)  = lhs(i,j,k,3)
                lhs(i,j,k,4+5)  = lhs(i,j,k,4) + 
     >                            dttz2 * speed(i,j,k+1)
                lhs(i,j,k,5+5) = lhs(i,j,k,5)
                lhs(i,j,k,1+10) = lhs(i,j,k,1)
                lhs(i,j,k,2+10) = lhs(i,j,k,2) + 
     >                            dttz2 * speed(i,j,k-1)
                lhs(i,j,k,3+10) = lhs(i,j,k,3)
                lhs(i,j,k,4+10) = lhs(i,j,k,4) - 
     >                            dttz2 * speed(i,j,k+1)
                lhs(i,j,k,5+10) = lhs(i,j,k,5)
             end do
          end do
       end do

       return
       end



c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine  ninvr

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c   block-diagonal matrix-vector multiplication              
c---------------------------------------------------------------------

       include 'header.h'

       integer  i, j, k
       double precision r1, r2, r3, r4, r5, t1, t2

       do k = 1, grid_points(3)-2
          do j = 1, grid_points(2)-2
             do i = 1, grid_points(1)-2

                r1 = rhs(i,j,k,1)
                r2 = rhs(i,j,k,2)
                r3 = rhs(i,j,k,3)
                r4 = rhs(i,j,k,4)
                r5 = rhs(i,j,k,5)
               
                t1 = bt * r3
                t2 = 0.5d0 * ( r4 + r5 )

                rhs(i,j,k,1) = -r2
                rhs(i,j,k,2) =  r1
                rhs(i,j,k,3) = bt * ( r4 - r5 )
                rhs(i,j,k,4) = -t1 + t2
                rhs(i,j,k,5) =  t1 + t2
             enddo    
          enddo
       enddo

       return
       end

c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine pinvr

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c   block-diagonal matrix-vector multiplication                       
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k
       double precision r1, r2, r3, r4, r5, t1, t2

       do   k = 1, grid_points(3)-2
          do   j = 1, grid_points(2)-2
             do   i = 1, grid_points(1)-2

                r1 = rhs(i,j,k,1)
                r2 = rhs(i,j,k,2)
                r3 = rhs(i,j,k,3)
                r4 = rhs(i,j,k,4)
                r5 = rhs(i,j,k,5)

                t1 = bt * r1
                t2 = 0.5d0 * ( r4 + r5 )

                rhs(i,j,k,1) =  bt * ( r4 - r5 )
                rhs(i,j,k,2) = -r3
                rhs(i,j,k,3) =  r2
                rhs(i,j,k,4) = -t1 + t2
                rhs(i,j,k,5) =  t1 + t2
             end do
          end do
       end do

       return
       end



c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine compute_rhs

c---------------------------------------------------------------------
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k, m
       double precision aux, rho_inv, uijk, up1, um1, vijk, vp1, vm1,
     >                  wijk, wp1, wm1


c---------------------------------------------------------------------
c      compute the reciprocal of density, and the kinetic energy, 
c      and the speed of sound. 
c---------------------------------------------------------------------

       do    k = 0, grid_points(3)-1
          do    j = 0, grid_points(2)-1
             do    i = 0, grid_points(1)-1
                rho_inv = 1.0d0/u(i,j,k,1)
                rho_i(i,j,k) = rho_inv
                us(i,j,k) = u(i,j,k,2) * rho_inv
                vs(i,j,k) = u(i,j,k,3) * rho_inv
                ws(i,j,k) = u(i,j,k,4) * rho_inv
                square(i,j,k)     = 0.5d0* (
     >                        u(i,j,k,2)*u(i,j,k,2) + 
     >                        u(i,j,k,3)*u(i,j,k,3) +
     >                        u(i,j,k,4)*u(i,j,k,4) ) * rho_inv
                qs(i,j,k) = square(i,j,k) * rho_inv
c---------------------------------------------------------------------
c               (don't need speed and ainx until the lhs computation)
c---------------------------------------------------------------------
                aux = c1c2*rho_inv* (u(i,j,k,5) - square(i,j,k))
                aux = dsqrt(aux)
                speed(i,j,k) = aux
                ainv(i,j,k)  = 1.0d0/aux
             end do
          end do
       end do

c---------------------------------------------------------------------
c copy the exact forcing term to the right hand side;  because 
c this forcing term is known, we can store it on the whole grid
c including the boundary                   
c---------------------------------------------------------------------

       do   m = 1, 5
          do   k = 0, grid_points(3)-1
             do   j = 0, grid_points(2)-1
                do   i = 0, grid_points(1)-1
                   rhs(i,j,k,m) = forcing(i,j,k,m)
                end do
             end do
          end do
       end do


c---------------------------------------------------------------------
c      compute xi-direction fluxes 
c---------------------------------------------------------------------
       do    k = 1, grid_points(3)-2
          do    j = 1, grid_points(2)-2
             do    i = 1, grid_points(1)-2
                uijk = us(i,j,k)
                up1  = us(i+1,j,k)
                um1  = us(i-1,j,k)

                rhs(i,j,k,1) = rhs(i,j,k,1) + dx1tx1 * 
     >                    (u(i+1,j,k,1) - 2.0d0*u(i,j,k,1) + 
     >                     u(i-1,j,k,1)) -
     >                    tx2 * (u(i+1,j,k,2) - u(i-1,j,k,2))

                rhs(i,j,k,2) = rhs(i,j,k,2) + dx2tx1 * 
     >                    (u(i+1,j,k,2) - 2.0d0*u(i,j,k,2) + 
     >                     u(i-1,j,k,2)) +
     >                    xxcon2*con43 * (up1 - 2.0d0*uijk + um1) -
     >                    tx2 * (u(i+1,j,k,2)*up1 - 
     >                           u(i-1,j,k,2)*um1 +
     >                           (u(i+1,j,k,5)- square(i+1,j,k)-
     >                            u(i-1,j,k,5)+ square(i-1,j,k))*
     >                            c2)

                rhs(i,j,k,3) = rhs(i,j,k,3) + dx3tx1 * 
     >                    (u(i+1,j,k,3) - 2.0d0*u(i,j,k,3) +
     >                     u(i-1,j,k,3)) +
     >                    xxcon2 * (vs(i+1,j,k) - 2.0d0*vs(i,j,k) +
     >                              vs(i-1,j,k)) -
     >                    tx2 * (u(i+1,j,k,3)*up1 - 
     >                           u(i-1,j,k,3)*um1)

                rhs(i,j,k,4) = rhs(i,j,k,4) + dx4tx1 * 
     >                    (u(i+1,j,k,4) - 2.0d0*u(i,j,k,4) +
     >                     u(i-1,j,k,4)) +
     >                    xxcon2 * (ws(i+1,j,k) - 2.0d0*ws(i,j,k) +
     >                              ws(i-1,j,k)) -
     >                    tx2 * (u(i+1,j,k,4)*up1 - 
     >                           u(i-1,j,k,4)*um1)

                rhs(i,j,k,5) = rhs(i,j,k,5) + dx5tx1 * 
     >                    (u(i+1,j,k,5) - 2.0d0*u(i,j,k,5) +
     >                     u(i-1,j,k,5)) +
     >                    xxcon3 * (qs(i+1,j,k) - 2.0d0*qs(i,j,k) +
     >                              qs(i-1,j,k)) +
     >                    xxcon4 * (up1*up1 -       2.0d0*uijk*uijk + 
     >                              um1*um1) +
     >                    xxcon5 * (u(i+1,j,k,5)*rho_i(i+1,j,k) - 
     >                              2.0d0*u(i,j,k,5)*rho_i(i,j,k) +
     >                              u(i-1,j,k,5)*rho_i(i-1,j,k)) -
     >                    tx2 * ( (c1*u(i+1,j,k,5) - 
     >                             c2*square(i+1,j,k))*up1 -
     >                            (c1*u(i-1,j,k,5) - 
     >                             c2*square(i-1,j,k))*um1 )
             end do
          end do
       end do

c---------------------------------------------------------------------
c      add fourth order xi-direction dissipation               
c---------------------------------------------------------------------

       i = 1
       do    m = 1, 5
          do    k = 1, grid_points(3)-2
             do    j = 1, grid_points(2)-2
                rhs(i,j,k,m) = rhs(i,j,k,m)- dssp * 
     >                    ( 5.0d0*u(i,j,k,m) - 4.0d0*u(i+1,j,k,m) +
     >                            u(i+2,j,k,m))
             end do
          end do
       end do

       i = 2
       do    m = 1, 5
          do    k = 1, grid_points(3)-2
             do    j = 1, grid_points(2)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp * 
     >                    (-4.0d0*u(i-1,j,k,m) + 6.0d0*u(i,j,k,m) -
     >                      4.0d0*u(i+1,j,k,m) + u(i+2,j,k,m))
             end do
          end do
       end do

       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do     j = 1, grid_points(2)-2
                do  i = 3*1,grid_points(1)-3*1-1
                   rhs(i,j,k,m) = rhs(i,j,k,m) - dssp * 
     >                    (  u(i-2,j,k,m) - 4.0d0*u(i-1,j,k,m) + 
     >                     6.0*u(i,j,k,m) - 4.0d0*u(i+1,j,k,m) + 
     >                         u(i+2,j,k,m) )
                end do
             end do
          end do
       end do

       i = grid_points(1)-3
       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do     j = 1, grid_points(2)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp *
     >                    ( u(i-2,j,k,m) - 4.0d0*u(i-1,j,k,m) + 
     >                      6.0d0*u(i,j,k,m) - 4.0d0*u(i+1,j,k,m) )
             end do
          end do
       end do

       i = grid_points(1)-2
       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do     j = 1, grid_points(2)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp *
     >                    ( u(i-2,j,k,m) - 4.d0*u(i-1,j,k,m) +
     >                      5.d0*u(i,j,k,m) )
             end do
          end do
       end do

c---------------------------------------------------------------------
c      compute eta-direction fluxes 
c---------------------------------------------------------------------
       do     k = 1, grid_points(3)-2
          do     j = 1, grid_points(2)-2
             do     i = 1, grid_points(1)-2
                vijk = vs(i,j,k)
                vp1  = vs(i,j+1,k)
                vm1  = vs(i,j-1,k)
                rhs(i,j,k,1) = rhs(i,j,k,1) + dy1ty1 * 
     >                   (u(i,j+1,k,1) - 2.0d0*u(i,j,k,1) + 
     >                    u(i,j-1,k,1)) -
     >                   ty2 * (u(i,j+1,k,3) - u(i,j-1,k,3))
                rhs(i,j,k,2) = rhs(i,j,k,2) + dy2ty1 * 
     >                   (u(i,j+1,k,2) - 2.0d0*u(i,j,k,2) + 
     >                    u(i,j-1,k,2)) +
     >                   yycon2 * (us(i,j+1,k) - 2.0d0*us(i,j,k) + 
     >                             us(i,j-1,k)) -
     >                   ty2 * (u(i,j+1,k,2)*vp1 - 
     >                          u(i,j-1,k,2)*vm1)
                rhs(i,j,k,3) = rhs(i,j,k,3) + dy3ty1 * 
     >                   (u(i,j+1,k,3) - 2.0d0*u(i,j,k,3) + 
     >                    u(i,j-1,k,3)) +
     >                   yycon2*con43 * (vp1 - 2.0d0*vijk + vm1) -
     >                   ty2 * (u(i,j+1,k,3)*vp1 - 
     >                          u(i,j-1,k,3)*vm1 +
     >                          (u(i,j+1,k,5) - square(i,j+1,k) - 
     >                           u(i,j-1,k,5) + square(i,j-1,k))
     >                          *c2)
                rhs(i,j,k,4) = rhs(i,j,k,4) + dy4ty1 * 
     >                   (u(i,j+1,k,4) - 2.0d0*u(i,j,k,4) + 
     >                    u(i,j-1,k,4)) +
     >                   yycon2 * (ws(i,j+1,k) - 2.0d0*ws(i,j,k) + 
     >                             ws(i,j-1,k)) -
     >                   ty2 * (u(i,j+1,k,4)*vp1 - 
     >                          u(i,j-1,k,4)*vm1)
                rhs(i,j,k,5) = rhs(i,j,k,5) + dy5ty1 * 
     >                   (u(i,j+1,k,5) - 2.0d0*u(i,j,k,5) + 
     >                    u(i,j-1,k,5)) +
     >                   yycon3 * (qs(i,j+1,k) - 2.0d0*qs(i,j,k) + 
     >                             qs(i,j-1,k)) +
     >                   yycon4 * (vp1*vp1       - 2.0d0*vijk*vijk + 
     >                             vm1*vm1) +
     >                   yycon5 * (u(i,j+1,k,5)*rho_i(i,j+1,k) - 
     >                             2.0d0*u(i,j,k,5)*rho_i(i,j,k) +
     >                             u(i,j-1,k,5)*rho_i(i,j-1,k)) -
     >                   ty2 * ((c1*u(i,j+1,k,5) - 
     >                           c2*square(i,j+1,k)) * vp1 -
     >                          (c1*u(i,j-1,k,5) - 
     >                           c2*square(i,j-1,k)) * vm1)
             end do
          end do
       end do

c---------------------------------------------------------------------
c      add fourth order eta-direction dissipation         
c---------------------------------------------------------------------

       j = 1
       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do     i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m)- dssp * 
     >                    ( 5.0d0*u(i,j,k,m) - 4.0d0*u(i,j+1,k,m) +
     >                            u(i,j+2,k,m))
             end do
          end do
       end do

       j = 2
       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do     i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp * 
     >                    (-4.0d0*u(i,j-1,k,m) + 6.0d0*u(i,j,k,m) -
     >                      4.0d0*u(i,j+1,k,m) + u(i,j+2,k,m))
             end do
          end do
       end do

       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do    j = 3*1, grid_points(2)-3*1-1
                do  i = 1,grid_points(1)-2
                   rhs(i,j,k,m) = rhs(i,j,k,m) - dssp * 
     >                    (  u(i,j-2,k,m) - 4.0d0*u(i,j-1,k,m) + 
     >                     6.0*u(i,j,k,m) - 4.0d0*u(i,j+1,k,m) + 
     >                         u(i,j+2,k,m) )
                end do
             end do
          end do
       end do
 
       j = grid_points(2)-3
       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do     i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp *
     >                    ( u(i,j-2,k,m) - 4.0d0*u(i,j-1,k,m) + 
     >                      6.0d0*u(i,j,k,m) - 4.0d0*u(i,j+1,k,m) )
             end do
          end do
       end do

       j = grid_points(2)-2
       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do     i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp *
     >                    ( u(i,j-2,k,m) - 4.d0*u(i,j-1,k,m) +
     >                      5.d0*u(i,j,k,m) )
             end do
          end do
       end do

c---------------------------------------------------------------------
c      compute zeta-direction fluxes 
c---------------------------------------------------------------------
       do    k = 1, grid_points(3)-2
          do     j = 1, grid_points(2)-2
             do     i = 1, grid_points(1)-2
                wijk = ws(i,j,k)
                wp1  = ws(i,j,k+1)
                wm1  = ws(i,j,k-1)

                rhs(i,j,k,1) = rhs(i,j,k,1) + dz1tz1 * 
     >                   (u(i,j,k+1,1) - 2.0d0*u(i,j,k,1) + 
     >                    u(i,j,k-1,1)) -
     >                   tz2 * (u(i,j,k+1,4) - u(i,j,k-1,4))
                rhs(i,j,k,2) = rhs(i,j,k,2) + dz2tz1 * 
     >                   (u(i,j,k+1,2) - 2.0d0*u(i,j,k,2) + 
     >                    u(i,j,k-1,2)) +
     >                   zzcon2 * (us(i,j,k+1) - 2.0d0*us(i,j,k) + 
     >                             us(i,j,k-1)) -
     >                   tz2 * (u(i,j,k+1,2)*wp1 - 
     >                          u(i,j,k-1,2)*wm1)
                rhs(i,j,k,3) = rhs(i,j,k,3) + dz3tz1 * 
     >                   (u(i,j,k+1,3) - 2.0d0*u(i,j,k,3) + 
     >                    u(i,j,k-1,3)) +
     >                   zzcon2 * (vs(i,j,k+1) - 2.0d0*vs(i,j,k) + 
     >                             vs(i,j,k-1)) -
     >                   tz2 * (u(i,j,k+1,3)*wp1 - 
     >                          u(i,j,k-1,3)*wm1)
                rhs(i,j,k,4) = rhs(i,j,k,4) + dz4tz1 * 
     >                   (u(i,j,k+1,4) - 2.0d0*u(i,j,k,4) + 
     >                    u(i,j,k-1,4)) +
     >                   zzcon2*con43 * (wp1 - 2.0d0*wijk + wm1) -
     >                   tz2 * (u(i,j,k+1,4)*wp1 - 
     >                          u(i,j,k-1,4)*wm1 +
     >                          (u(i,j,k+1,5) - square(i,j,k+1) - 
     >                           u(i,j,k-1,5) + square(i,j,k-1))
     >                          *c2)
                rhs(i,j,k,5) = rhs(i,j,k,5) + dz5tz1 * 
     >                   (u(i,j,k+1,5) - 2.0d0*u(i,j,k,5) + 
     >                    u(i,j,k-1,5)) +
     >                   zzcon3 * (qs(i,j,k+1) - 2.0d0*qs(i,j,k) + 
     >                             qs(i,j,k-1)) +
     >                   zzcon4 * (wp1*wp1 - 2.0d0*wijk*wijk + 
     >                             wm1*wm1) +
     >                   zzcon5 * (u(i,j,k+1,5)*rho_i(i,j,k+1) - 
     >                             2.0d0*u(i,j,k,5)*rho_i(i,j,k) +
     >                             u(i,j,k-1,5)*rho_i(i,j,k-1)) -
     >                   tz2 * ( (c1*u(i,j,k+1,5) - 
     >                            c2*square(i,j,k+1))*wp1 -
     >                           (c1*u(i,j,k-1,5) - 
     >                            c2*square(i,j,k-1))*wm1)
             end do
          end do
       end do

c---------------------------------------------------------------------
c      add fourth order zeta-direction dissipation                
c---------------------------------------------------------------------

       k = 1
       do     m = 1, 5
          do     j = 1, grid_points(2)-2
             do     i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m)- dssp * 
     >                    ( 5.0d0*u(i,j,k,m) - 4.0d0*u(i,j,k+1,m) +
     >                            u(i,j,k+2,m))
             end do
          end do
       end do

       k = 2
       do     m = 1, 5
          do     j = 1, grid_points(2)-2
             do     i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp * 
     >                    (-4.0d0*u(i,j,k-1,m) + 6.0d0*u(i,j,k,m) -
     >                      4.0d0*u(i,j,k+1,m) + u(i,j,k+2,m))
             end do
          end do
       end do

       do     m = 1, 5
          do     k = 3*1, grid_points(3)-3*1-1
             do     j = 1, grid_points(2)-2
                do     i = 1,grid_points(1)-2
                   rhs(i,j,k,m) = rhs(i,j,k,m) - dssp * 
     >                    (  u(i,j,k-2,m) - 4.0d0*u(i,j,k-1,m) + 
     >                     6.0*u(i,j,k,m) - 4.0d0*u(i,j,k+1,m) + 
     >                         u(i,j,k+2,m) )
                end do
             end do
          end do
       end do
 
       k = grid_points(3)-3
       do     m = 1, 5
          do     j = 1, grid_points(2)-2
             do     i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp *
     >                    ( u(i,j,k-2,m) - 4.0d0*u(i,j,k-1,m) + 
     >                      6.0d0*u(i,j,k,m) - 4.0d0*u(i,j,k+1,m) )
             end do
          end do
       end do

       k = grid_points(3)-2
       do     m = 1, 5
          do     j = 1, grid_points(2)-2
             do     i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) - dssp *
     >                    ( u(i,j,k-2,m) - 4.d0*u(i,j,k-1,m) +
     >                      5.d0*u(i,j,k,m) )
             end do
          end do
       end do

       do     m = 1, 5
          do     k = 1, grid_points(3)-2
             do     j = 1, grid_points(2)-2
                do    i = 1, grid_points(1)-2
                   rhs(i,j,k,m) = rhs(i,j,k,m) * dt
                end do
             end do
          end do
       end do
    
       return
       end





c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine  set_constants

c---------------------------------------------------------------------
c---------------------------------------------------------------------

       include 'header.h'
  
       ce(1,1)  = 2.0d0
       ce(1,2)  = 0.0d0
       ce(1,3)  = 0.0d0
       ce(1,4)  = 4.0d0
       ce(1,5)  = 5.0d0
       ce(1,6)  = 3.0d0
       ce(1,7)  = 0.5d0
       ce(1,8)  = 0.02d0
       ce(1,9)  = 0.01d0
       ce(1,10) = 0.03d0
       ce(1,11) = 0.5d0
       ce(1,12) = 0.4d0
       ce(1,13) = 0.3d0
 
       ce(2,1)  = 1.0d0
       ce(2,2)  = 0.0d0
       ce(2,3)  = 0.0d0
       ce(2,4)  = 0.0d0
       ce(2,5)  = 1.0d0
       ce(2,6)  = 2.0d0
       ce(2,7)  = 3.0d0
       ce(2,8)  = 0.01d0
       ce(2,9)  = 0.03d0
       ce(2,10) = 0.02d0
       ce(2,11) = 0.4d0
       ce(2,12) = 0.3d0
       ce(2,13) = 0.5d0

       ce(3,1)  = 2.0d0
       ce(3,2)  = 2.0d0
       ce(3,3)  = 0.0d0
       ce(3,4)  = 0.0d0
       ce(3,5)  = 0.0d0
       ce(3,6)  = 2.0d0
       ce(3,7)  = 3.0d0
       ce(3,8)  = 0.04d0
       ce(3,9)  = 0.03d0
       ce(3,10) = 0.05d0
       ce(3,11) = 0.3d0
       ce(3,12) = 0.5d0
       ce(3,13) = 0.4d0

       ce(4,1)  = 2.0d0
       ce(4,2)  = 2.0d0
       ce(4,3)  = 0.0d0
       ce(4,4)  = 0.0d0
       ce(4,5)  = 0.0d0
       ce(4,6)  = 2.0d0
       ce(4,7)  = 3.0d0
       ce(4,8)  = 0.03d0
       ce(4,9)  = 0.05d0
       ce(4,10) = 0.04d0
       ce(4,11) = 0.2d0
       ce(4,12) = 0.1d0
       ce(4,13) = 0.3d0

       ce(5,1)  = 5.0d0
       ce(5,2)  = 4.0d0
       ce(5,3)  = 3.0d0
       ce(5,4)  = 2.0d0
       ce(5,5)  = 0.1d0
       ce(5,6)  = 0.4d0
       ce(5,7)  = 0.3d0
       ce(5,8)  = 0.05d0
       ce(5,9)  = 0.04d0
       ce(5,10) = 0.03d0
       ce(5,11) = 0.1d0
       ce(5,12) = 0.3d0
       ce(5,13) = 0.2d0

       c1 = 1.4d0
       c2 = 0.4d0
       c3 = 0.1d0
       c4 = 1.0d0
       c5 = 1.4d0

       bt = dsqrt(0.5d0)

       dnxm1 = 1.0d0 / dble(grid_points(1)-1)
       dnym1 = 1.0d0 / dble(grid_points(2)-1)
       dnzm1 = 1.0d0 / dble(grid_points(3)-1)

       c1c2 = c1 * c2
       c1c5 = c1 * c5
       c3c4 = c3 * c4
       c1345 = c1c5 * c3c4

       conz1 = (1.0d0-c1c5)

       tx1 = 1.0d0 / (dnxm1 * dnxm1)
       tx2 = 1.0d0 / (2.0d0 * dnxm1)
       tx3 = 1.0d0 / dnxm1

       ty1 = 1.0d0 / (dnym1 * dnym1)
       ty2 = 1.0d0 / (2.0d0 * dnym1)
       ty3 = 1.0d0 / dnym1
 
       tz1 = 1.0d0 / (dnzm1 * dnzm1)
       tz2 = 1.0d0 / (2.0d0 * dnzm1)
       tz3 = 1.0d0 / dnzm1

       dx1 = 0.75d0
       dx2 = 0.75d0
       dx3 = 0.75d0
       dx4 = 0.75d0
       dx5 = 0.75d0

       dy1 = 0.75d0
       dy2 = 0.75d0
       dy3 = 0.75d0
       dy4 = 0.75d0
       dy5 = 0.75d0

       dz1 = 1.0d0
       dz2 = 1.0d0
       dz3 = 1.0d0
       dz4 = 1.0d0
       dz5 = 1.0d0

       dxmax = dmax1(dx3, dx4)
       dymax = dmax1(dy2, dy4)
       dzmax = dmax1(dz2, dz3)

       dssp = 0.25d0 * dmax1(dx1, dmax1(dy1, dz1) )

       c4dssp = 4.0d0 * dssp
       c5dssp = 5.0d0 * dssp

       dttx1 = dt*tx1
       dttx2 = dt*tx2
       dtty1 = dt*ty1
       dtty2 = dt*ty2
       dttz1 = dt*tz1
       dttz2 = dt*tz2

       c2dttx1 = 2.0d0*dttx1
       c2dtty1 = 2.0d0*dtty1
       c2dttz1 = 2.0d0*dttz1

       dtdssp = dt*dssp

       comz1  = dtdssp
       comz4  = 4.0d0*dtdssp
       comz5  = 5.0d0*dtdssp
       comz6  = 6.0d0*dtdssp

       c3c4tx3 = c3c4*tx3
       c3c4ty3 = c3c4*ty3
       c3c4tz3 = c3c4*tz3

       dx1tx1 = dx1*tx1
       dx2tx1 = dx2*tx1
       dx3tx1 = dx3*tx1
       dx4tx1 = dx4*tx1
       dx5tx1 = dx5*tx1
        
       dy1ty1 = dy1*ty1
       dy2ty1 = dy2*ty1
       dy3ty1 = dy3*ty1
       dy4ty1 = dy4*ty1
       dy5ty1 = dy5*ty1
        
       dz1tz1 = dz1*tz1
       dz2tz1 = dz2*tz1
       dz3tz1 = dz3*tz1
       dz4tz1 = dz4*tz1
       dz5tz1 = dz5*tz1

       c2iv  = 2.5d0
       con43 = 4.0d0/3.0d0
       con16 = 1.0d0/6.0d0
        
       xxcon1 = c3c4tx3*con43*tx3
       xxcon2 = c3c4tx3*tx3
       xxcon3 = c3c4tx3*conz1*tx3
       xxcon4 = c3c4tx3*con16*tx3
       xxcon5 = c3c4tx3*c1c5*tx3

       yycon1 = c3c4ty3*con43*ty3
       yycon2 = c3c4ty3*ty3
       yycon3 = c3c4ty3*conz1*ty3
       yycon4 = c3c4ty3*con16*ty3
       yycon5 = c3c4ty3*c1c5*ty3

       zzcon1 = c3c4tz3*con43*tz3
       zzcon2 = c3c4tz3*tz3
       zzcon3 = c3c4tz3*conz1*tz3
       zzcon4 = c3c4tz3*con16*tz3
       zzcon5 = c3c4tz3*c1c5*tz3

       return
       end
!-------------------------------------------------------------------------!
!                                                                         !
!        N  A  S     P A R A L L E L     B E N C H M A R K S  2.3         !
!                                                                         !
!                     S E R I A L     V E R S I O N S                     !
!                                                                         !
!                                   S P                                   !
!                                                                         !
!-------------------------------------------------------------------------!
!                                                                         !
!    This benchmark is a serial version of the NPB SP code.               !
!                                                                         !
!    Permission to use, copy, distribute and modify this software         !
!    for any purpose with or without fee is hereby granted.  We           !
!    request, however, that all derived work reference the NAS            !
!    Parallel Benchmarks 2.3. This software is provided "as is"           !
!    without express or implied warranty.                                 !
!                                                                         !
!    Information on NPB 2.3, including the technical report, the          !
!    original specifications, source code, results and information        !
!    on how to submit new results, is available at:                       !
!                                                                         !
!           http://www.nas.nasa.gov/NAS/NPB/                              !
!                                                                         !
!    Send comments or suggestions to  npb@nas.nasa.gov                    !
!    Send bug reports to              npb-bugs@nas.nasa.gov               !
!                                                                         !
!          NAS Parallel Benchmarks Group                                  !
!          NASA Ames Research Center                                      !
!          Mail Stop: T27A-1                                              !
!          Moffett Field, CA   94035-1000                                 !
!                                                                         !
!          E-mail:  npb@nas.nasa.gov                                      !
!          Fax:     (415) 604-3957                                        !
!                                                                         !
!-------------------------------------------------------------------------!


c---------------------------------------------------------------------
c
c Author: R. Van der Wijngaart
c         W. Saphir
c---------------------------------------------------------------------

c---------------------------------------------------------------------
       program SP
c---------------------------------------------------------------------

       include  'header.h'
      
       integer          i, niter, step, fstatus
c       external         timer_read
c       double precision mflops, t, tmax, timer_read
       logical          verified
       character        class

c---------------------------------------------------------------------
c      Read input file (if it exists), else take
c      defaults from parameters
c---------------------------------------------------------------------
          
       write(*, 1000)
c       open (unit=2,file='inputsp.data',status='old', iostat=fstatus)

c       if (fstatus .eq. 0) then
c         write(*,233) 
c 233     format(' Reading from input file inputsp.data')
c         read (2,*) niter
c         read (2,*) dt
c         read (2,*) grid_points(1), grid_points(2), grid_points(3)
c         close(2)
c       else
         write(*,234) 
         niter = niter_default
         dt    = dt_default
         grid_points(1) = problem_size
         grid_points(2) = problem_size
         grid_points(3) = problem_size
c       endif
 234   format(' No input file inputsp.data. Using compiled defaults')

       write(*, 1001) grid_points(1), grid_points(2), grid_points(3)
       write(*, 1002) niter, dt

 1000 format(//,' NAS Parallel Benchmarks 2.3-serial version',
     >          ' - SP Benchmark', /)
 1001     format(' Size: ', i3, 'x', i3, 'x', i3)
 1002     format(' Iterations: ', i3, '    dt: ', F10.6)

       if ( (grid_points(1) .gt. IMAX) .or.
     >      (grid_points(2) .gt. JMAX) .or.
     >      (grid_points(3) .gt. KMAX) ) then
c             print *, (grid_points(i),i=1,3)
c             print *,' Problem size too big for compiled array sizes'
             write(6,*) (grid_points(i),i=1,3)
             write(6,*) 'Problem size too big for compiled array sizes'
             goto 999
       endif

       call set_constants

       call initialize

       call lhsinit

       call exact_rhs

c---------------------------------------------------------------------
c      do one time step to touch all code, and reinitialize
c---------------------------------------------------------------------
       call adi
       call initialize

c       call timer_clear(1)
c       call timer_start(1)

       do  step = 1, niter

          if (mod(step, 20) .eq. 0 .or. step .eq. 1) then
             write(*, 200) step
 200         format(' Time step ', i4)
          endif

          call adi

       end do

c       call timer_stop(1)
c       tmax = timer_read(1)
       
       call verify(niter, class, verified)

c       if( tmax .ne. 0. ) then
c          mflops = (881.174*float( problem_size )**3
c     >             -4683.91*float( problem_size )**2
c     >             +11484.5*float( problem_size )
c     >             -19272.4) * float( niter ) / (tmax*1000000.0d0)
c       else
c          mflops = 0.0
c       endif

c      call print_results('SP', class, grid_points(1), 
c     >     grid_points(2), grid_points(3), niter, 
c     >     tmax, mflops, '          floating point', 
c     >     verified, npbversion,compiletime, cs1, cs2, cs3, cs4, cs5, 
c     >     cs6, '(none)')

 999   continue

       end

c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine  txinvr

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c block-diagonal matrix-vector multiplication                  
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k
       double precision t1, t2, t3, ac, ru1, uu, vv, ww, r1, r2, r3, 
     >                  r4, r5, ac2inv


       do    k = 1, grid_points(3)-2
          do    j = 1, grid_points(2)-2
             do    i = 1, grid_points(1)-2

                ru1 = rho_i(i,j,k)
                uu = us(i,j,k)
                vv = vs(i,j,k)
                ww = ws(i,j,k)
                ac = speed(i,j,k)
                ac2inv = ainv(i,j,k)*ainv(i,j,k)

                r1 = rhs(i,j,k,1)
                r2 = rhs(i,j,k,2)
                r3 = rhs(i,j,k,3)
                r4 = rhs(i,j,k,4)
                r5 = rhs(i,j,k,5)

                t1 = c2 * ac2inv * ( qs(i,j,k)*r1 - uu*r2  - 
     >                  vv*r3 - ww*r4 + r5 )
                t2 = bt * ru1 * ( uu * r1 - r2 )
                t3 = ( bt * ru1 * ac ) * t1

                rhs(i,j,k,1) = r1 - t1
                rhs(i,j,k,2) = - ru1 * ( ww*r1 - r4 )
                rhs(i,j,k,3) =   ru1 * ( vv*r1 - r3 )
                rhs(i,j,k,4) = - t2 + t3
                rhs(i,j,k,5) =   t2 + t3

             end do
          end do
       end do

       return
       end



c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine  tzetar

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c   block-diagonal matrix-vector multiplication                       
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k
       double precision  t1, t2, t3, ac, xvel, yvel, zvel, r1, r2, r3, 
     >                   r4, r5, btuz, acinv, ac2u, uzik1


       do    k = 1, grid_points(3)-2
          do    j = 1, grid_points(2)-2
             do    i = 1, grid_points(1)-2

                xvel = us(i,j,k)
                yvel = vs(i,j,k)
                zvel = ws(i,j,k)
                ac   = speed(i,j,k)
                acinv = ainv(i,j,k)

                ac2u = ac*ac

                r1 = rhs(i,j,k,1)
                r2 = rhs(i,j,k,2)
                r3 = rhs(i,j,k,3)
                r4 = rhs(i,j,k,4)
                r5 = rhs(i,j,k,5)      

                uzik1 = u(i,j,k,1)
                btuz  = bt * uzik1

                t1 = btuz*acinv * (r4 + r5)
                t2 = r3 + t1
                t3 = btuz * (r4 - r5)

                rhs(i,j,k,1) = t2
                rhs(i,j,k,2) = -uzik1*r2 + xvel*t2
                rhs(i,j,k,3) =  uzik1*r1 + yvel*t2
                rhs(i,j,k,4) =  zvel*t2  + t3
                rhs(i,j,k,5) =  uzik1*(-xvel*r2 + yvel*r1) + 
     >                    qs(i,j,k)*t2 + c2iv*ac2u*t1 + zvel*t3

             end do
          end do
       end do

       return
       end

c---------------------------------------------------------------------
c---------------------------------------------------------------------

        subroutine verify(no_time_steps, class, verified)

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c  verification routine                         
c---------------------------------------------------------------------

        include 'header.h'

        double precision xcrref(5),xceref(5),xcrdif(5),xcedif(5), 
     >                   epsilon, xce(5), xcr(5), dtref
        integer m, no_time_steps
        character class
        logical verified

c---------------------------------------------------------------------
c   tolerance level
c---------------------------------------------------------------------
        epsilon = 1.0d-08


c---------------------------------------------------------------------
c   compute the error norm and the residual norm, and exit if not printing
c---------------------------------------------------------------------
        call error_norm(xce)
        call compute_rhs

        call rhs_norm(xcr)

        do m = 1, 5
           xcr(m) = xcr(m) / dt
        enddo

        class = 'U'
        verified = .true.

        do m = 1,5
           xcrref(m) = 1.0
           xceref(m) = 1.0
        end do

c---------------------------------------------------------------------
c    reference data for 12X12X12 grids after 100 time steps, with DT = 1.50d-02
c---------------------------------------------------------------------
        if ( (grid_points(1)  .eq. 12     ) .and. 
     >       (grid_points(2)  .eq. 12     ) .and.
     >       (grid_points(3)  .eq. 12     ) .and.
     >       (no_time_steps   .eq. 100    ))  then

           class = 'S'
           dtref = 1.5d-2

c---------------------------------------------------------------------
c    Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
           xcrref(1) = 2.7470315451339479d-02
           xcrref(2) = 1.0360746705285417d-02
           xcrref(3) = 1.6235745065095532d-02
           xcrref(4) = 1.5840557224455615d-02
           xcrref(5) = 3.4849040609362460d-02

c---------------------------------------------------------------------
c    Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
           xceref(1) = 2.7289258557377227d-05
           xceref(2) = 1.0364446640837285d-05
           xceref(3) = 1.6154798287166471d-05
           xceref(4) = 1.5750704994480102d-05
           xceref(5) = 3.4177666183390531d-05


c---------------------------------------------------------------------
c    reference data for 36X36X36 grids after 400 time steps, with DT = 1.5d-03
c---------------------------------------------------------------------
        elseif ( (grid_points(1) .eq. 36) .and. 
     >           (grid_points(2) .eq. 36) .and.
     >           (grid_points(3) .eq. 36) .and.
     >           (no_time_steps . eq. 400) ) then

           class = 'W'
           dtref = 1.5d-3

c---------------------------------------------------------------------
c    Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
           xcrref(1) = 0.1893253733584d-02
           xcrref(2) = 0.1717075447775d-03
           xcrref(3) = 0.2778153350936d-03
           xcrref(4) = 0.2887475409984d-03
           xcrref(5) = 0.3143611161242d-02

c---------------------------------------------------------------------
c    Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
           xceref(1) = 0.7542088599534d-04
           xceref(2) = 0.6512852253086d-05
           xceref(3) = 0.1049092285688d-04
           xceref(4) = 0.1128838671535d-04
           xceref(5) = 0.1212845639773d-03

c---------------------------------------------------------------------
c    reference data for 64X64X64 grids after 400 time steps, with DT = 1.5d-03
c---------------------------------------------------------------------
        elseif ( (grid_points(1) .eq. 64) .and. 
     >           (grid_points(2) .eq. 64) .and.
     >           (grid_points(3) .eq. 64) .and.
     >           (no_time_steps . eq. 400) ) then

           class = 'A'
           dtref = 1.5d-3

c---------------------------------------------------------------------
c    Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
           xcrref(1) = 2.4799822399300195d0
           xcrref(2) = 1.1276337964368832d0
           xcrref(3) = 1.5028977888770491d0
           xcrref(4) = 1.4217816211695179d0
           xcrref(5) = 2.1292113035138280d0

c---------------------------------------------------------------------
c    Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
           xceref(1) = 1.0900140297820550d-04
           xceref(2) = 3.7343951769282091d-05
           xceref(3) = 5.0092785406541633d-05
           xceref(4) = 4.7671093939528255d-05
           xceref(5) = 1.3621613399213001d-04

c---------------------------------------------------------------------
c    reference data for 102X102X102 grids after 400 time steps,
c    with DT = 1.0d-03
c---------------------------------------------------------------------
        elseif ( (grid_points(1) .eq. 102) .and. 
     >           (grid_points(2) .eq. 102) .and.
     >           (grid_points(3) .eq. 102) .and.
     >           (no_time_steps . eq. 400) ) then

           class = 'B'
           dtref = 1.0d-3

c---------------------------------------------------------------------
c    Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
           xcrref(1) = 0.6903293579998d+02
           xcrref(2) = 0.3095134488084d+02
           xcrref(3) = 0.4103336647017d+02
           xcrref(4) = 0.3864769009604d+02
           xcrref(5) = 0.5643482272596d+02

c---------------------------------------------------------------------
c    Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
           xceref(1) = 0.9810006190188d-02
           xceref(2) = 0.1022827905670d-02
           xceref(3) = 0.1720597911692d-02
           xceref(4) = 0.1694479428231d-02
           xceref(5) = 0.1847456263981d-01


c---------------------------------------------------------------------
c    reference data for 162X162X162 grids after 400 time steps,
c    with DT = 0.67d-03
c---------------------------------------------------------------------
        elseif ( (grid_points(1) .eq. 162) .and. 
     >           (grid_points(2) .eq. 162) .and.
     >           (grid_points(3) .eq. 162) .and.
     >           (no_time_steps . eq. 400) ) then

           class = 'C'
           dtref = 0.67d-3

c---------------------------------------------------------------------
c    Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
           xcrref(1) = 0.5881691581829d+03
           xcrref(2) = 0.2454417603569d+03
           xcrref(3) = 0.3293829191851d+03
           xcrref(4) = 0.3081924971891d+03
           xcrref(5) = 0.4597223799176d+03

c---------------------------------------------------------------------
c    Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
           xceref(1) = 0.2598120500183d+00
           xceref(2) = 0.2590888922315d-01
           xceref(3) = 0.5132886416320d-01
           xceref(4) = 0.4806073419454d-01
           xceref(5) = 0.5483377491301d+00


        else
           verified = .false.
        endif

c---------------------------------------------------------------------
c    verification test for residuals if gridsize is either 12X12X12 or 
c    64X64X64 or 102X102X102 or 162X162X162
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c    Compute the difference of solution values and the known reference values.
c---------------------------------------------------------------------
        do m = 1, 5
           
           xcrdif(m) = dabs((xcr(m)-xcrref(m))/xcrref(m)) 
           xcedif(m) = dabs((xce(m)-xceref(m))/xceref(m))
           
        enddo

c---------------------------------------------------------------------
c    Output the comparison of computed results to known cases.
c---------------------------------------------------------------------

        if (class .ne. 'U') then
           write(*, 1990) class
 1990      format(' Verification being performed for class ', a)
           write (*,2000) epsilon
 2000      format(' accuracy setting for epsilon = ', E20.13)
           if (dabs(dt-dtref) .gt. epsilon) then  
              verified = .false.
              class = 'U'
              write (*,1000) dtref
 1000         format(' DT does not match the reference value of ', 
     >                 E15.8)
           endif
        else 
           write(*, 1995)
 1995      format(' Unknown class')
        endif


        if (class .ne. 'U') then
           write (*,2001) 
        else
           write (*, 2005)
        endif

 2001   format(' Comparison of RMS-norms of residual')
 2005   format(' RMS-norms of residual')
        do m = 1, 5
           if (class .eq. 'U') then
              write(*, 2015) m, xcr(m)
           else if (xcrdif(m) .gt. epsilon) then
              verified = .false.
              write (*,2010) m,xcr(m),xcrref(m),xcrdif(m)
           else 
              write (*,2011) m,xcr(m),xcrref(m),xcrdif(m)
           endif
        enddo

        if (class .ne. 'U') then
           write (*,2002)
        else
           write (*,2006)
        endif
 2002   format(' Comparison of RMS-norms of solution error')
 2006   format(' RMS-norms of solution error')
        
        do m = 1, 5
           if (class .eq. 'U') then
              write(*, 2015) m, xce(m)
           else if (xcedif(m) .gt. epsilon) then
              verified = .false.
              write (*,2010) m,xce(m),xceref(m),xcedif(m)
           else
              write (*,2011) m,xce(m),xceref(m),xcedif(m)
           endif
        enddo
        
 2010   format(' FAILURE: ', i2, E20.13, E20.13, E20.13)
 2011   format('          ', i2, E20.13, E20.13, E20.13)
 2015   format('          ', i2, E20.13)
        
        if (class .eq. 'U') then
           write(*, 2022)
           write(*, 2023)
 2022      format(' No reference values provided')
 2023      format(' No verification performed')
        else if (verified) then
           write(*, 2020)
 2020      format(' Verification Successful')
        else
           write(*, 2021)
 2021      format(' Verification failed')
        endif

        return


        end

c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine x_solve

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the x-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the x-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k, n, i1, i2, m
       double precision  fac1, fac2


c---------------------------------------------------------------------
c                          FORWARD ELIMINATION  
c---------------------------------------------------------------------

       call lhsx

c---------------------------------------------------------------------
c      perform the Thomas algorithm; first, FORWARD ELIMINATION     
c---------------------------------------------------------------------
       n = 0

       do    k = 1, grid_points(3)-2
          do    j = 1, grid_points(2)-2
             do    i = 0, grid_points(1)-3
                i1 = i  + 1
                i2 = i  + 2
                fac1               = 1.d0/lhs(i,j,k,n+3)
                lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
                end do
                lhs(i1,j,k,n+3) = lhs(i1,j,k,n+3) -
     >                         lhs(i1,j,k,n+2)*lhs(i,j,k,n+4)
                lhs(i1,j,k,n+4) = lhs(i1,j,k,n+4) -
     >                         lhs(i1,j,k,n+2)*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i1,j,k,m) = rhs(i1,j,k,m) -
     >                         lhs(i1,j,k,n+2)*rhs(i,j,k,m)
                end do
                lhs(i2,j,k,n+2) = lhs(i2,j,k,n+2) -
     >                         lhs(i2,j,k,n+1)*lhs(i,j,k,n+4)
                lhs(i2,j,k,n+3) = lhs(i2,j,k,n+3) -
     >                         lhs(i2,j,k,n+1)*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i2,j,k,m) = rhs(i2,j,k,m) -
     >                         lhs(i2,j,k,n+1)*rhs(i,j,k,m)
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      The last two rows in this grid block are a bit different, 
c      since they do not have two more rows available for the
c      elimination of off-diagonal entries
c---------------------------------------------------------------------

       i  = grid_points(1)-2
       i1 = grid_points(1)-1
       do    k = 1, grid_points(3)-2
          do    j = 1, grid_points(2)-2
             fac1               = 1.d0/lhs(i,j,k,n+3)
             lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
             lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
             do    m = 1, 3
                rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
             end do
             lhs(i1,j,k,n+3) = lhs(i1,j,k,n+3) -
     >                      lhs(i1,j,k,n+2)*lhs(i,j,k,n+4)
             lhs(i1,j,k,n+4) = lhs(i1,j,k,n+4) -
     >                      lhs(i1,j,k,n+2)*lhs(i,j,k,n+5)
             do    m = 1, 3
                rhs(i1,j,k,m) = rhs(i1,j,k,m) -
     >                      lhs(i1,j,k,n+2)*rhs(i,j,k,m)
             end do
c---------------------------------------------------------------------
c            scale the last row immediately 
c---------------------------------------------------------------------
             fac2               = 1.d0/lhs(i1,j,k,n+3)
             do    m = 1, 3
                rhs(i1,j,k,m) = fac2*rhs(i1,j,k,m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c      do the u+c and the u-c factors                 
c---------------------------------------------------------------------

       do    m = 4, 5
          n = (m-3)*5
          do    k = 1, grid_points(3)-2
             do    j = 1, grid_points(2)-2
                do    i = 0, grid_points(1)-3
                   i1 = i  + 1
                   i2 = i  + 2
                   fac1               = 1.d0/lhs(i,j,k,n+3)
                   lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                   lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                   rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
                   lhs(i1,j,k,n+3) = lhs(i1,j,k,n+3) -
     >                            lhs(i1,j,k,n+2)*lhs(i,j,k,n+4)
                   lhs(i1,j,k,n+4) = lhs(i1,j,k,n+4) -
     >                            lhs(i1,j,k,n+2)*lhs(i,j,k,n+5)
                   rhs(i1,j,k,m) = rhs(i1,j,k,m) -
     >                         lhs(i1,j,k,n+2)*rhs(i,j,k,m)
                   lhs(i2,j,k,n+2) = lhs(i2,j,k,n+2) -
     >                         lhs(i2,j,k,n+1)*lhs(i,j,k,n+4)
                   lhs(i2,j,k,n+3) = lhs(i2,j,k,n+3) -
     >                         lhs(i2,j,k,n+1)*lhs(i,j,k,n+5)
                   rhs(i2,j,k,m) = rhs(i2,j,k,m) -
     >                         lhs(i2,j,k,n+1)*rhs(i,j,k,m)
                end do
             end do
          end do

c---------------------------------------------------------------------
c         And again the last two rows separately
c---------------------------------------------------------------------
          i  = grid_points(1)-2
          i1 = grid_points(1)-1
          do    k = 1, grid_points(3)-2
             do    j = 1, grid_points(2)-2
                fac1               = 1.d0/lhs(i,j,k,n+3)
                lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                rhs(i,j,k,m)     = fac1*rhs(i,j,k,m)
                lhs(i1,j,k,n+3) = lhs(i1,j,k,n+3) -
     >                      lhs(i1,j,k,n+2)*lhs(i,j,k,n+4)
                lhs(i1,j,k,n+4) = lhs(i1,j,k,n+4) -
     >                      lhs(i1,j,k,n+2)*lhs(i,j,k,n+5)
                rhs(i1,j,k,m)   = rhs(i1,j,k,m) -
     >                      lhs(i1,j,k,n+2)*rhs(i,j,k,m)
c---------------------------------------------------------------------
c               Scale the last row immediately
c---------------------------------------------------------------------
                fac2               = 1.d0/lhs(i1,j,k,n+3)
                rhs(i1,j,k,m)   = fac2*rhs(i1,j,k,m)

             end do
          end do
       end do


c---------------------------------------------------------------------
c                         BACKSUBSTITUTION 
c---------------------------------------------------------------------


       i  = grid_points(1)-2
       i1 = grid_points(1)-1
       n = 0
       do   m = 1, 3
          do   k = 1, grid_points(3)-2
             do   j = 1, grid_points(2)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) -
     >                             lhs(i,j,k,n+4)*rhs(i1,j,k,m)
             end do
          end do
       end do

       do    m = 4, 5
          n = (m-3)*5
          do   k = 1, grid_points(3)-2
             do   j = 1, grid_points(2)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) -
     >                             lhs(i,j,k,n+4)*rhs(i1,j,k,m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c      The first three factors
c---------------------------------------------------------------------
       n = 0
       do   m = 1, 3
          do   k = 1, grid_points(3)-2
             do   j = 1, grid_points(2)-2
                do    i = grid_points(1)-3, 0, -1
                   i1 = i  + 1
                   i2 = i  + 2
                   rhs(i,j,k,m) = rhs(i,j,k,m) - 
     >                          lhs(i,j,k,n+4)*rhs(i1,j,k,m) -
     >                          lhs(i,j,k,n+5)*rhs(i2,j,k,m)
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      And the remaining two
c---------------------------------------------------------------------
       do    m = 4, 5
          n = (m-3)*5
          do   k = 1, grid_points(3)-2
             do   j = 1, grid_points(2)-2
                do    i = grid_points(1)-3, 0, -1
                   i1 = i  + 1
                   i2 = i  + 2
                   rhs(i,j,k,m) = rhs(i,j,k,m) - 
     >                          lhs(i,j,k,n+4)*rhs(i1,j,k,m) -
     >                          lhs(i,j,k,n+5)*rhs(i2,j,k,m)
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      Do the block-diagonal inversion          
c---------------------------------------------------------------------
       call ninvr

       return
       end
    







c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine y_solve

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the y-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the y-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k, n, j1, j2, m
       double precision  fac1, fac2

c---------------------------------------------------------------------
c                          FORWARD ELIMINATION  
c---------------------------------------------------------------------

       call lhsy

       n = 0

       do    k = 1, grid_points(3)-2
          do    j = 0, grid_points(2)-3
             do    i = 1, grid_points(1)-2
                j1 = j  + 1
                j2 = j  + 2
                fac1               = 1.d0/lhs(i,j,k,n+3)
                lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
                end do
                lhs(i,j1,k,n+3) = lhs(i,j1,k,n+3) -
     >                         lhs(i,j1,k,n+2)*lhs(i,j,k,n+4)
                lhs(i,j1,k,n+4) = lhs(i,j1,k,n+4) -
     >                         lhs(i,j1,k,n+2)*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i,j1,k,m) = rhs(i,j1,k,m) -
     >                         lhs(i,j1,k,n+2)*rhs(i,j,k,m)
                end do
                lhs(i,j2,k,n+2) = lhs(i,j2,k,n+2) -
     >                         lhs(i,j2,k,n+1)*lhs(i,j,k,n+4)
                lhs(i,j2,k,n+3) = lhs(i,j2,k,n+3) -
     >                         lhs(i,j2,k,n+1)*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i,j2,k,m) = rhs(i,j2,k,m) -
     >                         lhs(i,j2,k,n+1)*rhs(i,j,k,m)
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      The last two rows in this grid block are a bit different, 
c      since they do not have two more rows available for the
c      elimination of off-diagonal entries
c---------------------------------------------------------------------

       j  = grid_points(2)-2
       j1 = grid_points(2)-1
       do    k = 1, grid_points(3)-2
          do    i = 1, grid_points(1)-2
             fac1               = 1.d0/lhs(i,j,k,n+3)
             lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
             lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
             do    m = 1, 3
                rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
             end do
             lhs(i,j1,k,n+3) = lhs(i,j1,k,n+3) -
     >                      lhs(i,j1,k,n+2)*lhs(i,j,k,n+4)
             lhs(i,j1,k,n+4) = lhs(i,j1,k,n+4) -
     >                      lhs(i,j1,k,n+2)*lhs(i,j,k,n+5)
             do    m = 1, 3
                rhs(i,j1,k,m) = rhs(i,j1,k,m) -
     >                      lhs(i,j1,k,n+2)*rhs(i,j,k,m)
             end do
c---------------------------------------------------------------------
c            scale the last row immediately 
c---------------------------------------------------------------------
             fac2               = 1.d0/lhs(i,j1,k,n+3)
             do    m = 1, 3
                rhs(i,j1,k,m) = fac2*rhs(i,j1,k,m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c      do the u+c and the u-c factors                 
c---------------------------------------------------------------------
       do    m = 4, 5

          n = (m-3)*5
          do    k = 1, grid_points(3)-2
             do    j = 0, grid_points(2)-3
                do    i = 1, grid_points(1)-2
                   j1 = j  + 1
                   j2 = j  + 2
                   fac1               = 1.d0/lhs(i,j,k,n+3)
                   lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                   lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                   rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
                   lhs(i,j1,k,n+3) = lhs(i,j1,k,n+3) -
     >                         lhs(i,j1,k,n+2)*lhs(i,j,k,n+4)
                   lhs(i,j1,k,n+4) = lhs(i,j1,k,n+4) -
     >                         lhs(i,j1,k,n+2)*lhs(i,j,k,n+5)
                   rhs(i,j1,k,m) = rhs(i,j1,k,m) -
     >                         lhs(i,j1,k,n+2)*rhs(i,j,k,m)
                   lhs(i,j2,k,n+2) = lhs(i,j2,k,n+2) -
     >                         lhs(i,j2,k,n+1)*lhs(i,j,k,n+4)
                   lhs(i,j2,k,n+3) = lhs(i,j2,k,n+3) -
     >                         lhs(i,j2,k,n+1)*lhs(i,j,k,n+5)
                   rhs(i,j2,k,m) = rhs(i,j2,k,m) -
     >                         lhs(i,j2,k,n+1)*rhs(i,j,k,m)
                end do
             end do
          end do

c---------------------------------------------------------------------
c         And again the last two rows separately
c---------------------------------------------------------------------
          j  = grid_points(2)-2
          j1 = grid_points(2)-1
          do    k = 1, grid_points(3)-2
             do    i = 1, grid_points(1)-2
                fac1               = 1.d0/lhs(i,j,k,n+3)
                lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                rhs(i,j,k,m)     = fac1*rhs(i,j,k,m)
                lhs(i,j1,k,n+3) = lhs(i,j1,k,n+3) -
     >                      lhs(i,j1,k,n+2)*lhs(i,j,k,n+4)
                lhs(i,j1,k,n+4) = lhs(i,j1,k,n+4) -
     >                      lhs(i,j1,k,n+2)*lhs(i,j,k,n+5)
                rhs(i,j1,k,m)   = rhs(i,j1,k,m) -
     >                      lhs(i,j1,k,n+2)*rhs(i,j,k,m)
c---------------------------------------------------------------------
c               Scale the last row immediately 
c---------------------------------------------------------------------
                fac2               = 1.d0/lhs(i,j1,k,n+3)
                rhs(i,j1,k,m)   = fac2*rhs(i,j1,k,m)

             end do
          end do

       end do



c---------------------------------------------------------------------
c                         BACKSUBSTITUTION 
c---------------------------------------------------------------------

       j  = grid_points(2)-2
       j1 = grid_points(2)-1
       n = 0
       do   m = 1, 3
          do   k = 1, grid_points(3)-2
             do   i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) -
     >                           lhs(i,j,k,n+4)*rhs(i,j1,k,m)
             end do
          end do
       end do

       do    m = 4, 5
          n = (m-3)*5
          do   k = 1, grid_points(3)-2
             do   i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) -
     >                             lhs(i,j,k,n+4)*rhs(i,j1,k,m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c      The first three factors
c---------------------------------------------------------------------
       n = 0
       do   m = 1, 3
          do   k = 1, grid_points(3)-2
             do   j = grid_points(2)-3, 0, -1
                do    i = 1, grid_points(1)-2
                   j1 = j  + 1
                   j2 = j  + 2
                   rhs(i,j,k,m) = rhs(i,j,k,m) - 
     >                          lhs(i,j,k,n+4)*rhs(i,j1,k,m) -
     >                          lhs(i,j,k,n+5)*rhs(i,j2,k,m)
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      And the remaining two
c---------------------------------------------------------------------
       do    m = 4, 5
          n = (m-3)*5
          do   k = 1, grid_points(3)-2
             do   j = grid_points(2)-3, 0, -1
                do    i = 1, grid_points(1)-2
                   j1 = j  + 1
                   j2 = j1 + 1
                   rhs(i,j,k,m) = rhs(i,j,k,m) - 
     >                          lhs(i,j,k,n+4)*rhs(i,j1,k,m) -
     >                          lhs(i,j,k,n+5)*rhs(i,j2,k,m)
                end do
             end do
          end do
       end do

       call pinvr

       return
       end
    







c---------------------------------------------------------------------
c---------------------------------------------------------------------

       subroutine z_solve

c---------------------------------------------------------------------
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the z-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the z-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------

       include 'header.h'

       integer i, j, k, n, k1, k2, m
       double precision  fac1, fac2

c---------------------------------------------------------------------
c                          FORWARD ELIMINATION  
c---------------------------------------------------------------------

       call lhsz

       n = 0

       do    k = 0, grid_points(3)-3
          do    j = 1, grid_points(2)-2
             do    i = 1, grid_points(1)-2
                k1 = k  + 1
                k2 = k  + 2
                fac1               = 1.d0/lhs(i,j,k,n+3)
                lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
                end do
                lhs(i,j,k1,n+3) = lhs(i,j,k1,n+3) -
     >                         lhs(i,j,k1,n+2)*lhs(i,j,k,n+4)
                lhs(i,j,k1,n+4) = lhs(i,j,k1,n+4) -
     >                         lhs(i,j,k1,n+2)*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i,j,k1,m) = rhs(i,j,k1,m) -
     >                         lhs(i,j,k1,n+2)*rhs(i,j,k,m)
                end do
                lhs(i,j,k2,n+2) = lhs(i,j,k2,n+2) -
     >                         lhs(i,j,k2,n+1)*lhs(i,j,k,n+4)
                lhs(i,j,k2,n+3) = lhs(i,j,k2,n+3) -
     >                         lhs(i,j,k2,n+1)*lhs(i,j,k,n+5)
                do    m = 1, 3
                   rhs(i,j,k2,m) = rhs(i,j,k2,m) -
     >                         lhs(i,j,k2,n+1)*rhs(i,j,k,m)
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      The last two rows in this grid block are a bit different, 
c      since they do not have two more rows available for the
c      elimination of off-diagonal entries
c---------------------------------------------------------------------
       k  = grid_points(3)-2
       k1 = grid_points(3)-1
       do    j = 1, grid_points(2)-2
          do    i = 1, grid_points(1)-2
             fac1               = 1.d0/lhs(i,j,k,n+3)
             lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
             lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
             do    m = 1, 3
                rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
             end do
             lhs(i,j,k1,n+3) = lhs(i,j,k1,n+3) -
     >                      lhs(i,j,k1,n+2)*lhs(i,j,k,n+4)
             lhs(i,j,k1,n+4) = lhs(i,j,k1,n+4) -
     >                      lhs(i,j,k1,n+2)*lhs(i,j,k,n+5)
             do    m = 1, 3
                rhs(i,j,k1,m) = rhs(i,j,k1,m) -
     >                      lhs(i,j,k1,n+2)*rhs(i,j,k,m)
             end do
c---------------------------------------------------------------------
c               scale the last row immediately
c---------------------------------------------------------------------
             fac2               = 1.d0/lhs(i,j,k1,n+3)
             do    m = 1, 3
                rhs(i,j,k1,m) = fac2*rhs(i,j,k1,m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c      do the u+c and the u-c factors               
c---------------------------------------------------------------------
       do   m = 4, 5

          n = (m-3)*5
          do    k = 0, grid_points(3)-3
             do    j = 1, grid_points(2)-2
                do    i = 1, grid_points(1)-2
                   k1 = k  + 1
                   k2 = k  + 2
                   fac1               = 1.d0/lhs(i,j,k,n+3)
                   lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                   lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                   rhs(i,j,k,m) = fac1*rhs(i,j,k,m)
                   lhs(i,j,k1,n+3) = lhs(i,j,k1,n+3) -
     >                         lhs(i,j,k1,n+2)*lhs(i,j,k,n+4)
                   lhs(i,j,k1,n+4) = lhs(i,j,k1,n+4) -
     >                         lhs(i,j,k1,n+2)*lhs(i,j,k,n+5)
                   rhs(i,j,k1,m) = rhs(i,j,k1,m) -
     >                         lhs(i,j,k1,n+2)*rhs(i,j,k,m)
                   lhs(i,j,k2,n+2) = lhs(i,j,k2,n+2) -
     >                         lhs(i,j,k2,n+1)*lhs(i,j,k,n+4)
                   lhs(i,j,k2,n+3) = lhs(i,j,k2,n+3) -
     >                         lhs(i,j,k2,n+1)*lhs(i,j,k,n+5)
                   rhs(i,j,k2,m) = rhs(i,j,k2,m) -
     >                         lhs(i,j,k2,n+1)*rhs(i,j,k,m)
                end do
             end do
          end do

c---------------------------------------------------------------------
c         And again the last two rows separately
c---------------------------------------------------------------------
          k  = grid_points(3)-2
          k1 = grid_points(3)-1
          do    j = 1, grid_points(2)-2
             do    i = 1, grid_points(1)-2
                fac1               = 1.d0/lhs(i,j,k,n+3)
                lhs(i,j,k,n+4)   = fac1*lhs(i,j,k,n+4)
                lhs(i,j,k,n+5)   = fac1*lhs(i,j,k,n+5)
                rhs(i,j,k,m)     = fac1*rhs(i,j,k,m)
                lhs(i,j,k1,n+3) = lhs(i,j,k1,n+3) -
     >                      lhs(i,j,k1,n+2)*lhs(i,j,k,n+4)
                lhs(i,j,k1,n+4) = lhs(i,j,k1,n+4) -
     >                      lhs(i,j,k1,n+2)*lhs(i,j,k,n+5)
                rhs(i,j,k1,m)   = rhs(i,j,k1,m) -
     >                      lhs(i,j,k1,n+2)*rhs(i,j,k,m)
c---------------------------------------------------------------------
c               Scale the last row immediately (some of this is overkill
c               if this is the last cell)
c---------------------------------------------------------------------
                fac2               = 1.d0/lhs(i,j,k1,n+3)
                rhs(i,j,k1,m)   = fac2*rhs(i,j,k1,m)

             end do
          end do
       end do


c---------------------------------------------------------------------
c                         BACKSUBSTITUTION 
c---------------------------------------------------------------------

       k  = grid_points(3)-2
       k1 = grid_points(3)-1
       n = 0
       do   m = 1, 3
          do   j = 1, grid_points(2)-2
             do   i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) -
     >                             lhs(i,j,k,n+4)*rhs(i,j,k1,m)
             end do
          end do
       end do

       do    m = 4, 5
          n = (m-3)*5
          do   j = 1, grid_points(2)-2
             do   i = 1, grid_points(1)-2
                rhs(i,j,k,m) = rhs(i,j,k,m) -
     >                             lhs(i,j,k,n+4)*rhs(i,j,k1,m)
             end do
          end do
       end do

c---------------------------------------------------------------------
c      Whether or not this is the last processor, we always have
c      to complete the back-substitution 
c---------------------------------------------------------------------

c---------------------------------------------------------------------
c      The first three factors
c---------------------------------------------------------------------
       n = 0
       do   m = 1, 3
          do   k = grid_points(3)-3, 0, -1
             do   j = 1, grid_points(2)-2
                do    i = 1, grid_points(1)-2
                   k1 = k  + 1
                   k2 = k  + 2
                   rhs(i,j,k,m) = rhs(i,j,k,m) - 
     >                          lhs(i,j,k,n+4)*rhs(i,j,k1,m) -
     >                          lhs(i,j,k,n+5)*rhs(i,j,k2,m)
                end do
             end do
          end do
       end do

c---------------------------------------------------------------------
c      And the remaining two
c---------------------------------------------------------------------
       do    m = 4, 5
          n = (m-3)*5
          do   k = grid_points(3)-3, 0, -1
             do   j = 1, grid_points(2)-2
                do    i = 1, grid_points(1)-2
                   k1 = k  + 1
                   k2 = k  + 2
                   rhs(i,j,k,m) = rhs(i,j,k,m) - 
     >                          lhs(i,j,k,n+4)*rhs(i,j,k1,m) -
     >                          lhs(i,j,k,n+5)*rhs(i,j,k2,m)
                end do
             end do
          end do
       end do

       call tzetar

       return
       end
    






