*
*     Serial MGCG method
*
*     Solve A x = b, where A is a tri-block diagonal matrix that arises
*     from discretization of 3-D problem.
*
*     $Id: mgcg_s.f,v 1.1 2000/06/22 07:27:46 tatebe Exp $
*

*
*     Copyright(C) 2000 Osamu Tatebe, all rights reserved, no warranty.
*
*     This software is a free software.  All rights of this software
*     belong to Osamu Tatebe.  You can redistribute this whole package
*     as it is if you do not modify and inform me by E-mail.
*

*
*     Parameters:
*
*       INPUT:
*
*         G_LEV     Num. of grids
*         NUM_C     Num. of iterations on the coarsest grid
*         G_M1(G_LEV)   Array of num. of grid points in x-direction
*         G_M2(G_LEV)   Array of num. of grid points in y-direction
*         G_M3(G_LEV)   Array of num. of grid points in z-direction
*         A1(NUM_DIAG, N * 8/7)  Coefficient matrices
*         B(N)      Right-hand term
*         X(N1)     Initial approximate
*         ITER      Maximum number of iterations
*         EPS       Epsilon 
*
*       WORK SPACE:
*
*         WS(*)
*                   Work array of dimension 2 * N + 2 * N1 + N * 8/7 + N * 22/7 + N1 * 8/7
*
*       OUTPUT:
*
*         X(N1)     Approximate solution
*         ITER      Num. of iterations until convergence
*         IER       Error code
*
*       where N = G_M1(1) * G_M2(1) * G_M3(1)
*         and N1 = (G_M1(1) + 2) * (G_M2(1) + 2) * (G_M3(1) + 2).


      SUBROUTINE MGCG(G_LEV, NUM_C, G_M1, G_M2, G_M3,
     $                A1, B, X, ITER, EPS, WS, IER)
*
      INCLUDE 'paraf.h'
*
      INTEGER   G_LEV, NUM_C, G_M1(G_LEV), G_M2(G_LEV), G_M3(G_LEV)
      REAL*8    A1(NUM_DIAG, *)
      REAL*8    B(*), X(*), WS(*)
      INTEGER   ITER, IER
      REAL*8    EPS
*
      INTEGER   N, N1

      N = G_M1(1) * G_M2(1) * G_M3(1)
      N1 = (G_M1(1) + 2) * (G_M2(1) + 2) * (G_M3(1) + 2)

      CALL MGCG_S(G_LEV, NUM_C, G_M1(1), G_M2(1), G_M3(1),
     $            G_M1, G_M2, G_M3, A1, B, X, ITER, EPS,
     $            WS(1), WS(N + 1),
     $            WS(N + N1 + 1),
     $            WS(N + 2 * N1 + 1),
     $            WS(2 * N + 2 * N1 + 1),
     $            WS(2 * N + 2 * N1 + N * 8 / 7 + 1), IER)

      RETURN
      END

*
*     Parameters:
*
*       INPUT:
*
*         G_LEV     Num. of grids
*         NUM_C     Num. of iterations on the coarsest grid
*         G_M11     Num. of grid points in x-direction on the finest grid (= G_M1(1))
*         G_M21     Num. of grid points in y-direction on the finest grid (= G_M2(1))
*         G_M31     Num. of grid points in z-direction on the finest grid (= G_M3(1))
*         G_M1(*)   Array of num. of grid points in x-direction
*         G_M2(*)   Array of num. of grid points in y-direction
*         G_M3(*)   Array of num. of grid points in z-direction
*         A1(NUM_DIAG, *)  Coefficient matrices
*         B(*)      Right-hand term
*         X(*)      Initial approximate
*         ITER      Maximum number of iterations
*         EPS       Epsilon 
*
*       WORK SPACE:
*
*         R(*), P1(*)
*                   Work array of dimension N
*         R1(*), P(*)
*                   Work array of dimension N1
*         INV_A(*)
*                   Work array of dimension N * 8/7
*         WS(*)
*                   Work array of dimension N * 22/7 + N1 * 8/7
*
*       OUTPUT:
*
*         X(*)      Approximate solution
*         ITER      Num. of iterations until convergence
*         IER       Error code
*

      SUBROUTINE MGCG_S(G_LEV, NUM_C, G_M11, G_M21, G_M31,
     $                  G_M1, G_M2, G_M3,
     $                  A1, B, X, ITER, EPS,
     $                  R, R1, P, P1, INV_A, WS, IER)
*
      INCLUDE 'paraf.h'
*
      INTEGER   G_LEV, NUM_C, G_M1(G_LEV), G_M2(G_LEV), G_M3(G_LEV)
      INTEGER   G_M11, G_M21, G_M31
      REAL*8    A1(NUM_DIAG, G_M11 * G_M21 * G_M31 * 8 / 7)
      REAL*8    B(G_M11, G_M21, G_M31)
      REAL*8    X(0:G_M11+1, 0:G_M21+1, 0:G_M31+1)
      INTEGER   ITER, IER
      REAL*8    EPS
*     Work Space
      REAL*8    R(G_M11, G_M21, G_M31)
      REAL*8    R1(0:G_M11+1, 0:G_M21+1, 0:G_M31+1)
      REAL*8    P(0:G_M11+1, 0:G_M21+1, 0:G_M31+1)
      REAL*8    P1(G_M11, G_M21, G_M31)
      REAL*8    INV_A(G_M11 * G_M21 * G_M31 * 8 / 7)
      REAL*8    WS(G_M11 * G_M21 * G_M31 * 22 / 7
     $               + (G_M11 + 2) * (G_M21 + 2) * (G_M31 + 2) * 8 / 7)
*
      REAL*8    INI_RR
      REAL*8    R1R1, T_R1
      REAL*8    ALPHA, BETA, PP
      REAL*8	RR
      REAL*8	R1R2
*
      INTEGER   I, J, K
      INTEGER   N, N1
      INTEGER   D, D2
      INTEGER   L_L(MAX_NUM_GRID)
      INTEGER   IT

*
*     Initialize
*

      N = G_M11 * G_M21 * G_M31
      IT = 0

      D = NUM_DIAG
      D2 = (D + 1) / 2

*
*     At first, compute an inverse of the diagonal part of A1
*

      I = 1
      L_L(I) = 0

      N1 = N

      IF (N1 .EQ. 0 .OR. I .GT. G_LEV) GO TO 20
   10 CONTINUE

!$OMP PARALLEL DO
          DO J = 1, N1
              INV_A(L_L(I) + J) = 1.0 / A1(D2, L_L(I) + J)
          END DO
          I = I + 1
          IF (I .LE. G_LEV) THEN
              L_L(I) = L_L(I - 1) + N1
              N1 = G_M1(I) * G_M2(I) * G_M3(I)
          END IF

      IF (N1 .NE. 0 .AND. I .LE. G_LEV) GO TO 10
   20 CONTINUE

*     /*** r = b - A x0, rr = sqrt(r, r) ***/

      CALL RES_VEC_NORM_S(G_M11, G_M21, G_M31, B, A1, X, R, INI_RR)
      
*#ifdef RESIDUAL
      WRITE (6, 1000) IT, 1.0
 1000 FORMAT ('RESIDUAL (', I3, ') = ', 1PG15.8)
*#endif

*     /***  preconditioning  ***/

!$OMP PARALLEL DO PRIVATE(J, I)
      DO K = 0, G_M31 + 1
          DO J = 0, G_M21 + 1
              DO I = 0, G_M11 + 1
                  R1(I, J, K) = 0.0
              END DO
          END DO
      END DO
      CALL MG_S(G_LEV, NUM_C, G_M11, G_M21, G_M31,
     $          G_M1, G_M2, G_M3,
     $          A1, INV_A, R, R1,
     $          WS, WS(2 * N + 1),
     $          WS(N * 10 / 3 + 1))

*      DO K = 1, G_M31
*          DO J = 1, G_M21
*              DO I = 1, G_M11
*                  R1(I, J, K) = R(I, J, K)
*              END DO
*          END DO
*      END DO

*     /***  p = r1, r1r1 = (r1, r) ***/

      R1R1 = 0

!$OMP PARALLEL DO PRIVATE(J, I, T_R1) REDUCTION(+:R1R1)
      DO K = 1, G_M31
          DO J = 1, G_M21
              DO I = 1, G_M11
                  T_R1 = R1(I, J, K)
                  P(I, J, K) = T_R1
                  R1R1 = R1R1 + T_R1 * R(I, J, K)
              END DO
          END DO
      END DO

*     WHILE (1) {
   30 CONTINUE

*         /*** p1 = A p, pp = (p, p1) ***/

          CALL MUL_VEC_NORM_S(G_M11, G_M21, G_M31, A1, P, P1, PP)

*         /*** alpha = (r1, r) / (p, Ap) ***/

          ALPHA = R1R1 / PP

*         /*
*          *  Update x & r;
*          *  x += alpha p, r -= alpha A p
*	   */

*	  /*  I think it is faster not to fuse two loops  */

          RR = 0

!$OMP PARALLEL PRIVATE(K, J, I) FIRSTPRIVATE(ALPHA)
!$OMP DO
          DO K = 1, G_M31
              DO J = 1, G_M21
                  DO I = 1, G_M11
                      X(I, J, K) = X(I, J, K) + ALPHA * P(I, J, K)
                  END DO
              END DO
          END DO
!$OMP END DO NOWAIT

!$OMP DO
          DO K = 1, G_M31
              DO J = 1, G_M21
                  DO I = 1, G_M11
                      R(I, J, K) = R(I, J, K) - ALPHA * P1(I, J, K)
                  END DO
              END DO
          END DO

*         /*****  convergence test  *****/

!$OMP DO REDUCTION(+:RR)
          DO K = 1, G_M31
              DO J = 1, G_M21
                  DO I = 1, G_M11
                      RR = RR + R(I, J, K) * R(I, J, K)
                  END DO
              END DO
          END DO
!$OMP END PARALLEL

          RR = SQRT(RR) / INI_RR

          IT = IT + 1

*#ifdef RESIDUAL
          WRITE (6, 1000) IT, RR
*#endif

          IF (RR .LT. EPS) GO TO 40
          IF (IT .GE. ITER) GO TO 40

*         /*****  preconditioning  *****/

!$OMP PARALLEL DO PRIVATE(J, I)
          DO K = 0, G_M31 + 1
              DO J = 0, G_M21 + 1
                  DO I = 0, G_M11 + 1
                      R1(I, J, K) = 0.0
                  END DO
              END DO
          END DO
         CALL MG_S(G_LEV, NUM_C, G_M11, G_M21, G_M31, G_M1, G_M2, G_M3,
     $              A1, INV_A,
     $              R, R1, WS, WS(2 * N + 1),
     $              WS(N * 10 / 3 + 1))

*          DO K = 1, G_M31
*              DO J = 1, G_M21
*                  DO I = 1, G_M11
*                      R1(I, J, K) = R(I, J, K)
*                  END DO
*              END DO
*          END DO

*         /*****  beta = (new_r1, new_r) / (r1, r)  *****/

          R1R2 = 0
!$OMP PARALLEL PRIVATE(I, J, K, BETA) FIRSTPRIVATE(R1R1)
!$OMP DO REDUCTION(+:R1R2)
          DO K = 1, G_M31
              DO J = 1, G_M21
                  DO I = 1, G_M11
                      R1R2 = R1R2 + R1(I, J, K) * R(I, J, K)
                  END DO
              END DO
          END DO

          BETA = R1R2 / R1R1

*         /*****  p = r1 + beta p  *****/

!$OMP DO
          DO K = 1, G_M31
              DO J = 1, G_M21
                  DO I = 1, G_M11
                      P(I, J, K) = R1(I, J, K) + BETA * P(I, J, K)
                  END DO
              END DO
          END DO
!$OMP END PARALLEL

          R1R1 = R1R2

      GO TO 30
*     }

   40 CONTINUE

      ITER = IT
      IER = 0

      RETURN
      END
