*
*     Serial MGCG method
*
*     Solve A x = b, where A is a tri-block diagonal matrix that arises
*     from discretization of 2-D problem.
*
*     $Id: mgcg_s.f,v 1.10 2000/05/01 17:31:25 tatebe Exp $
*

*
*     Copyright(C) 1999, 2000 Osamu Tatebe, all rights reserved,
*     no warranty.
*
*     This software is a free software.  All rights of this software
*     belong to Osamu Tatebe.  You can redistribute this whole package
*     as it is if you do not modify and inform me by E-mail.
*

*
*     Parameters:
*
*       INPUT:
*
*         G_LEV     Num. of grids
*         NUM_C     Num. of iterations on the coarsest grid
*         G_M1(G_LEV)   Array of num. of grid points in x-direction
*         G_M2(G_LEV)   Array of num. of grid points in y-direction
*         A1(NUM_DIAG, N * 4/3)  Coefficient matrices
*         B(N)      Right-hand term
*         X(N1)     Initial approximate
*         ITER      Maximum number of iterations
*         EPS       Epsilon 
*
*       WORK SPACE:
*
*         WS(*)
*                   Work array of dimension 2 * N + 2 * N1 + N * 4/3
*                                           + N * 4/3 + N / 3 + 2 * (M1+M2) + 4 * (G_LEV-1)
*
*       OUTPUT:
*
*         X(N1)     Approximate solution
*         ITER      Num. of iterations until convergence
*         IER       Error code
*
*       where M1 = G_M1(1), M2 = G_M2(1), N = M1 * M2 and N1 = (M1+2) * (M2+2).


      SUBROUTINE MGCG(G_LEV, NUM_C, G_M1, G_M2,
     $                A1, B, X, ITER, EPS, WS, IER)
*
      INCLUDE 'paraf.h'
*
      INTEGER   G_LEV, NUM_C, G_M1(G_LEV), G_M2(G_LEV)
      REAL*8    A1(NUM_DIAG, *)
      REAL*8    B(*), X(*), WS(*)
      INTEGER   ITER, IER
      REAL*8    EPS
*
      INTEGER   N, N1

      N = G_M1(1) * G_M2(1)
      N1 = (G_M1(1) + 2) * (G_M2(1) + 2)

      CALL MGCG_S(G_LEV, NUM_C, G_M1(1), G_M2(1),
     $            G_M1, G_M2, A1, B, X, ITER, EPS,
     $            WS(1), WS(N + 1),
     $            WS(N + N1 + 1),
     $            WS(N + 2 * N1 + 1),
     $            WS(2 * N + 2 * N1 + 1),
     $            WS(2 * N + 2 * N1 + N * 4 / 3 + 1), IER)

      RETURN
      END

*
*     Parameters:
*
*       INPUT:
*
*         G_LEV     Num. of grids
*         NUM_C     Num. of iterations on the coarsest grid
*         G_M11     Num. of grid points in x-direction on the finest grid (= G_M1(1))
*         G_M21     Num. of grid points in y-direction on the finest grid (= G_M2(1))
*         G_M1(*)   Array of num. of grid points in x-direction
*         G_M2(*)   Array of num. of grid points in y-direction
*         A1(NUM_DIAG, *)  Coefficient matrices
*         B(*)      Right-hand term
*         X(*)      Initial approximate
*         ITER      Maximum number of iterations
*         EPS       Epsilon 
*
*       WORK SPACE:
*
*         R(*), R1(*), P(*), P1(*)
*                   Work array of dimension G_M11 * G_M21
*         INV_A(*)
*                   Work array of dimension N * 4 / 3
*         WS(*)
*                   Work array of dimension N * 4 / 3 + N / 3 + 2 * (M1 + M2) + 4 * (G_LEV-1)
*
*       OUTPUT:
*
*         X(*)      Approximate solution
*         ITER      Num. of iterations until convergence
*         IER       Error code
*

      SUBROUTINE MGCG_S(G_LEV, NUM_C, G_M11, G_M21, G_M1, G_M2,
     $                  A1, B, X, ITER, EPS,
     $                  R, R1, P, P1, INV_A, WS, IER)
*
      INCLUDE 'paraf.h'
*
      INTEGER   G_LEV, NUM_C
      INTEGER   G_M11, G_M21, G_M1(G_LEV), G_M2(G_LEV)
      REAL*8    A1(NUM_DIAG, G_M11 * G_M21 * 4 / 3)
      REAL*8    B(G_M11, G_M21), X(0:G_M11+1, 0:G_M21+1)
      INTEGER   ITER, IER
      REAL*8    EPS
*     Work Space
      REAL*8    R(G_M11, G_M21), R1(0:G_M11+1, 0:G_M21+1)
      REAL*8    P(0:G_M11+1, 0:G_M21+1), P1(G_M11, G_M21)
      REAL*8    INV_A(G_M11 * G_M21 * 4 / 3)
      REAL*8    WS(G_M11 * G_M21 * 4/3
     $             + G_M11 * G_M21 / 3
     $             + 2 * (G_M11 + G_M21) + 4 * (G_LEV - 1))
*
      REAL*8    INI_RR, T_R, RR
      REAL*8    R1R1, T_R1, R1R2
      REAL*8    ALPHA, BETA, PP, T_P
*
      INTEGER   I, J
      INTEGER   N, N1
      INTEGER   D, D2
      INTEGER   L_L(MAX_NUM_GRID)
      INTEGER   IT

*
*     Initialize
*

      IT = 0
      INI_RR = 0.0
      R1R1 = 0.0
      PP = 0.0
      RR = 0.0
      R1R2 = 0.0

!$OMP PARALLEL PRIVATE(N, N1, D, D2, I, J, L_L,
!$OMP&                 T_R, T_R1, T_P, ALPHA, BETA)
!$OMP&         FIRSTPRIVATE(G_LEV, NUM_C, G_M11, G_M21, EPS)

      N = G_M11 * G_M21

*
*     At first, compute an inverse of the diagonal part of A1
*

      I = 1
      L_L(I) = 0
      N1 = N
      D = NUM_DIAG
      D2 = (D + 1) / 2

      IF (N1 .EQ. 0 .OR. I .GT. G_LEV) GO TO 20
   10 CONTINUE
!$OMP DO
          DO J = 1, N1
              INV_A(L_L(I) + J) = 1.0 / A1(D2, L_L(I) + J)
          END DO
          I = I + 1
          IF (I .LE. G_LEV) THEN
              L_L(I) = L_L(I - 1) + N1
              N1 = G_M1(I) * G_M2(I)
          END IF

      IF (N1 .NE. 0 .AND. I .LE. G_LEV) GO TO 10
   20 CONTINUE

*     /*** r = b - A x0, rr = sqrt(r, r) ***/

*      CALL RES_VEC_NORM_S(G_M11, G_M21, B, A1, X, R, INI_RR)
!$OMP DO REDUCTION(+:INI_RR)
      DO J = 1, G_M21
          DO I = 1, G_M11
              T_R = B(I, J) -
     $            (A1(1, I + (J-1) * G_M11) * X(I, J - 1) +
     $             A1(2, I + (J-1) * G_M11) * X(I - 1, J) +
     $             A1(3, I + (J-1) * G_M11) * X(I, J) +
     $             A1(4, I + (J-1) * G_M11) * X(I + 1, J) +
     $             A1(5, I + (J-1) * G_M11) * X(I, J + 1))
              R(I, J) = T_R
              INI_RR = INI_RR + T_R * T_R
          END DO
      END DO

!$OMP MASTER
      INI_RR = SQRT(INI_RR)

*#ifdef RESIDUAL
*      WRITE (6, 1000) IT, 1.0
* 1000 FORMAT ('RESIDUAL (', I3, ') = ', 1PG15.8)
*#endif
!$OMP END MASTER

*     /***  preconditioning  ***/

!$OMP DO 
      DO J = 0, G_M21 + 1
          DO I = 0, G_M11 + 1
              R1(I, J) = 0.0
          END DO
      END DO

      CALL MG_S(G_LEV, NUM_C, G_M11, G_M21, G_M1, G_M2,
     $          A1, INV_A, R, R1, WS, WS(N + 1), WS(N * 4 / 3 + 1))

*      DO J = 1, G_M21
*          DO I = 1, G_M11
*              R1(I, J) = R(I, J)
*          END DO
*      END DO

*     /***  p = r1, r1r1 = (r1, r) ***/

!$OMP DO REDUCTION(+:R1R1)
      DO J = 1, G_M21
          DO I = 1, G_M11
              T_R1 = R1(I, J)
              P(I, J) = T_R1
              R1R1 = R1R1 + T_R1 * R(I, J)
          END DO
      END DO
*!$OMP END DO NOWAIT

*     WHILE (1) {
   30 CONTINUE

*         /*** p1 = A p, pp = (p, p1) ***/

*!$OMP SINGLE
*          PP = 0.0
*!$OMP END SINGLE
*          CALL MUL_VEC_NORM_S(G_M11, G_M21, A1, P, P1, PP)
!$OMP DO REDUCTION(+:PP)
          DO J = 1, G_M21
              DO I = 1, G_M11
                  T_P =
     $                (A1(1, I + (J-1) * G_M11) * P(I, J - 1) +
     $                 A1(2, I + (J-1) * G_M11) * P(I - 1, J) +
     $                 A1(3, I + (J-1) * G_M11) * P(I, J) +
     $                 A1(4, I + (J-1) * G_M11) * P(I + 1, J) +
     $                 A1(5, I + (J-1) * G_M11) * P(I, J + 1))
                  P1(I, J) = T_P
                  PP = PP + T_P * P(I, J)
              END DO
          END DO

*         /*** alpha = (r1, r) / (p, Ap) ***/

          ALPHA = R1R1 / PP

*         /*
*          *  Update x & r;
*          *  x += alpha p, r -= alpha A p
*	   */

*	  /*  I think it is faster not to fuse two loops  */

!$OMP DO
          DO J = 1, G_M21
              DO I = 1, G_M11
                  X(I, J) = X(I, J) + ALPHA * P(I, J)
              END DO
          END DO
!$OMP END DO NOWAIT

!$OMP DO
          DO J = 1, G_M21
              DO I = 1, G_M11
                  R(I, J) = R(I, J) - ALPHA * P1(I, J)
              END DO
          END DO

*         /*****  convergence test  *****/
*!$OMP SINGLE
*          RR = 0.0
*!$OMP END SINGLE
!$OMP DO REDUCTION (+:RR)
          DO J = 1, G_M21
              DO I = 1, G_M11
                  RR = RR + R(I, J) * R(I, J)
              END DO
          END DO

*!$OMP SINGLE
!$OMP MASTER
          RR = SQRT(RR) / INI_RR
          IT = IT + 1

*#ifdef RESIDUAL
*          WRITE (6, 1000) IT, RR
*#endif
!$OMP END MASTER
!$OMP BARRIER
*!$OMP END SINGLE

          IF (RR .LT. EPS) GO TO 40
          IF (IT .GE. ITER) GO TO 40

*         /*****  preconditioning  *****/

!$OMP DO
          DO J = 0, G_M21 + 1
              DO I = 0, G_M11 + 1
                  R1(I, J) = 0.0
              END DO
          END DO

          CALL MG_S(G_LEV, NUM_C, G_M1(1), G_M2(1), G_M1, G_M2,
     $              A1, INV_A, R, R1, WS, WS(N + 1), WS(N * 4 / 3 + 1))

*          DO J = 1, G_M21
*              DO I = 1, G_M11
*                  R1(I, J) = R(I, J)
*              END DO
*          END DO

*         /*****  beta = (new_r1, new_r) / (r1, r)  *****/
*!$OMP SINGLE
*          R1R2 = 0
*!$OMP END SINGLE
!$OMP DO REDUCTION(+:R1R2)
          DO J = 1, G_M21
              DO I = 1, G_M11
                  R1R2 = R1R2 + R1(I, J) * R(I, J)
              END DO
          END DO

          BETA = R1R2 / R1R1

*         /*****  p = r1 + beta p  *****/

!$OMP DO
          DO J = 1, G_M21
              DO I = 1, G_M11
                  P(I, J) = R1(I, J) + BETA * P(I, J)
              END DO
          END DO
!$OMP END DO NOWAIT

*!$OMP SINGLE
!$OMP MASTER
          R1R1 = R1R2
          PP = 0.0
          RR = 0.0
          R1R2 = 0.0
!$OMP END MASTER
!$OMP BARRIER
*!$OMP END SINGLE

      GO TO 30
*     }

   40 CONTINUE

!$OMP END PARALLEL

      ITER = IT
      IER = 0

      RETURN
      END
