static char rcsid[] = "$Id: max2.c,v 1.2 1998/07/21 02:31:36 tatebe Exp $";

#include <sys/types.h>

#ifdef __MPI__
#include <mpi.h>
#endif

#include "conf.h"
#include "misc.h"
#include "lu.h"

#ifdef __AP__
extern int tid;
#endif

/*
  This code assumes NUM_PROC_X is a power of 2.
  */

/*
 * max_y -- exchange a (p, max, piv) record with other processes and keep
 * the winning record.
 *
 * The local record is built from *piv_i, *local_max and local_piv[0].
 * For every i = NUM_PROC_X, 2*NUM_PROC_X, ... < NUM_PROC the current
 * record is exchanged with another process and the better one is kept:
 *   - the record with the larger `max` wins;
 *   - on equal `max`, the record with the smaller `p` wins.
 *
 * cid        id of the calling process, used to compute the partner ids
 * piv_i      in: local p value;   out: p of the selected record
 * local_max  in: local max value; out: max of the selected record
 * local_piv  in: local_piv[0] is the local piv value;
 *            out: local_piv[0] is the piv of the selected record
 *
 * The record is shipped as raw bytes, so every participating process
 * must use the same layout for struct data_type.
 */
void
max_y(cid, piv_i, local_max, local_piv)
    int cid;
    int *piv_i;
    double *local_max, *local_piv;
{
    struct data_type {
	int p;
	double max;
	double piv;
    } t, *tp;
#ifdef __MPI__
    struct data_type t1;	/* record received from the partner */
    MPI_Status	status;
#endif
#ifdef __AP__
    struct data_type *t2;	/* record returned by l_arecv() */
#endif
    int i;

    t.p = *piv_i;
    t.max = *local_max;
    t.piv = local_piv[0];

    tp = &t;

    for (i = NUM_PROC_X; i < NUM_PROC; i <<= 1) {
#ifdef __MPI__
	/*
	 * Both partners (cid and cid ^ i) send and receive in the same
	 * step.  MPI_Send is allowed to block until the matching receive
	 * is posted, so "send, then receive" on both sides can deadlock;
	 * MPI_Sendrecv performs the exchange safely.
	 */
	MPI_Sendrecv(tp, (int)sizeof(struct data_type), MPI_BYTE,
		     cid ^ i, MAX_VALUE,
		     &t1, (int)sizeof(struct data_type), MPI_BYTE,
		     cid ^ i, MAX_VALUE,
		     MPI_COMM_WORLD, &status);
#endif
#ifdef __AP__
	/*
	 * Send to (cid + i) and receive from (cid - i), both wrapped with
	 * MASK_XY, instead of pairing with cid ^ i.
	 * NOTE(review): the buffer returned by l_arecv() is presumably
	 * owned by the runtime and stays valid until the end of this
	 * function -- confirm against the l_arecv() documentation.
	 */
	l_asend((cid + i) & MASK_XY, tid, MAX_VALUE, tp,
		sizeof(struct data_type));
	t2 = (struct data_type *)
	    l_arecv((cid - i + NUM_PROC) & MASK_XY, tid, MAX_VALUE);
#endif
#ifdef __MPI__
	/* tp still points at t, so updating t updates the current record */
	if (t.max < t1.max || (t.max == t1.max && t.p > t1.p)) {
	    t = t1;
	}
#endif
#ifdef __AP__
	/* no copy: just switch to the received record */
	if (tp->max < t2->max || (tp->max == t2->max && tp->p > t2->p)) {
	    tp = t2;
	}
#endif
    }

    *piv_i = tp->p;
    *local_max = tp->max;
    local_piv[0] = tp->piv;
}


/*
 * max8_y -- same reduction as max_y, but each record carries eight piv
 * values instead of one.
 *
 * The local record is built from *piv_i, *local_max and local_piv[0..7].
 * For every i = NUM_PROC_X, 2*NUM_PROC_X, ... < NUM_PROC the current
 * record is exchanged with another process and the better one is kept:
 *   - the record with the larger `max` wins;
 *   - on equal `max`, the record with the smaller `p` wins.
 *
 * cid        id of the calling process, used to compute the partner ids
 * piv_i      in: local p value;   out: p of the selected record
 * local_max  in: local max value; out: max of the selected record
 * local_piv  array of at least 8 doubles;
 *            in: local piv values; out: piv values of the selected record
 *
 * The record is shipped as raw bytes, so every participating process
 * must use the same layout for struct data_type.
 */
void
max8_y(cid, piv_i, local_max, local_piv)
    int cid;
    int *piv_i;
    double *local_max, *local_piv;
{
    struct data_type {
	int p;
	double max;
	double piv[8];
    } t, *tp;
#ifdef __MPI__
    struct data_type t1;	/* record received from the partner */
    MPI_Status	status;
#endif
#ifdef __AP__
    struct data_type *t2;	/* record returned by l_arecv() */
#endif
    int i, k;
    int cidx, cidy;
    int npiv = (int)(sizeof t.piv / sizeof t.piv[0]);

    /*
     * NOTE(review): cidx and cidy are not used anywhere below.
     * lin_trec is presumably a macro that derives them from cid; the
     * call is kept as-is because its definition is not visible here.
     */
    lin_trec(cid, cidx, cidy);

    t.p = *piv_i;
    t.max = *local_max;
    for (k = 0; k < npiv; k++) {
	t.piv[k] = local_piv[k];
    }

    tp = &t;

    for (i = NUM_PROC_X; i < NUM_PROC; i <<= 1) {
#ifdef __MPI__
	/*
	 * Both partners (cid and cid ^ i) send and receive in the same
	 * step.  MPI_Send is allowed to block until the matching receive
	 * is posted, so "send, then receive" on both sides can deadlock;
	 * MPI_Sendrecv performs the exchange safely.
	 */
	MPI_Sendrecv(tp, (int)sizeof(struct data_type), MPI_BYTE,
		     cid ^ i, MAX_VALUE,
		     &t1, (int)sizeof(struct data_type), MPI_BYTE,
		     cid ^ i, MAX_VALUE,
		     MPI_COMM_WORLD, &status);
#endif
#ifdef __AP__
	/*
	 * Send to (cid + i) and receive from (cid - i), both wrapped with
	 * MASK_XY, instead of pairing with cid ^ i.
	 * NOTE(review): the buffer returned by l_arecv() is presumably
	 * owned by the runtime and stays valid until the end of this
	 * function -- confirm against the l_arecv() documentation.
	 */
	l_asend((cid + i) & MASK_XY, tid, MAX_VALUE, tp,
		sizeof(struct data_type));
	t2 = (struct data_type *)
	    l_arecv((cid - i + NUM_PROC) & MASK_XY, tid, MAX_VALUE);
#endif
#ifdef __MPI__
	/* tp still points at t, so updating t updates the current record */
	if (t.max < t1.max || (t.max == t1.max && t.p > t1.p)) {
	    t = t1;
	}
#endif
#ifdef __AP__
	/* no copy: just switch to the received record */
	if (tp->max < t2->max || (tp->max == t2->max && tp->p > t2->p)) {
	    tp = t2;
	}
#endif
    }

    *piv_i = tp->p;
    *local_max = tp->max;
    for (k = 0; k < npiv; k++) {
	local_piv[k] = tp->piv[k];
    }
}
