/*
 *  Header file for configurations.
 */

/*
 *  Include guard: keeps the definitions in this header from being
 *  processed more than once per translation unit.
 *
 *  NOTE(review): identifiers that begin with two underscores are reserved
 *  for the implementation by ISO C.  Consider renaming the guard (for
 *  example to CONF_H), updating the comment on the closing #endif at the
 *  same time.
 */
#ifndef __conf_h__
#define __conf_h__

/*
 *  the number of processors
 *
 *  Caution: NUM_PROC_X and NUM_PROC_Y must each be a power of 2, because
 *  MASK_X and MASK_Y are derived from them as (NUM_PROC_[XY] - 1).
 */

/*
 *  Processor grid dimensions (NUM_PROC_X by NUM_PROC_Y).
 *
 *  MASK_X, MASK_Y and MASK_XY further down in this file are computed as
 *  (count - 1), which is a usable bit mask only when the count is a
 *  positive power of 2.  The checks below reject any other value at
 *  compile time instead of letting wrong masks be generated silently.
 */
#define NUM_PROC_X	2
#define NUM_PROC_Y	2

#if (NUM_PROC_X) <= 0 || ((NUM_PROC_X) & ((NUM_PROC_X) - 1)) != 0
#error "NUM_PROC_X must be a positive power of 2"
#endif

#if (NUM_PROC_Y) <= 0 || ((NUM_PROC_Y) & ((NUM_PROC_Y) - 1)) != 0
#error "NUM_PROC_Y must be a positive power of 2"
#endif


/*
 *  the size of a computation tile
 *
 *  The data cache must be able to hold more than
 *  TILE_K * TILE_J + 8 * TILE_K + 8 * TILE_J elements.  (The presets below
 *  are consistent with counting 8-byte elements, i.e. doubles, not bytes.)
 */

/*
 *  Active preset: ORIGIN 2000 (cache size: 4MB).
 *
 *  TILE_K and TILE_J are the two dimensions of a computation tile.
 *  To target a different machine, comment these two lines out and enable
 *  exactly one of the alternative presets that follow; leaving two presets
 *  enabled would define TILE_K / TILE_J twice.
 */
#define TILE_K		400
#define TILE_J		400

/*  for Cenju-3	(Cache Size : 1MB) */
/*
#define TILE_K		340
#define TILE_J		340
*/

/*  for AP+ (SuperSPARC; Cache Size : 16KB) */
/*
#define TILE_K		36
#define TILE_J		36
*/

/*  for AP1000	(Cache Size : 128KB) */
/*
#define TILE_K		120
#define TILE_J		120
*/

/*  for AP3000	(Cache Size (L1) : 16KB) */
/*
#define TILE_K		28
#define TILE_J		28
*/

/*
 *  the maximum problem size
 */

/*
 *  MAX_PSIZE is a base size (5000) plus one tile dimension (TILE_K).
 *  NOTE(review): the reason for adding TILE_K is not stated here; it is
 *  presumably padding -- confirm against the code that uses MAX_PSIZE.
 *
 *  Alternative kept from the original author, annotated
 *  "for less collision of cache line":
 *      #define MAX_PSIZE	(2048 + TILE_K)
 *
 *  N is a short alias for MAX_PSIZE.  Because it is a one-letter macro,
 *  every identifier spelled N in a file that includes this header is
 *  replaced by the preprocessor.
 */
#define MAX_PSIZE	(5000 + TILE_K)
#define N		MAX_PSIZE


/*
 *  you don't need to change the following lines
 */

/*
 *  Total processor count of the NUM_PROC_X x NUM_PROC_Y grid.
 *  NUM_PROC is a second spelling of the same value.
 */
#define NPROC		(NUM_PROC_X * NUM_PROC_Y)
#define NUM_PROC	NPROC

/*
 *  Masks derived from the processor counts: one less than the count in
 *  X, in Y, and over the whole grid.
 */
#define MASK_X	(NUM_PROC_X - 1)
#define MASK_Y	(NUM_PROC_Y - 1)
#define MASK_XY	(NPROC - 1)

/*
 *  Per-processor share of MAX_PSIZE elements, rounded up.
 *
 *  NUM_PER_PROC   : share when divided over all NPROC processors.
 *  NUM_PER_PROC_Y : share when divided over NUM_PROC_Y processors.
 *  NUM_PER_PROC_X : Warning -- assumes a dot(8, 1) distribution.
 *                   MAX_PSIZE is first rounded up to whole groups of 8,
 *                   the groups are divided (rounding up) over NUM_PROC_X
 *                   processors, and the result is converted back to an
 *                   element count, so it is always a multiple of 8.
 */
#define NUM_PER_PROC	((MAX_PSIZE + (NPROC - 1)) / NPROC)
#define NUM_PER_PROC_Y	((MAX_PSIZE + (NUM_PROC_Y - 1)) / NUM_PROC_Y)
#define NUM_PER_PROC_X	(8 * (((MAX_PSIZE + 7) / 8 + (NUM_PROC_X - 1)) / NUM_PROC_X))

/*
 *  Array extents: a row spans the full MAX_PSIZE, while the column and
 *  vector extents equal the per-processor share NUM_PER_PROC.
 */
#define ROW_SIZE	MAX_PSIZE
#define COL_SIZE	NUM_PER_PROC
#define VEC_SIZE	NUM_PER_PROC

/*
 *  mat: two-dimensional array of doubles with COL_SIZE rows of ROW_SIZE
 *  elements each.  ROW_SIZE is the full MAX_PSIZE; COL_SIZE is MAX_PSIZE
 *  divided by NPROC, rounded up.
 */
typedef double mat[COL_SIZE][ROW_SIZE];
/*
 *  dot_mat: two-dimensional array of doubles with NUM_PER_PROC_Y rows of
 *  NUM_PER_PROC_X + 1 elements each.
 *  NOTE(review): the extra element per row is presumably padding -- confirm
 *  against the code that indexes dot_mat.
 */
typedef double dot_mat[NUM_PER_PROC_Y][NUM_PER_PROC_X + 1];

#endif /* __conf_h__ */
