This source file includes the following definitions.
- NBC_Scan_args_compare
- nbc_scan_init
- scan_sched_linear
- scan_sched_recursivedoubling
- ompi_coll_libnbc_iscan
- ompi_coll_libnbc_scan_init
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 #include "opal/align.h"
  22 #include "ompi/op/op.h"
  23 
  24 #include "nbc_internal.h"
  25 
  26 static inline int scan_sched_linear(
  27     int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
  28     MPI_Datatype datatype,  MPI_Op op, char inplace, NBC_Schedule *schedule,
  29     void *tmpbuf);
  30 static inline int scan_sched_recursivedoubling(
  31     int rank, int comm_size, const void *sendbuf, void *recvbuf,
  32     int count, MPI_Datatype datatype,  MPI_Op op, char inplace,
  33     NBC_Schedule *schedule, void *tmpbuf1, void *tmpbuf2);
  34 
  35 #ifdef NBC_CACHE_SCHEDULE
  36 
  37 int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) {
  38   if ((a->sendbuf == b->sendbuf) &&
  39       (a->recvbuf == b->recvbuf) &&
  40       (a->count == b->count) &&
  41       (a->datatype == b->datatype) &&
  42       (a->op == b->op) ) {
  43     return 0;
  44   }
  45 
  46   if (a->sendbuf < b->sendbuf) {
  47     return -1;
  48   }
  49 
  50   return 1;
  51 }
  52 #endif
  53 
  54 static int nbc_scan_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
  55                          struct ompi_communicator_t *comm, ompi_request_t ** request,
  56                          struct mca_coll_base_module_2_3_0_t *module, bool persistent) {
  57     int rank, p, res;
  58     ptrdiff_t gap, span;
  59     NBC_Schedule *schedule;
  60     void *tmpbuf = NULL, *tmpbuf1 = NULL, *tmpbuf2 = NULL;
  61     enum { NBC_SCAN_LINEAR, NBC_SCAN_RDBL } alg;
  62     char inplace;
  63     ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
  64 
  65     NBC_IN_PLACE(sendbuf, recvbuf, inplace);
  66 
  67     rank = ompi_comm_rank (comm);
  68     p = ompi_comm_size (comm);
  69 
  70     if (count == 0) {
  71         return nbc_get_noop_request(persistent, request);
  72     }
  73 
  74     span = opal_datatype_span(&datatype->super, count, &gap);
  75     if (libnbc_iscan_algorithm == 2) {
  76         alg = NBC_SCAN_RDBL;
  77         ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
  78         tmpbuf = malloc(span_align + span);
  79         if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; }
  80         tmpbuf1 = (void *)(-gap);
  81         tmpbuf2 = (char *)(span_align) - gap;
  82     } else {
  83         alg = NBC_SCAN_LINEAR;
  84         if (rank > 0) {
  85             tmpbuf = malloc(span);
  86             if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; }
  87         }
  88     }
  89 
  90 #ifdef NBC_CACHE_SCHEDULE
  91   NBC_Scan_args *args, *found, search;
  92 
  93   
  94   search.sendbuf = sendbuf;
  95   search.recvbuf = recvbuf;
  96   search.count = count;
  97   search.datatype = datatype;
  98   search.op = op;
  99   found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], &search);
 100   if (NULL == found) {
 101 #endif
 102     schedule = OBJ_NEW(NBC_Schedule);
 103     if (OPAL_UNLIKELY(NULL == schedule)) {
 104         free(tmpbuf);
 105         return OMPI_ERR_OUT_OF_RESOURCE;
 106     }
 107 
 108     if (alg == NBC_SCAN_LINEAR) {
 109         res = scan_sched_linear(rank, p, sendbuf, recvbuf, count, datatype,
 110                                 op, inplace, schedule, tmpbuf);
 111     } else {
 112         res = scan_sched_recursivedoubling(rank, p, sendbuf, recvbuf, count,
 113                                            datatype, op, inplace, schedule, tmpbuf1, tmpbuf2);
 114     }
 115     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 116         OBJ_RELEASE(schedule);
 117         free(tmpbuf);
 118         return res;
 119     }
 120 
 121     res = NBC_Sched_commit(schedule);
 122     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 123         OBJ_RELEASE(schedule);
 124         free(tmpbuf);
 125         return res;
 126     }
 127 
 128 #ifdef NBC_CACHE_SCHEDULE
 129     
 130     args = (NBC_Scan_args *) malloc (sizeof (args));
 131     if (NULL != args) {
 132       args->sendbuf = sendbuf;
 133       args->recvbuf = recvbuf;
 134       args->count = count;
 135       args->datatype = datatype;
 136       args->op = op;
 137       args->schedule = schedule;
 138       res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN], args, args, 0);
 139       if (0 == res) {
 140         OBJ_RETAIN(schedule);
 141 
 142         
 143         if (++libnbc_module->NBC_Dict_size[NBC_SCAN] > NBC_SCHED_DICT_UPPER) {
 144           NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_SCAN],
 145                                    &libnbc_module->NBC_Dict_size[NBC_SCAN]);
 146         }
 147       } else {
 148         NBC_Error("error in dict_insert() (%i)", res);
 149         free (args);
 150       }
 151     }
 152   } else {
 153     
 154     schedule = found->schedule;
 155     OBJ_RETAIN(schedule);
 156   }
 157 #endif
 158 
 159     res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, tmpbuf);
 160     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 161         OBJ_RELEASE(schedule);
 162         free(tmpbuf);
 163         return res;
 164     }
 165 
 166     return OMPI_SUCCESS;
 167 }
 168 
 169 
 170 
 171 
 172 
 173 
 174 
 175 
 176 
 177 
 178 
 179 
 180 
 181 
 182 
 183 static inline int scan_sched_linear(
 184     int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
 185     MPI_Datatype datatype,  MPI_Op op, char inplace, NBC_Schedule *schedule,
 186     void *tmpbuf)
 187 {
 188     int res = OMPI_SUCCESS;
 189 
 190     if (!inplace) {
 191         
 192         res = NBC_Sched_copy((void *)sendbuf, false, count, datatype,
 193                              recvbuf, false, count, datatype, schedule, false);
 194         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 195     }
 196 
 197     if (rank > 0) {
 198         ptrdiff_t gap;
 199         opal_datatype_span(&datatype->super, count, &gap);
 200         
 201         res = NBC_Sched_recv((void *)(-gap), true, count, datatype, rank - 1, schedule, true);
 202         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 203 
 204         
 205         
 206         res = NBC_Sched_op((void *)(-gap), true, recvbuf, false, count, datatype, op, schedule,
 207                            true);
 208         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 209     }
 210 
 211     if (rank != comm_size - 1) {
 212         res = NBC_Sched_send(recvbuf, false, count, datatype, rank + 1, schedule, false);
 213         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 214     }
 215 
 216 cleanup_and_return:
 217     return res;
 218 }
 219 
 220 
 221 
 222 
 223 
 224 
 225 
 226 
 227 
 228 
 229 
 230 
 231 
 232 
 233 
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 static inline int scan_sched_recursivedoubling(
 254     int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
 255     MPI_Datatype datatype, MPI_Op op, char inplace,
 256     NBC_Schedule *schedule, void *tmpbuf1, void *tmpbuf2)
 257 {
 258     int res = OMPI_SUCCESS;
 259 
 260     if (!inplace) {
 261         res = NBC_Sched_copy((void *)sendbuf, false, count, datatype,
 262                               recvbuf, false, count, datatype, schedule, true);
 263         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 264     }
 265     if (comm_size < 2)
 266         goto cleanup_and_return;
 267 
 268     char *psend = (char *)tmpbuf1;
 269     char *precv = (char *)tmpbuf2;
 270     res = NBC_Sched_copy(recvbuf, false, count, datatype,
 271                          psend, true, count, datatype, schedule, true);
 272     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 273 
 274     int is_commute = ompi_op_is_commute(op);
 275     for (int mask = 1; mask < comm_size; mask <<= 1) {
 276         int remote = rank ^ mask;
 277         if (remote < comm_size) {
 278             res = NBC_Sched_send(psend, true, count, datatype, remote, schedule, false);
 279             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 280             res = NBC_Sched_recv(precv, true, count, datatype, remote, schedule, true);
 281             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 282 
 283             if (rank > remote) {
 284                 
 285                 res = NBC_Sched_op(precv, true, recvbuf, false, count,
 286                                    datatype, op, schedule, false);
 287                 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 288                 
 289                 res = NBC_Sched_op(precv, true, psend, true, count,
 290                                    datatype, op, schedule, true);
 291                 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 292             } else {
 293                 if (is_commute) {
 294                     
 295                     res = NBC_Sched_op(precv, true, psend, true, count,
 296                                        datatype, op, schedule, true);
 297                     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 298                 } else {
 299                     
 300                     res = NBC_Sched_op(psend, true, precv, true, count,
 301                                        datatype, op, schedule, true);
 302                     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 303                     char *tmp = psend;
 304                     psend = precv;
 305                     precv = tmp;
 306                 }
 307             }
 308         }
 309     }
 310 
 311  cleanup_and_return:
 312     return res;
 313 }
 314 
 315 int ompi_coll_libnbc_iscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
 316                            struct ompi_communicator_t *comm, ompi_request_t ** request,
 317                            struct mca_coll_base_module_2_3_0_t *module) {
 318     int res = nbc_scan_init(sendbuf, recvbuf, count, datatype, op,
 319                             comm, request, module, false);
 320     if (OPAL_LIKELY(OMPI_SUCCESS != res)) {
 321         return res;
 322     }
 323     res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
 324     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 325         NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
 326         *request = &ompi_request_null.request;
 327         return res;
 328     }
 329 
 330     return OMPI_SUCCESS;
 331 }
 332 
 333 int ompi_coll_libnbc_scan_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
 334                                struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
 335                                struct mca_coll_base_module_2_3_0_t *module) {
 336     int res = nbc_scan_init(sendbuf, recvbuf, count, datatype, op,
 337                             comm, request, module, true);
 338     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 339         return res;
 340     }
 341 
 342     return OMPI_SUCCESS;
 343 }