This source file includes the following definitions.
- NBC_Scan_args_compare
- nbc_exscan_init
- ompi_coll_libnbc_iexscan
- ompi_coll_libnbc_exscan_init
- exscan_sched_linear
- exscan_sched_recursivedoubling
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 #include "opal/align.h"
  22 #include "ompi/op/op.h"
  23 
  24 #include "nbc_internal.h"
  25 
  26 static inline int exscan_sched_linear(
  27     int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
  28     MPI_Datatype datatype,  MPI_Op op, char inplace, NBC_Schedule *schedule,
  29     void *tmpbuf);
  30 static inline int exscan_sched_recursivedoubling(
  31     int rank, int comm_size, const void *sendbuf, void *recvbuf,
  32     int count, MPI_Datatype datatype,  MPI_Op op, char inplace,
  33     NBC_Schedule *schedule, void *tmpbuf1, void *tmpbuf2);
  34 
  35 #ifdef NBC_CACHE_SCHEDULE
  36 
  37 int NBC_Scan_args_compare(NBC_Scan_args *a, NBC_Scan_args *b, void *param) {
  38     if ((a->sendbuf == b->sendbuf) &&
  39         (a->recvbuf == b->recvbuf) &&
  40         (a->count == b->count) &&
  41         (a->datatype == b->datatype) &&
  42         (a->op == b->op) ) {
  43         return 0;
  44     }
  45 
  46     if( a->sendbuf < b->sendbuf ) {
  47         return -1;
  48     }
  49 
  50     return 1;
  51 }
  52 #endif
  53 
  54 static int nbc_exscan_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
  55                            struct ompi_communicator_t *comm, ompi_request_t ** request,
  56                            struct mca_coll_base_module_2_3_0_t *module, bool persistent) {
  57     int rank, p, res;
  58     NBC_Schedule *schedule;
  59     char inplace;
  60     void *tmpbuf = NULL, *tmpbuf1 = NULL, *tmpbuf2 = NULL;
  61     enum { NBC_EXSCAN_LINEAR, NBC_EXSCAN_RDBL } alg;
  62     ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
  63     ptrdiff_t span, gap;
  64 
  65     NBC_IN_PLACE(sendbuf, recvbuf, inplace);
  66 
  67     rank = ompi_comm_rank(comm);
  68     p = ompi_comm_size(comm);
  69 
  70     if (p < 2) {
  71         return nbc_get_noop_request(persistent, request);
  72     }
  73 
  74     span = opal_datatype_span(&datatype->super, count, &gap);
  75     if (libnbc_iexscan_algorithm == 2) {
  76         alg = NBC_EXSCAN_RDBL;
  77         ptrdiff_t span_align = OPAL_ALIGN(span, datatype->super.align, ptrdiff_t);
  78         tmpbuf = malloc(span_align + span);
  79         if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; }
  80         tmpbuf1 = (void *)(-gap);
  81         tmpbuf2 = (char *)(span_align) - gap;
  82     } else {
  83         alg = NBC_EXSCAN_LINEAR;
  84         if (rank > 0) {
  85             tmpbuf = malloc(span);
  86             if (NULL == tmpbuf) { return OMPI_ERR_OUT_OF_RESOURCE; }
  87         }
  88     }
  89 
  90 #ifdef NBC_CACHE_SCHEDULE
  91     NBC_Scan_args *args, *found, search;
  92     
  93     search.sendbuf = sendbuf;
  94     search.recvbuf = recvbuf;
  95     search.count = count;
  96     search.datatype = datatype;
  97     search.op = op;
  98     found = (NBC_Scan_args *) hb_tree_search ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], &search);
  99     if (NULL == found) {
 100 #endif
 101     schedule = OBJ_NEW(NBC_Schedule);
 102     if (OPAL_UNLIKELY(NULL == schedule)) {
 103         free(tmpbuf);
 104         return OMPI_ERR_OUT_OF_RESOURCE;
 105     }
 106 
 107     if (alg == NBC_EXSCAN_LINEAR) {
 108         res = exscan_sched_linear(rank, p, sendbuf, recvbuf, count, datatype,
 109                                   op, inplace, schedule, tmpbuf);
 110     } else {
 111         res = exscan_sched_recursivedoubling(rank, p, sendbuf, recvbuf, count,
 112                                              datatype, op, inplace, schedule, tmpbuf1, tmpbuf2);
 113     }
 114     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 115         OBJ_RELEASE(schedule);
 116         free(tmpbuf);
 117         return res;
 118     }
 119 
 120     res = NBC_Sched_commit(schedule);
 121     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 122        OBJ_RELEASE(schedule);
 123        free(tmpbuf);
 124        return res;
 125     }
 126 
 127 #ifdef NBC_CACHE_SCHEDULE
 128         
 129         args = (NBC_Scan_args *) malloc (sizeof (args));
 130         if (NULL != args) {
 131             args->sendbuf = sendbuf;
 132             args->recvbuf = recvbuf;
 133             args->count = count;
 134             args->datatype = datatype;
 135             args->op = op;
 136             args->schedule = schedule;
 137             res = hb_tree_insert ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN], args, args, 0);
 138             if (0 == res) {
 139                 OBJ_RETAIN(schedule);
 140 
 141                 
 142                 if (++libnbc_module->NBC_Dict_size[NBC_EXSCAN] > NBC_SCHED_DICT_UPPER) {
 143                     NBC_SchedCache_dictwipe ((hb_tree *) libnbc_module->NBC_Dict[NBC_EXSCAN],
 144                                              &libnbc_module->NBC_Dict_size[NBC_EXSCAN]);
 145                 }
 146             } else {
 147                 NBC_Error("error in dict_insert() (%i)", res);
 148                 free (args);
 149             }
 150         }
 151     } else {
 152         
 153         schedule = found->schedule;
 154         OBJ_RETAIN(schedule);
 155     }
 156 #endif
 157 
 158     res = NBC_Schedule_request(schedule, comm, libnbc_module, persistent, request, tmpbuf);
 159     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 160         OBJ_RELEASE(schedule);
 161         free(tmpbuf);
 162         return res;
 163     }
 164 
 165     return OMPI_SUCCESS;
 166 }
 167 
 168 int ompi_coll_libnbc_iexscan(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
 169                              struct ompi_communicator_t *comm, ompi_request_t ** request,
 170                              struct mca_coll_base_module_2_3_0_t *module) {
 171     int res = nbc_exscan_init(sendbuf, recvbuf, count, datatype, op,
 172                               comm, request, module, false);
 173     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 174         return res;
 175     }
 176   
 177     res = NBC_Start(*(ompi_coll_libnbc_request_t **)request);
 178     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 179         NBC_Return_handle (*(ompi_coll_libnbc_request_t **)request);
 180         *request = &ompi_request_null.request;
 181         return res;
 182     }
 183 
 184     return OMPI_SUCCESS;
 185 }
 186 
 187 int ompi_coll_libnbc_exscan_init(const void* sendbuf, void* recvbuf, int count, MPI_Datatype datatype, MPI_Op op,
 188                                  struct ompi_communicator_t *comm, MPI_Info info, ompi_request_t ** request,
 189                                  struct mca_coll_base_module_2_3_0_t *module) {
 190     int res = nbc_exscan_init(sendbuf, recvbuf, count, datatype, op,
 191                               comm, request, module, true);
 192     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
 193         return res;
 194     }
 195 
 196     return OMPI_SUCCESS;
 197 }
 198 
 199 
 200 
 201 
 202 
 203 
 204 
 205 
 206 
 207 
 208 
 209 
 210 
 211 
 212 
 213 static inline int exscan_sched_linear(
 214     int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
 215     MPI_Datatype datatype,  MPI_Op op, char inplace, NBC_Schedule *schedule,
 216     void *tmpbuf)
 217 {
 218     int res = OMPI_SUCCESS;
 219     ptrdiff_t gap;
 220     opal_datatype_span(&datatype->super, count, &gap);
 221 
 222     if (rank > 0) {
 223         if (inplace) {
 224             res = NBC_Sched_copy(recvbuf, false, count, datatype,
 225                                  (char *)tmpbuf - gap, false, count, datatype, schedule, false);
 226         } else {
 227             res = NBC_Sched_copy((void *)sendbuf, false, count, datatype,
 228                                  (char *)tmpbuf - gap, false, count, datatype, schedule, false);
 229         }
 230         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 231 
 232         res = NBC_Sched_recv(recvbuf, false, count, datatype, rank - 1, schedule, false);
 233         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 234 
 235         if (rank < comm_size - 1) {
 236             
 237             res = NBC_Sched_barrier(schedule);
 238             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 239 
 240             res = NBC_Sched_op(recvbuf, false, (void *)(-gap), true, count,
 241                                datatype, op, schedule, true);
 242             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 243 
 244             
 245             res = NBC_Sched_send ((void *)(-gap), true, count, datatype, rank + 1, schedule, false);
 246             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 247         }
 248     } else if (comm_size > 1) {
 249         
 250         if (inplace) {
 251             res = NBC_Sched_send(recvbuf, false, count, datatype, 1, schedule, false);
 252         } else {
 253             res = NBC_Sched_send(sendbuf, false, count, datatype, 1, schedule, false);
 254         }
 255         if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 256     }
 257 
 258 cleanup_and_return:
 259     return res;
 260 }
 261 
 262 
 263 
 264 
 265 
 266 
 267 
 268 
 269 
 270 
 271 
 272 
 273 
 274 
 275 
 276 
 277 
 278 
 279 
 280 
 281 
 282 
 283 
 284 
 285 
 286 
 287 
 288 
 289 
 290 
 291 
 292 
 293 
 294 
 295 static inline int exscan_sched_recursivedoubling(
 296     int rank, int comm_size, const void *sendbuf, void *recvbuf, int count,
 297     MPI_Datatype datatype, MPI_Op op, char inplace,
 298     NBC_Schedule *schedule, void *tmpbuf1, void *tmpbuf2)
 299 {
 300     int res = OMPI_SUCCESS;
 301     char *psend = (char *)tmpbuf1;
 302     char *precv = (char *)tmpbuf2;
 303 
 304     if (!inplace) {
 305         res = NBC_Sched_copy((char *)sendbuf, false, count, datatype,
 306                              psend, true, count, datatype, schedule, true);
 307     } else {
 308         res = NBC_Sched_copy((char *)recvbuf, false, count, datatype,
 309                              psend, true, count, datatype, schedule, true);
 310     }
 311     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 312 
 313     int is_commute = ompi_op_is_commute(op);
 314     int is_first_block = 1;
 315 
 316     for (int mask = 1; mask < comm_size; mask <<= 1) {
 317         int remote = rank ^ mask;
 318         if (remote < comm_size) {
 319             res = NBC_Sched_send(psend, true, count, datatype, remote, schedule, false);
 320             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 321             res = NBC_Sched_recv(precv, true, count, datatype, remote, schedule, true);
 322             if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 323 
 324             if (rank > remote) {
 325                 
 326                 if (is_first_block) {
 327                     res = NBC_Sched_copy(precv, true, count, datatype,
 328                                          recvbuf, false, count, datatype, schedule, false);
 329                     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 330                     is_first_block = 0;
 331                 } else {
 332                     
 333                     res = NBC_Sched_op(precv, true, recvbuf, false, count,
 334                                        datatype, op, schedule, false);
 335                     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 336                 }
 337                 
 338                 res = NBC_Sched_op(precv, true, psend, true, count,
 339                                    datatype, op, schedule, true);
 340                 if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 341             } else {
 342                 if (is_commute) {
 343                     
 344                     res = NBC_Sched_op(precv, true, psend, true, count,
 345                                        datatype, op, schedule, true);
 346                     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 347                 } else {
 348                     
 349                     res = NBC_Sched_op(psend, true, precv, true, count,
 350                                        datatype, op, schedule, true);
 351                     if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
 352                     char *tmp = psend;
 353                     psend = precv;
 354                     precv = tmp;
 355                 }
 356             }
 357         }
 358     }
 359 
 360 cleanup_and_return:
 361     return res;
 362 }