mca_btl_smcuda_component 88 opal/mca/btl/smcuda/btl_smcuda.c .btl_component = &mca_btl_smcuda_component.super, mca_btl_smcuda_component 126 opal/mca/btl/smcuda/btl_smcuda.c mca_mpool_base_module_t *mpool = mca_btl_smcuda_component.sm_mpool; mca_btl_smcuda_component 235 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component_t* m = &mca_btl_smcuda_component; mca_btl_smcuda_component 240 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.mem_node = my_mem_node = 0; mca_btl_smcuda_component 241 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.num_mem_nodes = num_mem_nodes = 1; mca_btl_smcuda_component 250 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.num_mem_nodes = num_mem_nodes = strtoul(loc, NULL, 10); mca_btl_smcuda_component 265 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.num_mem_nodes = num_mem_nodes = i; mca_btl_smcuda_component 273 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.mem_node = my_mem_node = -1; mca_btl_smcuda_component 282 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.mem_node = my_mem_node = -1; mca_btl_smcuda_component 286 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.mem_node = my_mem_node; mca_btl_smcuda_component 318 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.mem_node = my_mem_node = numa; mca_btl_smcuda_component 320 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.mem_node = my_mem_node = -1; mca_btl_smcuda_component 330 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpools = mca_btl_smcuda_component 337 opal/mca/btl/smcuda/btl_smcuda.c res->allocator = mca_btl_smcuda_component.allocator; mca_btl_smcuda_component 344 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpools[0] = common_sm_mpool_create (res); mca_btl_smcuda_component 346 opal/mca/btl/smcuda/btl_smcuda.c if (NULL == mca_btl_smcuda_component.sm_mpools[0]) { mca_btl_smcuda_component 351 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool = mca_btl_smcuda_component.sm_mpools[0]; mca_btl_smcuda_component 353 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool_base = mca_btl_smcuda_component 354 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpools[0]->mpool_base(mca_btl_smcuda_component.sm_mpools[0]); mca_btl_smcuda_component 357 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_peers = (struct mca_btl_base_endpoint_t**) mca_btl_smcuda_component 359 opal/mca/btl/smcuda/btl_smcuda.c if (NULL == mca_btl_smcuda_component.sm_peers) { mca_btl_smcuda_component 378 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool_base, (int)res->size); mca_btl_smcuda_component 379 opal/mca/btl/smcuda/btl_smcuda.c mca_common_cuda_register(mca_btl_smcuda_component.sm_mpool_base, res->size, "smcuda"); mca_btl_smcuda_component 396 opal/mca/btl/smcuda/btl_smcuda.c if(mca_btl_smcuda_component.sm_max_procs > 0 && mca_btl_smcuda_component 397 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.num_smp_procs + n > mca_btl_smcuda_component 398 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_max_procs) { mca_btl_smcuda_component 402 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.shm_fifo = (volatile sm_fifo_t **)mca_btl_smcuda_component.sm_seg->module_data_addr; mca_btl_smcuda_component 403 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.shm_bases = (char**)(mca_btl_smcuda_component.shm_fifo + n); mca_btl_smcuda_component 404 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.shm_mem_nodes = (uint16_t*)(mca_btl_smcuda_component.shm_bases + n); mca_btl_smcuda_component 407 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank] = mca_btl_smcuda_component 408 opal/mca/btl/smcuda/btl_smcuda.c (char*)mca_btl_smcuda_component.sm_mpool_base; mca_btl_smcuda_component 409 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.shm_mem_nodes[mca_btl_smcuda_component.my_smp_rank] = mca_btl_smcuda_component 416 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.shm_fifo[mca_btl_smcuda_component.my_smp_rank] = my_fifos; mca_btl_smcuda_component 420 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.fifo = (sm_fifo_t**)malloc(sizeof(sm_fifo_t*) * n); mca_btl_smcuda_component 422 opal/mca/btl/smcuda/btl_smcuda.c if(NULL == mca_btl_smcuda_component.fifo) mca_btl_smcuda_component 425 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.fifo[mca_btl_smcuda_component.my_smp_rank] = my_fifos; mca_btl_smcuda_component 427 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.mem_nodes = (uint16_t *) malloc(sizeof(uint16_t) * n); mca_btl_smcuda_component 428 opal/mca/btl/smcuda/btl_smcuda.c if(NULL == mca_btl_smcuda_component.mem_nodes) mca_btl_smcuda_component 436 opal/mca/btl/smcuda/btl_smcuda.c sizeof(mca_btl_smcuda_hdr_t) + mca_btl_smcuda_component.eager_limit; mca_btl_smcuda_component 437 opal/mca/btl/smcuda/btl_smcuda.c i = opal_free_list_init (&mca_btl_smcuda_component.sm_frags_eager, length, mca_btl_smcuda_component 440 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_num, mca_btl_smcuda_component 441 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_max, mca_btl_smcuda_component 442 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_inc, mca_btl_smcuda_component 443 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool, 0, NULL, NULL, NULL); mca_btl_smcuda_component 449 opal/mca/btl/smcuda/btl_smcuda.c sizeof(mca_btl_smcuda_hdr_t) + mca_btl_smcuda_component.max_frag_size; mca_btl_smcuda_component 450 opal/mca/btl/smcuda/btl_smcuda.c i = opal_free_list_init (&mca_btl_smcuda_component.sm_frags_max, length, mca_btl_smcuda_component 453 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_num, mca_btl_smcuda_component 454 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_max, mca_btl_smcuda_component 455 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_inc, mca_btl_smcuda_component 456 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool, 0, NULL, NULL, NULL); mca_btl_smcuda_component 460 opal/mca/btl/smcuda/btl_smcuda.c i = opal_free_list_init (&mca_btl_smcuda_component.sm_frags_user, mca_btl_smcuda_component 464 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_num, mca_btl_smcuda_component 465 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_max, mca_btl_smcuda_component 466 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_free_list_inc, mca_btl_smcuda_component 467 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool, 0, NULL, NULL, NULL); mca_btl_smcuda_component 471 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.num_outstanding_frags = 0; mca_btl_smcuda_component 473 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.num_pending_sends = 0; mca_btl_smcuda_component 474 opal/mca/btl/smcuda/btl_smcuda.c i = opal_free_list_init (&mca_btl_smcuda_component.pending_send_fl, mca_btl_smcuda_component 500 opal/mca/btl/smcuda/btl_smcuda.c ep->peer_smp_rank = local_proc + mca_btl_smcuda_component.num_smp_procs; mca_btl_smcuda_component 562 opal/mca/btl/smcuda/btl_smcuda.c my_smp_rank = mca_btl_smcuda_component.my_smp_rank = n_local_procs++; mca_btl_smcuda_component 599 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_max_procs); mca_btl_smcuda_component 610 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_peers[peers[proc]->peer_smp_rank] = peers[proc]; mca_btl_smcuda_component 614 opal/mca/btl/smcuda/btl_smcuda.c bases = mca_btl_smcuda_component.shm_bases; mca_btl_smcuda_component 615 opal/mca/btl/smcuda/btl_smcuda.c sm_mpool_modp = (mca_common_sm_mpool_module_t *)mca_btl_smcuda_component.sm_mpool; mca_btl_smcuda_component 623 opal/mca/btl/smcuda/btl_smcuda.c for(j = mca_btl_smcuda_component.num_smp_procs; mca_btl_smcuda_component 624 opal/mca/btl/smcuda/btl_smcuda.c j < mca_btl_smcuda_component.num_smp_procs + FIFO_MAP_NUM(n_local_procs); j++) { mca_btl_smcuda_component 626 opal/mca/btl/smcuda/btl_smcuda.c return_code = sm_fifo_init( mca_btl_smcuda_component.fifo_size, mca_btl_smcuda_component 627 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool, mca_btl_smcuda_component 628 opal/mca/btl/smcuda/btl_smcuda.c &mca_btl_smcuda_component.fifo[my_smp_rank][j], mca_btl_smcuda_component 629 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.fifo_lazy_free); mca_btl_smcuda_component 639 opal/mca/btl/smcuda/btl_smcuda.c (void)opal_atomic_add_fetch_32(&mca_btl_smcuda_component.sm_seg->module_seg->seg_inited, 1); mca_btl_smcuda_component 641 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_seg->module_seg->seg_inited) { mca_btl_smcuda_component 650 opal/mca/btl/smcuda/btl_smcuda.c mca_common_sm_module_unlink(mca_btl_smcuda_component.sm_seg)) { mca_btl_smcuda_component 665 opal/mca/btl/smcuda/btl_smcuda.c if (-1 == unlink(mca_btl_smcuda_component.sm_mpool_rndv_file_name)) { mca_btl_smcuda_component 667 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool_rndv_file_name); mca_btl_smcuda_component 669 opal/mca/btl/smcuda/btl_smcuda.c if (-1 == unlink(mca_btl_smcuda_component.sm_rndv_file_name)) { mca_btl_smcuda_component 671 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_rndv_file_name); mca_btl_smcuda_component 676 opal/mca/btl/smcuda/btl_smcuda.c free(mca_btl_smcuda_component.sm_mpool_ctl_file_name); mca_btl_smcuda_component 677 opal/mca/btl/smcuda/btl_smcuda.c free(mca_btl_smcuda_component.sm_mpool_rndv_file_name); mca_btl_smcuda_component 678 opal/mca/btl/smcuda/btl_smcuda.c free(mca_btl_smcuda_component.sm_ctl_file_name); mca_btl_smcuda_component 679 opal/mca/btl/smcuda/btl_smcuda.c free(mca_btl_smcuda_component.sm_rndv_file_name); mca_btl_smcuda_component 682 opal/mca/btl/smcuda/btl_smcuda.c for(j = mca_btl_smcuda_component.num_smp_procs; mca_btl_smcuda_component 683 opal/mca/btl/smcuda/btl_smcuda.c j < mca_btl_smcuda_component.num_smp_procs + n_local_procs; j++) { mca_btl_smcuda_component 689 opal/mca/btl/smcuda/btl_smcuda.c while(NULL == mca_btl_smcuda_component.shm_fifo[j]) { mca_btl_smcuda_component 698 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.fifo[j] = mca_btl_smcuda_component 699 opal/mca/btl/smcuda/btl_smcuda.c (sm_fifo_t*)OFFSET2ADDR(diff, mca_btl_smcuda_component.shm_fifo[j]); mca_btl_smcuda_component 702 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.mem_nodes[j] = mca_btl_smcuda_component.shm_mem_nodes[j]; mca_btl_smcuda_component 706 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.num_smp_procs += n_local_procs; mca_btl_smcuda_component 709 opal/mca/btl/smcuda/btl_smcuda.c return_code = opal_free_list_resize_mt (&mca_btl_smcuda_component.sm_frags_eager, mca_btl_smcuda_component 710 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.num_smp_procs * 2); mca_btl_smcuda_component 780 opal/mca/btl/smcuda/btl_smcuda.c if(size <= mca_btl_smcuda_component.eager_limit) { mca_btl_smcuda_component 782 opal/mca/btl/smcuda/btl_smcuda.c } else if (size <= mca_btl_smcuda_component.max_frag_size) { mca_btl_smcuda_component 830 opal/mca/btl/smcuda/btl_smcuda.c if ( reserve + max_data <= mca_btl_smcuda_component.eager_limit ) { mca_btl_smcuda_component 914 opal/mca/btl/smcuda/btl_smcuda.c if ( mca_btl_smcuda_component.num_outstanding_frags * 2 > (int) mca_btl_smcuda_component.fifo_size ) { mca_btl_smcuda_component 919 opal/mca/btl/smcuda/btl_smcuda.c if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) { mca_btl_smcuda_component 932 opal/mca/btl/smcuda/btl_smcuda.c if( length < mca_btl_smcuda_component.eager_limit ) { mca_btl_smcuda_component 979 opal/mca/btl/smcuda/btl_smcuda.c OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); mca_btl_smcuda_component 1006 opal/mca/btl/smcuda/btl_smcuda.c if ( mca_btl_smcuda_component.num_outstanding_frags * 2 > (int) mca_btl_smcuda_component.fifo_size ) { mca_btl_smcuda_component 1011 opal/mca/btl/smcuda/btl_smcuda.c if (mca_common_cuda_enabled && (IPC_INIT == endpoint->ipcstate) && mca_btl_smcuda_component.use_cuda_ipc) { mca_btl_smcuda_component 1029 opal/mca/btl/smcuda/btl_smcuda.c OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); mca_btl_smcuda_component 1212 opal/mca/btl/smcuda/btl_smcuda.c if ( mca_btl_smcuda_component.num_outstanding_frags * 2 > (int) mca_btl_smcuda_component.fifo_size ) { mca_btl_smcuda_component 1244 opal/mca/btl/smcuda/btl_smcuda.c OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); mca_btl_smcuda_component 1245 opal/mca/btl/smcuda/btl_smcuda.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 1248 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.my_smp_rank, mca_btl_smcuda_component 1288 opal/mca/btl/smcuda/btl_smcuda.c if( NULL != mca_btl_smcuda_component.sm_mpool && mca_btl_smcuda_component 1289 opal/mca/btl/smcuda/btl_smcuda.c NULL != mca_btl_smcuda_component.sm_mpool->mpool_ft_event) { mca_btl_smcuda_component 1290 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool->mpool_ft_event(state); mca_btl_smcuda_component 1294 opal/mca/btl/smcuda/btl_smcuda.c if( NULL != mca_btl_smcuda_component.sm_seg ) { mca_btl_smcuda_component 1310 opal/mca/btl/smcuda/btl_smcuda.c if( NULL != mca_btl_smcuda_component.sm_seg ) { mca_btl_smcuda_component 1312 opal/mca/btl/smcuda/btl_smcuda.c opal_crs_base_cleanup_append(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name, false); mca_btl_smcuda_component 1316 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool = NULL; mca_btl_smcuda_component 1321 opal/mca/btl/smcuda/btl_smcuda.c if( NULL != mca_btl_smcuda_component.sm_seg ) { mca_btl_smcuda_component 1323 opal/mca/btl/smcuda/btl_smcuda.c opal_crs_base_cleanup_append(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name, false); mca_btl_smcuda_component 1327 opal/mca/btl/smcuda/btl_smcuda.c mca_btl_smcuda_component.sm_mpool = NULL; mca_btl_smcuda_component 213 opal/mca/btl/smcuda/btl_smcuda.h OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component; mca_btl_smcuda_component 246 opal/mca/btl/smcuda/btl_smcuda.h #define VIRTUAL2RELATIVE(VADDR ) ((long)(VADDR) - (long)mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank]) mca_btl_smcuda_component 247 opal/mca/btl/smcuda/btl_smcuda.h #define RELATIVE2VIRTUAL(OFFSET) ((long)(OFFSET) + (long)mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank]) mca_btl_smcuda_component 85 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component_t mca_btl_smcuda_component = { mca_btl_smcuda_component 117 opal/mca/btl/smcuda/btl_smcuda_component.c (void) mca_base_component_var_register (&mca_btl_smcuda_component.super.btl_version, mca_btl_smcuda_component 131 opal/mca/btl/smcuda/btl_smcuda_component.c (void) mca_base_component_var_register (&mca_btl_smcuda_component.super.btl_version, mca_btl_smcuda_component 146 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.mpool_min_size = 134217728; mca_btl_smcuda_component 147 opal/mca/btl/smcuda/btl_smcuda_component.c (void) mca_base_component_var_register(&mca_btl_smcuda_component.super.btl_version, "min_size", mca_btl_smcuda_component 151 opal/mca/btl/smcuda/btl_smcuda_component.c &mca_btl_smcuda_component.mpool_min_size); mca_btl_smcuda_component 153 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("free_list_num", 8, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_num); mca_btl_smcuda_component 154 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("free_list_max", -1, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_max); mca_btl_smcuda_component 155 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("free_list_inc", 64, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_free_list_inc); mca_btl_smcuda_component 156 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("max_procs", -1, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.sm_max_procs); mca_btl_smcuda_component 159 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_mpool_name = "sm"; mca_btl_smcuda_component 160 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_uint("fifo_size", 4096, OPAL_INFO_LVL_4, &mca_btl_smcuda_component.fifo_size); mca_btl_smcuda_component 161 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("num_fifos", 1, OPAL_INFO_LVL_4, &mca_btl_smcuda_component.nfifos); mca_btl_smcuda_component 163 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_uint("fifo_lazy_free", 120, OPAL_INFO_LVL_5, &mca_btl_smcuda_component.fifo_lazy_free); mca_btl_smcuda_component 166 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("sm_extra_procs", 0, OPAL_INFO_LVL_9, &mca_btl_smcuda_component.sm_extra_procs); mca_btl_smcuda_component 168 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.allocator = "bucket"; mca_btl_smcuda_component 169 opal/mca/btl/smcuda/btl_smcuda_component.c (void) mca_base_component_var_register (&mca_btl_smcuda_component.super.btl_version, "allocator", mca_btl_smcuda_component 172 opal/mca/btl/smcuda/btl_smcuda_component.c MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_smcuda_component.allocator); mca_btl_smcuda_component 181 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("use_cuda_ipc", 1, OPAL_INFO_LVL_4, &mca_btl_smcuda_component.use_cuda_ipc); mca_btl_smcuda_component 182 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("use_cuda_ipc_same_gpu", 1, OPAL_INFO_LVL_4,&mca_btl_smcuda_component.use_cuda_ipc_same_gpu); mca_btl_smcuda_component 183 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_param_register_int("cuda_ipc_verbose", 0, OPAL_INFO_LVL_4, &mca_btl_smcuda_component.cuda_ipc_verbose); mca_btl_smcuda_component 184 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.cuda_ipc_output = opal_output_open(NULL); mca_btl_smcuda_component 185 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_set_verbosity(mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component.cuda_ipc_verbose); mca_btl_smcuda_component 201 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_base_param_register(&mca_btl_smcuda_component.super.btl_version, mca_btl_smcuda_component 229 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_max_btls = 1; mca_btl_smcuda_component 232 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.nfifos = opal_next_poweroftwo_inclusive (mca_btl_smcuda_component.nfifos); mca_btl_smcuda_component 235 opal/mca/btl/smcuda/btl_smcuda_component.c if (mca_btl_smcuda_component.fifo_lazy_free >= (mca_btl_smcuda_component.fifo_size >> 1) ) mca_btl_smcuda_component 236 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.fifo_lazy_free = (mca_btl_smcuda_component.fifo_size >> 1); mca_btl_smcuda_component 237 opal/mca/btl/smcuda/btl_smcuda_component.c if (mca_btl_smcuda_component.fifo_lazy_free <= 0) mca_btl_smcuda_component 238 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.fifo_lazy_free = 1; mca_btl_smcuda_component 240 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_max_send_size; mca_btl_smcuda_component 241 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.eager_limit = mca_btl_smcuda.super.btl_eager_limit; mca_btl_smcuda_component 246 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.max_frag_size = mca_btl_smcuda.super.btl_cuda_max_send_size; mca_btl_smcuda_component 251 opal/mca/btl/smcuda/btl_smcuda_component.c (int)mca_btl_smcuda_component.max_frag_size); mca_btl_smcuda_component 255 opal/mca/btl/smcuda/btl_smcuda_component.c OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_lock, opal_mutex_t); mca_btl_smcuda_component 256 opal/mca/btl/smcuda/btl_smcuda_component.c OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_eager, opal_free_list_t); mca_btl_smcuda_component 257 opal/mca/btl/smcuda/btl_smcuda_component.c OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_max, opal_free_list_t); mca_btl_smcuda_component 258 opal/mca/btl/smcuda/btl_smcuda_component.c OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_frags_user, opal_free_list_t); mca_btl_smcuda_component 259 opal/mca/btl/smcuda/btl_smcuda_component.c OBJ_CONSTRUCT(&mca_btl_smcuda_component.pending_send_fl, opal_free_list_t); mca_btl_smcuda_component 273 opal/mca/btl/smcuda/btl_smcuda_component.c OBJ_DESTRUCT(&mca_btl_smcuda_component.sm_lock); mca_btl_smcuda_component 283 opal/mca/btl/smcuda/btl_smcuda_component.c if(mca_btl_smcuda_component.sm_seg != NULL) { mca_btl_smcuda_component 284 opal/mca/btl/smcuda/btl_smcuda_component.c return_value = mca_common_sm_fini( mca_btl_smcuda_component.sm_seg ); mca_btl_smcuda_component 301 opal/mca/btl/smcuda/btl_smcuda_component.c unlink(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name); mca_btl_smcuda_component 304 opal/mca/btl/smcuda/btl_smcuda_component.c unlink(mca_btl_smcuda_component.sm_seg->shmem_ds.seg_name); mca_btl_smcuda_component 306 opal/mca/btl/smcuda/btl_smcuda_component.c OBJ_RELEASE(mca_btl_smcuda_component.sm_seg); mca_btl_smcuda_component 311 opal/mca/btl/smcuda/btl_smcuda_component.c if(mca_btl_smcuda_component.sm_fifo_fd > 0) { mca_btl_smcuda_component 314 opal/mca/btl/smcuda/btl_smcuda_component.c if( write(mca_btl_smcuda_component.sm_fifo_fd,&cmd,sizeof(cmd)) != mca_btl_smcuda_component 319 opal/mca/btl/smcuda/btl_smcuda_component.c opal_thread_join(&mca_btl_smcuda_component.sm_fifo_thread, NULL); mca_btl_smcuda_component 320 opal/mca/btl/smcuda/btl_smcuda_component.c close(mca_btl_smcuda_component.sm_fifo_fd); mca_btl_smcuda_component 321 opal/mca/btl/smcuda/btl_smcuda_component.c unlink(mca_btl_smcuda_component.sm_fifo_path); mca_btl_smcuda_component 350 opal/mca/btl/smcuda/btl_smcuda_component.c if (0 > mca_btl_smcuda_component.sm_max_procs) { mca_btl_smcuda_component 352 opal/mca/btl/smcuda/btl_smcuda_component.c if (0 <= mca_btl_smcuda_component.sm_extra_procs) { mca_btl_smcuda_component 354 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_max_procs = mca_btl_smcuda_component 355 opal/mca/btl/smcuda/btl_smcuda_component.c n + mca_btl_smcuda_component.sm_extra_procs; mca_btl_smcuda_component 358 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_max_procs = 2 * n; mca_btl_smcuda_component 406 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.fifo_size + 4 * opal_cache_line_size) + mca_btl_smcuda_component 407 opal/mca/btl/smcuda/btl_smcuda_component.c (2 * max_procs + mca_btl_smcuda_component.sm_free_list_inc) * mca_btl_smcuda_component 408 opal/mca/btl/smcuda/btl_smcuda_component.c (mca_btl_smcuda_component.eager_limit + 2 * opal_cache_line_size) + mca_btl_smcuda_component 409 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_free_list_num * mca_btl_smcuda_component 410 opal/mca/btl/smcuda/btl_smcuda_component.c (mca_btl_smcuda_component.max_frag_size + 2 * opal_cache_line_size); mca_btl_smcuda_component 512 opal/mca/btl/smcuda/btl_smcuda_component.c if (size < mca_btl_smcuda_component.mpool_min_size) { mca_btl_smcuda_component 513 opal/mca/btl/smcuda/btl_smcuda_component.c size = mca_btl_smcuda_component.mpool_min_size; mca_btl_smcuda_component 634 opal/mca/btl/smcuda/btl_smcuda_component.c if ( mca_btl_smcuda_component.num_outstanding_frags * 2 > (int) mca_btl_smcuda_component.fifo_size ) { mca_btl_smcuda_component 663 opal/mca/btl/smcuda/btl_smcuda_component.c OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, +1); mca_btl_smcuda_component 695 opal/mca/btl/smcuda/btl_smcuda_component.c endpoint = mca_btl_smcuda_component.sm_peers[frag->hdr->my_smp_rank]; mca_btl_smcuda_component 719 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 721 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.my_smp_rank, mca_btl_smcuda_component 743 opal/mca/btl/smcuda/btl_smcuda_component.c if (mca_btl_smcuda_component.use_cuda_ipc_same_gpu) { mca_btl_smcuda_component 746 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 757 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 768 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 776 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 782 opal/mca/btl/smcuda/btl_smcuda_component.c ep_proc, (char *)&mca_btl_smcuda_component.cuda_ipc_output); mca_btl_smcuda_component 783 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 791 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 797 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 802 opal/mca/btl/smcuda/btl_smcuda_component.c ep_proc, (char *)&mca_btl_smcuda_component.cuda_ipc_output); mca_btl_smcuda_component 810 opal/mca/btl/smcuda/btl_smcuda_component.c opal_output_verbose(10, mca_btl_smcuda_component.cuda_ipc_output, mca_btl_smcuda_component 842 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_mpool = NULL; mca_btl_smcuda_component 843 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_mpool_base = NULL; mca_btl_smcuda_component 879 opal/mca/btl/smcuda/btl_smcuda_component.c if (OPAL_SUCCESS != backing_store_init(&mca_btl_smcuda_component, mca_btl_smcuda_component 886 opal/mca/btl/smcuda/btl_smcuda_component.c sprintf( mca_btl_smcuda_component.sm_fifo_path, mca_btl_smcuda_component 889 opal/mca/btl/smcuda/btl_smcuda_component.c if(mkfifo(mca_btl_smcuda_component.sm_fifo_path, 0660) < 0) { mca_btl_smcuda_component 893 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_fifo_fd = open(mca_btl_smcuda_component.sm_fifo_path, mca_btl_smcuda_component 895 opal/mca/btl/smcuda/btl_smcuda_component.c if(mca_btl_smcuda_component.sm_fifo_fd < 0) { mca_btl_smcuda_component 898 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_fifo_path, errno); mca_btl_smcuda_component 902 opal/mca/btl/smcuda/btl_smcuda_component.c OBJ_CONSTRUCT(&mca_btl_smcuda_component.sm_fifo_thread, opal_thread_t); mca_btl_smcuda_component 903 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_fifo_thread.t_run = mca_btl_smcuda_component 905 opal/mca/btl/smcuda/btl_smcuda_component.c opal_thread_start(&mca_btl_smcuda_component.sm_fifo_thread); mca_btl_smcuda_component 908 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_btls = mca_btl_smcuda_component 909 opal/mca/btl/smcuda/btl_smcuda_component.c (mca_btl_smcuda_t **)malloc(mca_btl_smcuda_component.sm_max_btls * mca_btl_smcuda_component 911 opal/mca/btl/smcuda/btl_smcuda_component.c if (NULL == mca_btl_smcuda_component.sm_btls) { mca_btl_smcuda_component 924 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_btls[0] = (mca_btl_smcuda_t*)(&(mca_btl_smcuda)); mca_btl_smcuda_component 928 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.num_smp_procs = 0; mca_btl_smcuda_component 929 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.my_smp_rank = -1; /* not defined */ mca_btl_smcuda_component 930 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_num_btls = 1; mca_btl_smcuda_component 956 opal/mca/btl/smcuda/btl_smcuda_component.c if(read(mca_btl_smcuda_component.sm_fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { mca_btl_smcuda_component 985 opal/mca/btl/smcuda/btl_smcuda_component.c OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, -1); mca_btl_smcuda_component 990 opal/mca/btl/smcuda/btl_smcuda_component.c opal_free_list_return (&mca_btl_smcuda_component.pending_send_fl, (opal_free_list_item_t*)si); mca_btl_smcuda_component 1005 opal/mca/btl/smcuda/btl_smcuda_component.c int my_smp_rank = mca_btl_smcuda_component.my_smp_rank; mca_btl_smcuda_component 1010 opal/mca/btl/smcuda/btl_smcuda_component.c if ( 0 < mca_btl_smcuda_component.num_pending_sends ) { mca_btl_smcuda_component 1014 opal/mca/btl/smcuda/btl_smcuda_component.c for ( peer_smp_rank = 0; peer_smp_rank < mca_btl_smcuda_component.num_smp_procs; peer_smp_rank++) { mca_btl_smcuda_component 1018 opal/mca/btl/smcuda/btl_smcuda_component.c endpoint = mca_btl_smcuda_component.sm_peers[peer_smp_rank]; mca_btl_smcuda_component 1025 opal/mca/btl/smcuda/btl_smcuda_component.c for(j = 0; j < FIFO_MAP_NUM(mca_btl_smcuda_component.num_smp_procs); j++) { mca_btl_smcuda_component 1026 opal/mca/btl/smcuda/btl_smcuda_component.c fifo = &(mca_btl_smcuda_component.fifo[my_smp_rank][j]); mca_btl_smcuda_component 1074 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_peers[peer_smp_rank], mca_btl_smcuda_component 1098 opal/mca/btl/smcuda/btl_smcuda_component.c OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_outstanding_frags, -1); mca_btl_smcuda_component 1122 opal/mca/btl/smcuda/btl_smcuda_component.c mca_btl_smcuda_component.sm_peers[peer_smp_rank], mca_btl_smcuda_component 35 opal/mca/btl/smcuda/btl_smcuda_fifo.h i = opal_free_list_get (&mca_btl_smcuda_component.pending_send_fl); mca_btl_smcuda_component 43 opal/mca/btl/smcuda/btl_smcuda_fifo.h OPAL_THREAD_ADD_FETCH32(&mca_btl_smcuda_component.num_pending_sends, +1); mca_btl_smcuda_component 82 opal/mca/btl/smcuda/btl_smcuda_fifo.h #define FIFO_MAP(x) ((x) & (mca_btl_smcuda_component.nfifos - 1)) mca_btl_smcuda_component 83 opal/mca/btl/smcuda/btl_smcuda_fifo.h #define FIFO_MAP_NUM(n) ( (mca_btl_smcuda_component.nfifos) < (n) ? (mca_btl_smcuda_component.nfifos) : (n) ) mca_btl_smcuda_component 89 opal/mca/btl/smcuda/btl_smcuda_fifo.h sm_fifo_t* fifo = &(mca_btl_smcuda_component.fifo[peer_smp_rank][FIFO_MAP(my_smp_rank)]); \ mca_btl_smcuda_component 35 opal/mca/btl/smcuda/btl_smcuda_frag.c frag->hdr->my_smp_rank = mca_btl_smcuda_component.my_smp_rank; mca_btl_smcuda_component 48 opal/mca/btl/smcuda/btl_smcuda_frag.c frag->size = mca_btl_smcuda_component.eager_limit; mca_btl_smcuda_component 49 opal/mca/btl/smcuda/btl_smcuda_frag.c frag->my_list = &mca_btl_smcuda_component.sm_frags_eager; mca_btl_smcuda_component 55 opal/mca/btl/smcuda/btl_smcuda_frag.c frag->size = mca_btl_smcuda_component.max_frag_size; mca_btl_smcuda_component 56 opal/mca/btl/smcuda/btl_smcuda_frag.c frag->my_list = &mca_btl_smcuda_component.sm_frags_max; mca_btl_smcuda_component 63 opal/mca/btl/smcuda/btl_smcuda_frag.c frag->my_list = &mca_btl_smcuda_component.sm_frags_user; mca_btl_smcuda_component 103 opal/mca/btl/smcuda/btl_smcuda_frag.h opal_free_list_get (&mca_btl_smcuda_component.sm_frags_eager); \ mca_btl_smcuda_component 109 opal/mca/btl/smcuda/btl_smcuda_frag.h opal_free_list_get (&mca_btl_smcuda_component.sm_frags_max); \ mca_btl_smcuda_component 115 opal/mca/btl/smcuda/btl_smcuda_frag.h opal_free_list_get (&mca_btl_smcuda_component.sm_frags_user); \