root/opal/mca/hwloc/hwloc201/hwloc/hwloc/topology-cuda.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. hwloc_cuda_cores_per_MP
  2. hwloc_cuda_discover
  3. hwloc_cuda_component_instantiate
  4. hwloc_cuda_component_init

   1 /*
   2  * Copyright © 2011 Université Bordeaux
   3  * Copyright © 2012-2017 Inria.  All rights reserved.
   4  * See COPYING in top-level directory.
   5  */
   6 
   7 #include <private/autogen/config.h>
   8 #include <hwloc.h>
   9 #include <hwloc/plugins.h>
  10 #include <hwloc/cudart.h>
  11 
  12 /* private headers allowed for convenience because this plugin is built within hwloc */
  13 #include <private/misc.h>
  14 #include <private/debug.h>
  15 
  16 #include <cuda_runtime_api.h>
  17 
  18 static unsigned hwloc_cuda_cores_per_MP(int major, int minor)
  19 {
  20   /* FP32 cores per MP, based on CUDA C Programming Guide, Annex G */
  21   switch (major) {
  22     case 1:
  23       switch (minor) {
  24         case 0:
  25         case 1:
  26         case 2:
  27         case 3: return 8;
  28       }
  29       break;
  30     case 2:
  31       switch (minor) {
  32         case 0: return 32;
  33         case 1: return 48;
  34       }
  35       break;
  36     case 3:
  37       return 192;
  38     case 5:
  39       return 128;
  40     case 6:
  41       switch (minor) {
  42         case 0: return 64;
  43         case 1:
  44         case 2: return 128;
  45       }
  46       break;
  47     case 7:
  48       return 64;
  49   }
  50   hwloc_debug("unknown compute capability %d.%d, disabling core display.\n", major, minor);
  51   return 0;
  52 }
  53 
  54 static int
  55 hwloc_cuda_discover(struct hwloc_backend *backend)
  56 {
  57   struct hwloc_topology *topology = backend->topology;
  58   enum hwloc_type_filter_e filter;
  59   cudaError_t cures;
  60   int nb, i;
  61 
  62   hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
  63   if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
  64     return 0;
  65 
  66   cures = cudaGetDeviceCount(&nb);
  67   if (cures)
  68     return -1;
  69 
  70   for (i = 0; i < nb; i++) {
  71     int domain, bus, dev;
  72     char cuda_name[32];
  73     char number[32];
  74     struct cudaDeviceProp prop;
  75     hwloc_obj_t cuda_device, parent;
  76     unsigned cores;
  77 
  78     cuda_device = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_UNKNOWN_INDEX);
  79     snprintf(cuda_name, sizeof(cuda_name), "cuda%d", i);
  80     cuda_device->name = strdup(cuda_name);
  81     cuda_device->depth = HWLOC_TYPE_DEPTH_UNKNOWN;
  82     cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;
  83 
  84     cuda_device->subtype = strdup("CUDA");
  85     hwloc_obj_add_info(cuda_device, "Backend", "CUDA");
  86     hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation");
  87 
  88     cures = cudaGetDeviceProperties(&prop, i);
  89     if (!cures && prop.name[0] != '\0')
  90       hwloc_obj_add_info(cuda_device, "GPUModel", prop.name);
  91 
  92     snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10);
  93     hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number);
  94 
  95     snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10);
  96     hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number);
  97 
  98     snprintf(number, sizeof(number), "%d", prop.multiProcessorCount);
  99     hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number);
 100 
 101     cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor);
 102     if (cores) {
 103       snprintf(number, sizeof(number), "%u", cores);
 104       hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number);
 105     }
 106 
 107     snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10);
 108     hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number);
 109 
 110     parent = NULL;
 111     if (hwloc_cudart_get_device_pci_ids(NULL /* topology unused */, i, &domain, &bus, &dev) == 0) {
 112       parent = hwloc_pcidisc_find_by_busid(topology, domain, bus, dev, 0);
 113       if (!parent)
 114         parent = hwloc_pcidisc_find_busid_parent(topology, domain, bus, dev, 0);
 115     }
 116     if (!parent)
 117       parent = hwloc_get_root_obj(topology);
 118 
 119     hwloc_insert_object_by_parent(topology, parent, cuda_device);
 120   }
 121 
 122   return 0;
 123 }
 124 
 125 static struct hwloc_backend *
 126 hwloc_cuda_component_instantiate(struct hwloc_disc_component *component,
 127                                  const void *_data1 __hwloc_attribute_unused,
 128                                  const void *_data2 __hwloc_attribute_unused,
 129                                  const void *_data3 __hwloc_attribute_unused)
 130 {
 131   struct hwloc_backend *backend;
 132 
 133   backend = hwloc_backend_alloc(component);
 134   if (!backend)
 135     return NULL;
 136   /* the first callback will initialize those */
 137   backend->discover = hwloc_cuda_discover;
 138   return backend;
 139 }
 140 
 141 static struct hwloc_disc_component hwloc_cuda_disc_component = {
 142   HWLOC_DISC_COMPONENT_TYPE_MISC,
 143   "cuda",
 144   HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
 145   hwloc_cuda_component_instantiate,
 146   10, /* after pci */
 147   1,
 148   NULL
 149 };
 150 
 151 static int
 152 hwloc_cuda_component_init(unsigned long flags)
 153 {
 154   if (flags)
 155     return -1;
 156   if (hwloc_plugin_check_namespace("cuda", "hwloc_backend_alloc") < 0)
 157     return -1;
 158   return 0;
 159 }
 160 
 161 #ifdef HWLOC_INSIDE_PLUGIN
 162 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component;
 163 #endif
 164 
 165 const struct hwloc_component hwloc_cuda_component = {
 166   HWLOC_COMPONENT_ABI,
 167   hwloc_cuda_component_init, NULL,
 168   HWLOC_COMPONENT_TYPE_DISC,
 169   0,
 170   &hwloc_cuda_disc_component
 171 };

/* [<][>][^][v][top][bottom][index][help] */