This source file includes the following definitions.
- allocate
- finalize
- discover
- tm_getline
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 #include "orte_config.h"
  22 #include "orte/constants.h"
  23 #include "orte/types.h"
  24 
  25 #include <errno.h>
  26 #include <unistd.h>
  27 #include <string.h>
  28 
  29 #include "orte/util/show_help.h"
  30 #include "opal/util/os_path.h"
  31 #include "opal/util/net.h"
  32 
  33 #include "orte/mca/errmgr/errmgr.h"
  34 #include "orte/runtime/orte_globals.h"
  35 #include "orte/util/name_fns.h"
  36 
  37 #include "orte/mca/ras/base/ras_private.h"
  38 #include "ras_tm.h"
  39 
  40 
  41 
  42 
  43 
/* Module entry points; both are placed in orte_ras_tm_module. */
static int allocate(orte_job_t *jdata, opal_list_t *nodes);
static int finalize(void);

/* File-local helpers used by allocate() to read the node file. */
static int discover(opal_list_t* nodelist, char *pbs_jobid);
static char *tm_getline(FILE *fp);

/* Size, in bytes, of the buffer tm_getline() reads one line into. */
#define TM_FILE_MAX_LINE_LENGTH 512

/* Path of the node file that discover() opens; allocate() passes it to
 * the "no-nodes-found" help message. */
static char *filename;
  53 
  54 
  55 
  56 
/*
 * RAS module descriptor for the TM component.  Only the second and
 * fourth slots are populated (with allocate and finalize); the other
 * two are left NULL.
 * NOTE(review): the meaning of each slot is fixed by
 * orte_ras_base_module_t, which is declared outside this file --
 * presumably init / allocate / deallocate / finalize; confirm against
 * the framework header.
 */
orte_ras_base_module_t orte_ras_tm_module = {
    NULL,
    allocate,
    NULL,
    finalize
};
  63 
  64 
  65 
  66 
  67 
  68 
  69 
  70 static int allocate(orte_job_t *jdata, opal_list_t *nodes)
  71 {
  72     int ret;
  73     char *pbs_jobid;
  74 
  75     
  76     if (NULL == (pbs_jobid = getenv("PBS_JOBID"))) {
  77         ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
  78         return ORTE_ERR_NOT_FOUND;
  79     }
  80 
  81     
  82 
  83 
  84     orte_job_ident = strdup(pbs_jobid);
  85 
  86     if (ORTE_SUCCESS != (ret = discover(nodes, pbs_jobid))) {
  87         ORTE_ERROR_LOG(ret);
  88         return ret;
  89     }
  90 
  91     
  92 
  93 
  94     if (opal_list_is_empty(nodes)) {
  95         orte_show_help("help-ras-tm.txt", "no-nodes-found", true, filename);
  96         return ORTE_ERR_NOT_FOUND;
  97     }
  98 
  99     
 100     return ORTE_SUCCESS;
 101 }
 102 
 103 
 104 
 105 
 106 static int finalize(void)
 107 {
 108     OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 109                          "%s ras:tm:finalize: success (nothing to do)",
 110                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 111     return ORTE_SUCCESS;
 112 }
 113 
 114 
 115 
 116 
 117 
 118 
 119 
 120 
 121 
 122 
 123 
 124 static int discover(opal_list_t* nodelist, char *pbs_jobid)
 125 {
 126     int32_t nodeid;
 127     orte_node_t *node;
 128     opal_list_item_t* item;
 129     FILE *fp;
 130     char *hostname, *cppn;
 131     int ppn;
 132     char *ptr;
 133 
 134     
 135 
 136 
 137     
 138 
 139 
 140 
 141 
 142 
 143 
 144     
 145 
 146 
 147     if (mca_ras_tm_component.smp_mode) {
 148         if (NULL == (cppn = getenv("PBS_PPN"))) {
 149             orte_show_help("help-ras-tm.txt", "smp-error", true);
 150             return ORTE_ERR_NOT_FOUND;
 151         }
 152         ppn = strtol(cppn, NULL, 10);
 153     } else {
 154         ppn = 1;
 155     }
 156 
 157     
 158     filename = opal_os_path(false, mca_ras_tm_component.nodefile_dir,
 159                             pbs_jobid, NULL);
 160     fp = fopen(filename, "r");
 161     if (NULL == fp) {
 162         ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
 163         free(filename);
 164         return ORTE_ERR_FILE_OPEN_FAILURE;
 165     }
 166 
 167     
 168 
 169 
 170 
 171 
 172     nodeid=0;
 173     while (NULL != (hostname = tm_getline(fp))) {
 174         if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hostname) ) {
 175             if (NULL != (ptr = strchr(hostname, '.'))) {
 176                 *ptr = '\0';
 177             }
 178         }
 179 
 180         OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 181                              "%s ras:tm:allocate:discover: got hostname %s",
 182                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostname));
 183 
 184         
 185 
 186 
 187         for (item = opal_list_get_first(nodelist);
 188              opal_list_get_end(nodelist) != item;
 189              item = opal_list_get_next(item)) {
 190             node = (orte_node_t*) item;
 191             if (0 == strcmp(node->name, hostname)) {
 192                 if (mca_ras_tm_component.smp_mode) {
 193                     
 194                     orte_show_help("help-ras-tm.txt", "smp-multi", true);
 195                     return ORTE_ERR_BAD_PARAM;
 196                 }
 197                 ++node->slots;
 198 
 199                 OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 200                                      "%s ras:tm:allocate:discover: found -- bumped slots to %d",
 201                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->slots));
 202 
 203                 break;
 204             }
 205         }
 206 
 207         
 208 
 209         if (opal_list_get_end(nodelist) == item) {
 210 
 211             
 212 
 213             OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 214                                  "%s ras:tm:allocate:discover: not found -- added to list",
 215                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 216 
 217             node = OBJ_NEW(orte_node_t);
 218             node->name = hostname;
 219             orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &nodeid, OPAL_INT32);
 220             node->slots_inuse = 0;
 221             node->slots_max = 0;
 222             node->slots = ppn;
 223             node->state = ORTE_NODE_STATE_UP;
 224             opal_list_append(nodelist, &node->super);
 225         } else {
 226 
 227             
 228             free(hostname);
 229         }
 230 
 231         
 232         nodeid++;
 233     }
 234     fclose(fp);
 235 
 236     return ORTE_SUCCESS;
 237 }
 238 
 239 static char *tm_getline(FILE *fp)
 240 {
 241     char *ret, *buff;
 242     char input[TM_FILE_MAX_LINE_LENGTH];
 243 
 244     ret = fgets(input, TM_FILE_MAX_LINE_LENGTH, fp);
 245     if (NULL != ret) {
 246         input[strlen(input)-1] = '\0';  
 247         buff = strdup(input);
 248         return buff;
 249     }
 250 
 251     return NULL;
 252 }
 253