root/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ompi_coll_tuned_read_rules_config_file
  2. skiptonewline
  3. getnext

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2015 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2015      Research Organization for Information Science
  13  *                         and Technology (RIST). All rights reserved.
  14  * $COPYRIGHT$
  15  *
  16  * Additional copyrights may follow
  17  *
  18  * $HEADER$
  19  */
  20 
  21 #include "ompi_config.h"
  22 #include <stdlib.h>
  23 #include <stdio.h>
  24 
  25 #include "mpi.h"
  26 #include "ompi/mca/mca.h"
  27 #include "coll_tuned.h"
  28 
  29 /* need to include our own topo prototypes so we can malloc data on the comm correctly */
  30 #include "ompi/mca/coll/base/coll_base_topo.h"
  31 
  32 /* also need the dynamic rule structures */
  33 #include "coll_tuned_dynamic_rules.h"
  34 
  35 /* and our own prototypes */
  36 #include "coll_tuned_dynamic_file.h"
  37 
  38 
  39 #define MYEOF   -999
  40 
  41 static long getnext (FILE *fptr); /* local function */
  42 
  43 static int fileline=0; /* used for verbose error messages */
  44 
  45 /*
  46  * Reads a rule file called fname
  47  * Builds the algorithm rule table for a max of n_collectives
  48  *
  49  * If an error occurs it removes rule table and then exits with a very verbose
  50  * error message (this stops the user using a half baked rule table
  51  *
  52  * Returns the number of actual collectives that a rule exists for
  53  * (note 0 is NOT an error)
  54  *
  55  */
  56 
  57 int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules, int n_collectives)
  58 {
  59     FILE *fptr = (FILE*) NULL;
  60     int X, CI, NCS, CS, ALG, NMS, FANINOUT;
  61     long MS, SS;
  62     int x, ncs, nms;
  63 
  64     ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL;   /* complete table of rules */
  65 
  66     /* individual pointers to sections of rules */
  67     ompi_coll_alg_rule_t *alg_p = (ompi_coll_alg_rule_t*) NULL;
  68     ompi_coll_com_rule_t *com_p = (ompi_coll_com_rule_t*) NULL;
  69     ompi_coll_msg_rule_t *msg_p = (ompi_coll_msg_rule_t*) NULL;
  70 
  71     /* stats info */
  72     int total_alg_count = 0;
  73     int total_com_count = 0;
  74     int total_msg_count = 0;
  75 
  76     if (!fname) {
  77         OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n"));
  78         return (-1);
  79     }
  80 
  81     if (!rules) {
  82         OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table result ptr!... ignoring!\n"));
  83         return (-2);
  84     }
  85 
  86     if (n_collectives<1) {
  87         OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... ignoring!\n", n_collectives));
  88         return (-3);
  89     }
  90 
  91     fptr = fopen (fname, "r");
  92     if (!fptr) {
  93         OPAL_OUTPUT((ompi_coll_tuned_stream,"cannot read rules file [%s]\n", fname));
  94         goto on_file_error;
  95     }
  96 
  97     /* make space and init the algorithm rules for each of the n_collectives MPI collectives */
  98     alg_rules = ompi_coll_tuned_mk_alg_rules (n_collectives);
  99     if (NULL == alg_rules) {
 100         OPAL_OUTPUT((ompi_coll_tuned_stream,"cannot cannot allocate rules for file [%s]\n", fname));
 101         goto on_file_error;
 102     }
 103 
 104     X = (int)getnext(fptr);
 105     if (X<0) {
 106         OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline));
 107         goto on_file_error;
 108     }
 109     if (X>n_collectives) {
 110         OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline));
 111         goto on_file_error;
 112     }
 113 
 114     for (x=0;x<X;x++) { /* for each collective */
 115 
 116         CI = (int)getnext (fptr);
 117         if (CI<0) {
 118             OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline));
 119             goto on_file_error;
 120         }
 121         if (CI>=n_collectives) {
 122             OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %d is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
 123             goto on_file_error;
 124         }
 125 
 126         if (alg_rules[CI].alg_rule_id != CI) {
 127             OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %d\n", CI));
 128             goto on_file_error;
 129         }
 130         OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %d\n", CI));
 131         alg_p = &alg_rules[CI];
 132 
 133         alg_p->alg_rule_id = CI;
 134         alg_p->n_com_sizes = 0;
 135         alg_p->com_rules = (ompi_coll_com_rule_t *) NULL;
 136 
 137         NCS = (int)getnext (fptr);
 138         if (NCS<0) {
 139             OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %d at around line %d\n", CI, fileline));
 140             goto on_file_error;
 141         }
 142         OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %d for dynamic rule for collective ID %d\n", NCS, CI));
 143         alg_p->n_com_sizes = NCS;
 144         alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI);
 145 
 146         for (ncs=0;ncs<NCS;ncs++) {     /* for each comm size */
 147 
 148             com_p = &(alg_p->com_rules[ncs]);
 149 
 150             CS = (int)getnext (fptr);
 151             if (CS<0) {
 152                 OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
 153                 goto on_file_error;
 154             }
 155 
 156             com_p->mpi_comsize = CS;
 157 
 158             NMS = (int)getnext (fptr);
 159             if (NMS<0) {
 160                 OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
 161                 goto on_file_error;
 162             }
 163             OPAL_OUTPUT((ompi_coll_tuned_stream, "Read message count %d for dynamic rule for collective ID %d and comm size %d\n",
 164                          NMS, CI, CS));
 165             com_p->n_msg_sizes = NMS;
 166             com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS);
 167 
 168             msg_p = com_p->msg_rules;
 169 
 170             for (nms=0;nms<NMS;nms++) { /* for each msg size */
 171 
 172                 msg_p = &(com_p->msg_rules[nms]);
 173 
 174                 MS = getnext (fptr);
 175                 if (MS<0) {
 176                     OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
 177                     goto on_file_error;
 178                 }
 179                 msg_p->msg_size = (size_t)MS;
 180 
 181                 ALG = (int)getnext (fptr);
 182                 if (ALG<0) {
 183                     OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
 184                     goto on_file_error;
 185                 }
 186                 msg_p->result_alg = ALG;
 187 
 188                 FANINOUT = (int)getnext (fptr);
 189                 if (FANINOUT<0) {
 190                     OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
 191                     goto on_file_error;
 192                 }
 193                 msg_p->result_topo_faninout = FANINOUT;
 194 
 195                 SS = getnext (fptr);
 196                 if (SS<0) {
 197                     OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
 198                     goto on_file_error;
 199                 }
 200                 msg_p->result_segsize = SS;
 201 
 202                 if (!nms && MS) {
 203                     OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
 204                     OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
 205                     goto on_file_error;
 206                 }
 207 
 208                 total_msg_count++;
 209 
 210             } /* msg size */
 211 
 212             total_com_count++;
 213 
 214         } /* comm size */
 215 
 216         total_alg_count++;
 217         OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %d\n", CI));
 218 
 219     } /* per collective */
 220 
 221     fclose (fptr);
 222 
 223     OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n"));
 224     OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count));
 225     OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count));
 226     OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count));
 227     OPAL_OUTPUT((ompi_coll_tuned_stream,"Lines in configuration file read\t\t: %5d\n", fileline));
 228 
 229     /* return the rules to the caller */
 230     *rules = alg_rules;
 231 
 232     return (total_alg_count);
 233 
 234 
 235  on_file_error:
 236 
 237     /* here we close out the file and delete any memory allocated nicely */
 238     /* we return back a verbose message and a count of -1 algorithms read */
 239     /* draconian but its better than having a bad collective decision table */
 240 
 241     OPAL_OUTPUT((ompi_coll_tuned_stream,"read_rules_config_file: bad configure file [%s]. Read afar as line %d\n", fname, fileline));
 242     OPAL_OUTPUT((ompi_coll_tuned_stream,"Ignoring user supplied tuned collectives configuration decision file.\n"));
 243     OPAL_OUTPUT((ompi_coll_tuned_stream,"Switching back to [compiled in] fixed decision table.\n"));
 244     OPAL_OUTPUT((ompi_coll_tuned_stream,"Fix errors as listed above and try again.\n"));
 245 
 246     /* deallocate memory if allocated */
 247     if (alg_rules) ompi_coll_tuned_free_all_rules (alg_rules, n_collectives);
 248 
 249     /* close file */
 250     if (fptr) fclose (fptr);
 251 
 252     *rules = (ompi_coll_alg_rule_t*) NULL;
 253     return (-1);
 254 }
 255 
 256 
 257 static void skiptonewline (FILE *fptr)
 258 {
 259     char val;
 260     int rc;
 261 
 262     do {
 263         rc = fread(&val, 1, 1, fptr);
 264         if (0 == rc) return;
 265         if ((1 == rc)&&('\n' == val)) {
 266             fileline++;
 267             return;
 268         }
 269     } while (1);
 270 }
 271 
 272 static long getnext (FILE *fptr)
 273 {
 274     long val;
 275     int rc;
 276     char trash;
 277 
 278     do {
 279         rc = fscanf(fptr, "%li", &val);
 280         if (rc == EOF) return MYEOF;
 281         if (1 == rc) return val;
 282         /* in all other cases, skip to the end */
 283         rc = fread(&trash, 1, 1, fptr);
 284         if (rc == EOF) return MYEOF;
 285         if ('\n' == trash) fileline++;
 286         if ('#' == trash) skiptonewline (fptr);
 287     } while (1);
 288 }

/* [<][>][^][v][top][bottom][index][help] */