Line data Source code
1 : // This file is part of ELPA.
2 : //
3 : // The ELPA library was originally created by the ELPA consortium,
4 : // consisting of the following organizations:
5 : //
6 : // - Max Planck Computing and Data Facility (MPCDF), formerly known as
7 : // Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8 : // - Bergische Universität Wuppertal, Lehrstuhl für angewandte
9 : // Informatik,
10 : // - Technische Universität München, Lehrstuhl für Informatik mit
11 : // Schwerpunkt Wissenschaftliches Rechnen ,
12 : // - Fritz-Haber-Institut, Berlin, Abt. Theorie,
13 : // - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
14 : // Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
15 : // and
16 : // - IBM Deutschland GmbH
17 : //
18 : // This particular source code file contains additions, changes and
19 : // enhancements authored by Intel Corporation which is not part of
20 : // the ELPA consortium.
21 : //
22 : // More information can be found here:
23 : // http://elpa.mpcdf.mpg.de/
24 : //
25 : // ELPA is free software: you can redistribute it and/or modify
26 : // it under the terms of the version 3 of the license of the
27 : // GNU Lesser General Public License as published by the Free
28 : // Software Foundation.
29 : //
30 : // ELPA is distributed in the hope that it will be useful,
31 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
32 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 : // GNU Lesser General Public License for more details.
34 : //
35 : // You should have received a copy of the GNU Lesser General Public License
36 : // along with ELPA. If not, see <http://www.gnu.org/licenses/>
37 : //
38 : // ELPA reflects a substantial effort on the part of the original
39 : // ELPA consortium, and we ask you to respect the spirit of the
40 : // license that we chose: i.e., please contribute any changes you
41 : // may have back to the original ELPA library distribution, and keep
42 : // any derivatives of ELPA under the same license that we chose for
43 : // the original distribution, the GNU Lesser General Public License.
44 : //
45 : // Authors: L. Huedepohl and A. Marek, MPCDF
46 : #include <elpa/elpa.h>
47 : #include "elpa_index.h"
48 :
49 : #include <execinfo.h>
50 :
51 : static int enumerate_identity(int i);
52 : static int cardinality_bool(void);
53 : static int valid_bool(elpa_index_t index, int n, int new_value);
54 :
55 : static int number_of_solvers();
56 : static int solver_enumerate(int i);
57 : static int solver_is_valid(elpa_index_t index, int n, int new_value);
58 : static const char* elpa_solver_name(int solver);
59 :
60 : static int number_of_real_kernels();
61 : static int real_kernel_enumerate(int i);
62 : static int real_kernel_is_valid(elpa_index_t index, int n, int new_value);
63 : static const char *real_kernel_name(int kernel);
64 :
65 : static int number_of_complex_kernels();
66 : static int complex_kernel_enumerate(int i);
67 : static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value);
68 : static const char *complex_kernel_name(int kernel);
69 :
70 : static int band_to_full_cardinality();
71 : static int band_to_full_enumerate(int i);
72 : static int band_to_full_is_valid(elpa_index_t index, int n, int new_value);
73 :
74 : static int na_is_valid(elpa_index_t index, int n, int new_value);
75 : static int nev_is_valid(elpa_index_t index, int n, int new_value);
76 : static int bw_is_valid(elpa_index_t index, int n, int new_value);
77 : static int gpu_is_valid(elpa_index_t index, int n, int new_value);
78 :
79 : static int is_positive(elpa_index_t index, int n, int new_value);
80 :
81 : static int elpa_double_string_to_value(char *name, char *string, double *value);
82 : static int elpa_double_value_to_string(char *name, double value, const char **string);
83 :
/* Designated initializer for the common `base` member of an option entry.
 * The names of the two associated environment variables are built by
 * string-literal concatenation, so opt_name has to be a string literal. */
#define BASE_ENTRY(opt_name, opt_description, is_once, is_readonly) \
        .base = { \
                .name = opt_name, \
                .description = opt_description, \
                .once = is_once, \
                .readonly = is_readonly, \
                .env_default = "ELPA_DEFAULT_" opt_name, \
                .env_force = "ELPA_FORCE_" opt_name, \
        }
93 :
/* Table entry for an integer parameter that is flagged as set-once
 * (once = 1, readonly = 0) and carries only a validator callback. */
#define INT_PARAMETER_ENTRY(opt_name, opt_description, validator) \
        { \
                BASE_ENTRY(opt_name, opt_description, 1, 0), \
                .valid = validator, \
        }
99 :
/* Table entry for a 0/1 option: it uses the generic boolean helpers for
 * cardinality, enumeration and validation and has no string conversion. */
#define BOOL_ENTRY(opt_name, opt_description, initial, level, domain) \
        { \
                BASE_ENTRY(opt_name, opt_description, 0, 0), \
                .default_value = initial, \
                .autotune_level = level, \
                .autotune_domain = domain, \
                .cardinality = cardinality_bool, \
                .enumerate = enumerate_identity, \
                .valid = valid_bool, \
        }
110 :
/* Table entry for a general integer option with a default value, autotune
 * classification and the full set of callbacks (cardinality, enumerate,
 * valid, to_string). Any callback may be passed as NULL. */
#define INT_ENTRY(opt_name, opt_description, initial, level, domain, card_fn, enum_fn, valid_fn, string_fn) \
        { \
                BASE_ENTRY(opt_name, opt_description, 0, 0), \
                .default_value = initial, \
                .autotune_level = level, \
                .autotune_domain = domain, \
                .cardinality = card_fn, \
                .enumerate = enum_fn, \
                .valid = valid_fn, \
                .to_string = string_fn, \
        }
122 :
/* Table entry for an integer option without validator, default value or
 * any other callback; only the `base` member is initialized. */
#define INT_ANY_ENTRY(opt_name, opt_description) \
        { \
                BASE_ENTRY(opt_name, opt_description, 0, 0), \
        }
127 :
128 : /* The order here is important! Tunable options that are dependent on other
129 : * tunable options must appear later in the list than their prerequisites */
130 : static const elpa_index_int_entry_t int_entries[] = {
131 : INT_PARAMETER_ENTRY("na", "Global matrix has size (na * na)", na_is_valid),
132 : INT_PARAMETER_ENTRY("nev", "Number of eigenvectors to be computed, 0 <= nev <= na", nev_is_valid),
133 : INT_PARAMETER_ENTRY("nblk", "Block size of scalapack block-cyclic distribution", is_positive),
134 : INT_PARAMETER_ENTRY("local_nrows", "Number of matrix rows stored on this process", NULL),
135 : INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process", NULL),
136 : INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition", NULL),
137 : INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition", NULL),
138 : INT_PARAMETER_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", bw_is_valid),
139 : INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
140 : INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
141 : INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator"),
142 : INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
143 : number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name),
144 : INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
145 : cardinality_bool, enumerate_identity, gpu_is_valid, NULL),
146 : INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
147 : number_of_real_kernels, real_kernel_enumerate, \
148 : real_kernel_is_valid, real_kernel_name),
149 : INT_ENTRY("complex_kernel", "Complex kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_COMPLEX_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_COMPLEX, \
150 : number_of_complex_kernels, complex_kernel_enumerate, \
151 : complex_kernel_is_valid, complex_kernel_name),
152 :
153 : //INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
154 : INT_ENTRY("blocking_in_band_to_full", "Loop blocking, default 3", 3, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY, band_to_full_cardinality, band_to_full_enumerate, band_to_full_is_valid, NULL),
155 : //BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_REAL),
156 : BOOL_ENTRY("qr", "Use QR decomposition, only used for ELPA_SOLVER_2STAGE, real case", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_REAL),
157 : BOOL_ENTRY("timings", "Enable time measurement", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
158 : BOOL_ENTRY("debug", "Emit verbose debugging messages", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
159 : BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
160 : BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0),
161 : };
162 :
/* Table entry for a read-only double option (once = 0, readonly = 1).
 * BASE_ENTRY takes exactly four arguments; the previous definition passed
 * a fifth one and would have failed to compile on first use. */
#define READONLY_DOUBLE_ENTRY(option_name, option_description) \
        { \
                BASE_ENTRY(option_name, option_description, 0, 1), \
        }
167 :
168 : static const elpa_index_double_entry_t double_entries[] = {
169 : /* Empty for now */
170 : };
171 :
172 21312 : void elpa_index_free(elpa_index_t index) {
173 : #define FREE_OPTION(TYPE, ...) \
174 : free(index->TYPE##_options.values); \
175 : free(index->TYPE##_options.is_set); \
176 : free(index->TYPE##_options.notified);
177 :
178 21312 : FOR_ALL_TYPES(FREE_OPTION);
179 :
180 21312 : free(index);
181 21312 : }
182 :
183 14019728 : static int compar(const void *key, const void *member) {
184 14019728 : const char *name = (const char *) key;
185 14019728 : elpa_index_int_entry_t *entry = (elpa_index_int_entry_t *) member;
186 :
187 14019728 : int l1 = strlen(entry->base.name);
188 14019728 : int l2 = strlen(name);
189 14019728 : if (l1 != l2) {
190 11515112 : return 1;
191 : }
192 2504616 : if (strncmp(name, entry->base.name, l1) == 0) {
193 1267512 : return 0;
194 : } else {
195 1237104 : return 1;
196 : }
197 : }
198 :
199 : #define IMPLEMENT_FIND_ENTRY(TYPE, ...) \
200 : static int find_##TYPE##_entry(char *name) { \
201 : elpa_index_##TYPE##_entry_t *entry; \
202 : size_t nmembers = nelements(TYPE##_entries); \
203 : entry = lfind((const void*) name, (const void *) TYPE##_entries, &nmembers, sizeof(elpa_index_##TYPE##_entry_t), compar); \
204 : if (entry) { \
205 : return (entry - &TYPE##_entries[0]); \
206 : } else { \
207 : return -1; \
208 : } \
209 : }
210 1267512 : FOR_ALL_TYPES(IMPLEMENT_FIND_ENTRY)
211 :
212 :
213 : #define IMPLEMENT_GETENV(TYPE, PRINTF_SPEC, ...) \
214 : static int getenv_##TYPE(elpa_index_t index, const char *env_variable, enum NOTIFY_FLAGS notify_flag, int n, TYPE *value, const char *error_string) { \
215 : int err; \
216 : char *env_value = getenv(env_variable); \
217 : if (env_value) { \
218 : err = elpa_##TYPE##_string_to_value(TYPE##_entries[n].base.name, env_value, value); \
219 : if (err != ELPA_OK) { \
220 : fprintf(stderr, "ELPA: Error interpreting environment variable %s with value '%s': %s\n", \
221 : TYPE##_entries[n].base.name, env_value, elpa_strerr(err)); \
222 : } else {\
223 : const char *value_string = NULL; \
224 : if (elpa_##TYPE##_value_to_string(TYPE##_entries[n].base.name, *value, &value_string) == ELPA_OK) { \
225 : if (!(index->TYPE##_options.notified[n] & notify_flag)) { \
226 : fprintf(stderr, "ELPA: %s '%s' is set to %s due to environment variable %s\n", \
227 : error_string, TYPE##_entries[n].base.name, value_string, env_variable); \
228 : index->TYPE##_options.notified[n] |= notify_flag; \
229 : } \
230 : } else { \
231 : fprintf(stderr, "ELPA: %s '%s' is set to '" PRINTF_SPEC "' due to environment variable %s\n", \
232 : error_string, TYPE##_entries[n].base.name, *value, env_variable);\
233 : } \
234 : return 1; \
235 : } \
236 : } \
237 : return 0; \
238 : }
239 824776 : FOR_ALL_TYPES(IMPLEMENT_GETENV)
240 :
241 :
242 : #define IMPLEMENT_GET_FUNCTION(TYPE, PRINTF_SPEC, ERROR_VALUE) \
243 : TYPE elpa_index_get_##TYPE##_value(elpa_index_t index, char *name, int *error) { \
244 : TYPE ret; \
245 : if (sizeof(TYPE##_entries) == 0) { \
246 : return ELPA_ERROR_ENTRY_NOT_FOUND; \
247 : } \
248 : int n = find_##TYPE##_entry(name); \
249 : if (n >= 0) { \
250 : int from_env = 0; \
251 : if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
252 : from_env = getenv_##TYPE(index, TYPE##_entries[n].base.env_force, NOTIFY_ENV_FORCE, n, &ret, "Option"); \
253 : } \
254 : if (!from_env) { \
255 : ret = index->TYPE##_options.values[n]; \
256 : } \
257 : if (error != NULL) { \
258 : *error = ELPA_OK; \
259 : } \
260 : return ret; \
261 : } else { \
262 : if (error != NULL) { \
263 : *error = ELPA_ERROR_ENTRY_NOT_FOUND; \
264 : } \
265 : return ERROR_VALUE; \
266 : } \
267 : }
268 584520 : FOR_ALL_TYPES(IMPLEMENT_GET_FUNCTION)
269 :
270 :
271 : #define IMPLEMENT_LOC_FUNCTION(TYPE, ...) \
272 : TYPE* elpa_index_get_##TYPE##_loc(elpa_index_t index, char *name) { \
273 : if (sizeof(TYPE##_entries) == 0) { \
274 : return NULL; \
275 : } \
276 : int n = find_##TYPE##_entry(name); \
277 : if (n >= 0) { \
278 : return &index->TYPE##_options.values[n]; \
279 : } else { \
280 : return NULL; \
281 : } \
282 : }
283 106560 : FOR_ALL_TYPES(IMPLEMENT_LOC_FUNCTION)
284 :
285 :
286 : #define IMPLEMENT_SET_FUNCTION(TYPE, PRINTF_SPEC, ...) \
287 : int elpa_index_set_##TYPE##_value(elpa_index_t index, char *name, TYPE value) { \
288 : if (sizeof(TYPE##_entries) == 0) { \
289 : return ELPA_ERROR_ENTRY_NOT_FOUND; \
290 : } \
291 : int n = find_##TYPE##_entry(name); \
292 : if (n < 0) { \
293 : return ELPA_ERROR_ENTRY_NOT_FOUND; \
294 : }; \
295 : if (TYPE##_entries[n].valid != NULL) { \
296 : if(!TYPE##_entries[n].valid(index, n, value)) { \
297 : return ELPA_ERROR_ENTRY_INVALID_VALUE; \
298 : }; \
299 : } \
300 : if (TYPE##_entries[n].base.once & index->TYPE##_options.is_set[n]) { \
301 : return ELPA_ERROR_ENTRY_ALREADY_SET; \
302 : } \
303 : if (TYPE##_entries[n].base.readonly) { \
304 : return ELPA_ERROR_ENTRY_READONLY; \
305 : } \
306 : index->TYPE##_options.values[n] = value; \
307 : index->TYPE##_options.is_set[n] = 1; \
308 : return ELPA_OK; \
309 : }
310 280352 : FOR_ALL_TYPES(IMPLEMENT_SET_FUNCTION)
311 :
312 :
313 : #define IMPLEMENT_IS_SET_FUNCTION(TYPE, ...) \
314 : int elpa_index_##TYPE##_value_is_set(elpa_index_t index, char *name) { \
315 : if (sizeof(TYPE##_entries) == 0) { \
316 : return ELPA_ERROR_ENTRY_NOT_FOUND; \
317 : } \
318 : int n = find_##TYPE##_entry(name); \
319 : if (n >= 0) { \
320 : if (index->TYPE##_options.is_set[n]) { \
321 : return 1; \
322 : } else { \
323 : return 0; \
324 : } \
325 : } else { \
326 : return ELPA_ERROR_ENTRY_NOT_FOUND; \
327 : } \
328 : }
329 98576 : FOR_ALL_TYPES(IMPLEMENT_IS_SET_FUNCTION)
330 :
331 :
332 78992 : int elpa_index_value_is_set(elpa_index_t index, char *name) {
333 78992 : int res = ELPA_ERROR;
334 :
335 : #define RET_IF_SET(TYPE, ...) \
336 : res = elpa_index_##TYPE##_value_is_set(index, name); \
337 : if (res >= 0) { \
338 : return res; \
339 : }
340 :
341 78992 : FOR_ALL_TYPES(RET_IF_SET)
342 :
343 0 : fprintf(stderr, "ELPA Error: Could not find entry '%s'\n", name);
344 0 : return res;
345 : }
346 :
347 0 : int elpa_index_int_is_valid(elpa_index_t index, char *name, int new_value) {
348 0 : int n = find_int_entry(name); \
349 0 : if (n >= 0) { \
350 0 : if (int_entries[n].valid == NULL) {
351 0 : return ELPA_OK;
352 : } else {
353 0 : return int_entries[n].valid(index, n, new_value) ? ELPA_OK : ELPA_ERROR;
354 : }
355 : }
356 0 : return ELPA_ERROR_ENTRY_NOT_FOUND;
357 : }
358 :
359 156224 : int elpa_int_value_to_string(char *name, int value, const char **string) {
360 156224 : int n = find_int_entry(name);
361 156224 : if (n < 0) {
362 0 : return ELPA_ERROR_ENTRY_NOT_FOUND;
363 : }
364 156224 : if (int_entries[n].to_string == NULL) {
365 0 : return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
366 : }
367 156224 : *string = int_entries[n].to_string(value);
368 156224 : return ELPA_OK;
369 : }
370 :
371 :
/* Returns the length of the symbolic name of `value` for the integer
 * option `name`, or 0 when no name could be obtained. */
int elpa_int_value_to_strlen(char *name, int value) {
        const char *text = NULL;

        /* On failure the lookup leaves `text` at NULL, so its status code
         * does not need to be inspected separately */
        elpa_int_value_to_string(name, value, &text);
        if (text != NULL) {
                return strlen(text);
        }
        return 0;
}
381 :
382 :
383 0 : int elpa_index_int_value_to_strlen(elpa_index_t index, char *name) {
384 0 : int n = find_int_entry(name);
385 0 : if (n < 0) {
386 0 : return 0;
387 : }
388 0 : return elpa_int_value_to_strlen(name, index->int_options.values[n]);
389 : }
390 :
391 :
392 0 : int elpa_int_string_to_value(char *name, char *string, int *value) {
393 0 : int n = find_int_entry(name);
394 0 : if (n < 0) {
395 0 : return ELPA_ERROR_ENTRY_NOT_FOUND;
396 : }
397 :
398 0 : if (int_entries[n].to_string == NULL) {
399 : int val, ret;
400 0 : ret = sscanf(string, "%d", &val);
401 0 : if (ret == strlen(string)) {
402 0 : *value = val;
403 0 : return ELPA_OK;
404 : } else {
405 0 : return ELPA_ERROR_ENTRY_INVALID_VALUE;
406 : }
407 : }
408 :
409 0 : for (int i = 0; i < int_entries[n].cardinality(); i++) {
410 0 : int candidate = int_entries[n].enumerate(i);
411 0 : if (strcmp(string, int_entries[n].to_string(candidate)) == 0) {
412 0 : *value = candidate;
413 0 : return ELPA_OK;
414 : }
415 : }
416 0 : return ELPA_ERROR_ENTRY_INVALID_VALUE;
417 : }
418 :
419 0 : int elpa_double_string_to_value(char *name, char *string, double *value) {
420 : double val;
421 0 : int ret = sscanf(string, "%lf", &val);
422 0 : if (ret == strlen(string)) {
423 0 : *value = val;
424 0 : return ELPA_OK;
425 : } else {
426 : /* \todo: remove */
427 0 : fprintf(stderr, "ELPA: DEBUG: Could not parse double value '%s' for option '%s'\n", string, name);
428 0 : return ELPA_ERROR_ENTRY_INVALID_VALUE;
429 : }
430 : }
431 :
432 0 : int elpa_double_value_to_string(char *name, double value, const char **string) {
433 0 : return ELPA_ERROR_ENTRY_NO_STRING_REPRESENTATION;
434 : }
435 :
436 1920 : int elpa_option_cardinality(char *name) {
437 1920 : int n = find_int_entry(name);
438 1920 : if (n < 0 || !int_entries[n].cardinality) {
439 0 : return ELPA_ERROR_ENTRY_NOT_FOUND;
440 : }
441 1920 : return int_entries[n].cardinality();
442 : }
443 :
444 39360 : int elpa_option_enumerate(char *name, int i) {
445 39360 : int n = find_int_entry(name);
446 39360 : if (n < 0 || !int_entries[n].enumerate) {
447 0 : return 0;
448 : }
449 39360 : return int_entries[n].enumerate(i);
450 : }
451 :
452 :
/* Helper functions for simple int entries */

/* Number of values a boolean option can take (0 and 1). */
static int cardinality_bool(void) {
        enum { bool_value_count = 2 };
        return bool_value_count;
}
457 :
458 31392 : static int valid_bool(elpa_index_t index, int n, int new_value) {
459 31392 : return (0 <= new_value) && (new_value < 2);
460 : }
461 :
/* Enumeration callback for options whose i-th value is i itself. */
static int enumerate_identity(int i) {
        const int ith_value = i;
        return ith_value;
}
465 :
466 : /* Helper functions for specific options */
467 :
/* X-macro body: a switch case that returns the stringified symbol of an
 * enumerated option value. */
#define NAME_CASE(symbol, number, ...) \
        case number: \
                return #symbol;
471 :
/* X-macro body: a switch case that accepts the given option value. */
#define VALID_CASE(symbol, number) \
        case number: \
                return 1;
475 :
/* X-macro body: a switch case that accepts the given option value only if
 * it is flagged as available and the additional check macro, applied to
 * the value, evaluates to non-zero. */
#define VALID_CASE_3(symbol, number, is_available, extra_check) \
        case number: \
                return is_available && (extra_check(number));
479 :
480 0 : static const char* elpa_solver_name(int solver) {
481 0 : switch(solver) {
482 0 : ELPA_FOR_ALL_SOLVERS(NAME_CASE)
483 : default:
484 0 : return "(Invalid solver)";
485 : }
486 : }
487 :
488 0 : static int number_of_solvers() {
489 0 : return ELPA_NUMBER_OF_SOLVERS;
490 : }
491 :
492 0 : static int solver_enumerate(int i) {
493 : #define OPTION_RANK(name, value, ...) \
494 : +(value >= sizeof(array_of_size_value)/sizeof(int) ? 0 : 1)
495 :
496 : #define EMPTY()
497 : #define DEFER1(m) m EMPTY()
498 : #define EVAL(...) __VA_ARGS__
499 :
500 : #define ENUMERATE_CASE(name, value, ...) \
501 : { const int array_of_size_value[value]; \
502 : case 0 DEFER1(INNER_ITERATOR)()(OPTION_RANK): \
503 : return value; }
504 :
505 0 : switch(i) {
506 : #define INNER_ITERATOR() ELPA_FOR_ALL_SOLVERS
507 0 : EVAL(ELPA_FOR_ALL_SOLVERS(ENUMERATE_CASE))
508 : #undef INNER_ITERATOR
509 : default:
510 0 : return 0;
511 : }
512 : }
513 :
514 :
515 18816 : static int solver_is_valid(elpa_index_t index, int n, int new_value) {
516 18816 : switch(new_value) {
517 7776 : ELPA_FOR_ALL_SOLVERS(VALID_CASE)
518 : default:
519 0 : return 0;
520 : }
521 : }
522 :
523 1056 : static int number_of_real_kernels() {
524 1056 : return ELPA_2STAGE_NUMBER_OF_REAL_KERNELS;
525 : }
526 :
527 26400 : static int real_kernel_enumerate(int i) {
528 26400 : switch(i) {
529 : #define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_REAL_KERNELS
530 1056 : EVAL(ELPA_FOR_ALL_2STAGE_REAL_KERNELS(ENUMERATE_CASE))
531 : #undef INNER_ITERATOR
532 : default:
533 1056 : return 0;
534 : }
535 : }
536 :
537 97088 : static const char *real_kernel_name(int kernel) {
538 97088 : switch(kernel) {
539 7040 : ELPA_FOR_ALL_2STAGE_REAL_KERNELS(NAME_CASE)
540 : default:
541 0 : return "(Invalid real kernel)";
542 : }
543 : }
544 :
/* Additional check for real kernels: the GPU kernel is acceptable only if
 * the variable `gpu_is_active`, which must be in scope at the point of
 * use, is non-zero; every other kernel passes. */
#define REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel) \
        kernel == ELPA_2STAGE_REAL_GPU ? gpu_is_active : 1
547 :
548 29184 : static int real_kernel_is_valid(elpa_index_t index, int n, int new_value) {
549 29184 : int solver = elpa_index_get_int_value(index, "solver", NULL);
550 29184 : if (solver == ELPA_SOLVER_1STAGE) {
551 0 : return new_value == ELPA_2STAGE_REAL_DEFAULT;
552 : }
553 29184 : int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
554 29184 : switch(new_value) {
555 1056 : ELPA_FOR_ALL_2STAGE_REAL_KERNELS(VALID_CASE_3, REAL_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
556 : default:
557 1056 : return 0;
558 : }
559 : }
560 :
561 864 : static int number_of_complex_kernels() {
562 864 : return ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS;
563 : }
564 :
565 :
566 12960 : static int complex_kernel_enumerate(int i) {
567 12960 : switch(i) {
568 : #define INNER_ITERATOR() ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS
569 864 : EVAL(ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(ENUMERATE_CASE))
570 : #undef INNER_ITERATOR
571 : default:
572 864 : return 0;
573 : }
574 : }
575 :
576 59136 : static const char *complex_kernel_name(int kernel) {
577 59136 : switch(kernel) {
578 5760 : ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(NAME_CASE)
579 : default:
580 0 : return "(Invalid complex kernel)";
581 : }
582 : }
583 :
/* Additional check for complex kernels: the GPU kernel is acceptable only
 * if the variable `gpu_is_active`, which must be in scope at the point of
 * use, is non-zero; every other kernel passes. */
#define COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE(kernel) \
        kernel == ELPA_2STAGE_COMPLEX_GPU ? gpu_is_active : 1
586 :
587 15168 : static int complex_kernel_is_valid(elpa_index_t index, int n, int new_value) {
588 15168 : int solver = elpa_index_get_int_value(index, "solver", NULL);
589 15168 : if (solver == ELPA_SOLVER_1STAGE) {
590 0 : return new_value == ELPA_2STAGE_COMPLEX_DEFAULT;
591 : }
592 15168 : int gpu_is_active = elpa_index_get_int_value(index, "gpu", NULL);
593 15168 : switch(new_value) {
594 864 : ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(VALID_CASE_3, COMPLEX_GPU_KERNEL_ONLY_WHEN_GPU_IS_ACTIVE)
595 : default:
596 864 : return 0;
597 : }
598 : }
599 :
600 21312 : static int na_is_valid(elpa_index_t index, int n, int new_value) {
601 21312 : return new_value > 0;
602 : }
603 :
604 19008 : static int nev_is_valid(elpa_index_t index, int n, int new_value) {
605 19008 : if (!elpa_index_int_value_is_set(index, "na")) {
606 0 : return 0;
607 : }
608 19008 : return 0 <= new_value && new_value <= elpa_index_get_int_value(index, "na", NULL);
609 : }
610 :
611 21312 : static int is_positive(elpa_index_t index, int n, int new_value) {
612 21312 : return new_value > 0;
613 : }
614 :
615 576 : static int bw_is_valid(elpa_index_t index, int n, int new_value) {
616 : int na;
617 576 : if (elpa_index_int_value_is_set(index, "na") != 1) {
618 0 : return 0;
619 : }
620 :
621 576 : na = elpa_index_get_int_value(index, "na", NULL);
622 576 : return (0 <= new_value) && (new_value < na);
623 : }
624 :
625 13824 : static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
626 13824 : return new_value == 0 || new_value == 1;
627 : }
628 :
/* Cardinality callback of "blocking_in_band_to_full". Not implemented:
 * reports its source location on stderr and aborts the program. */
static int band_to_full_cardinality() {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}
634 :
/* Enumeration callback of "blocking_in_band_to_full". Not implemented:
 * reports its source location on stderr and aborts the program. */
static int band_to_full_enumerate(int i) {
        /* TODO */
        fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
        abort();
}
640 :
641 0 : static int band_to_full_is_valid(elpa_index_t index, int n, int new_value) {
642 : /* TODO */
643 0 : fprintf(stderr, "TODO on %s:%d\n", __FILE__, __LINE__);
644 0 : abort();
645 : }
646 :
647 21312 : elpa_index_t elpa_index_instance() {
648 21312 : elpa_index_t index = (elpa_index_t) calloc(1, sizeof(struct elpa_index_struct));
649 :
650 : #define ALLOCATE(TYPE, PRINTF_SPEC, ...) \
651 : index->TYPE##_options.values = (TYPE*) calloc(nelements(TYPE##_entries), sizeof(TYPE)); \
652 : index->TYPE##_options.is_set = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
653 : index->TYPE##_options.notified = (int*) calloc(nelements(TYPE##_entries), sizeof(int)); \
654 : for (int n = 0; n < nelements(TYPE##_entries); n++) { \
655 : TYPE default_value = TYPE##_entries[n].default_value; \
656 : if (!TYPE##_entries[n].base.once && !TYPE##_entries[n].base.readonly) { \
657 : getenv_##TYPE(index, TYPE##_entries[n].base.env_default, NOTIFY_ENV_DEFAULT, n, &default_value, "Default for option"); \
658 : } \
659 : index->TYPE##_options.values[n] = default_value; \
660 : }
661 :
662 21312 : FOR_ALL_TYPES(ALLOCATE)
663 :
664 21312 : return index;
665 : }
666 :
667 0 : static int is_tunable(elpa_index_t index, int i, int autotune_level, int autotune_domain) {
668 0 : return (int_entries[i].autotune_level != 0) &&
669 0 : (int_entries[i].autotune_level <= autotune_level) &&
670 0 : (int_entries[i].autotune_domain & autotune_domain) &&
671 0 : (!index->int_options.is_set[i]);
672 : }
673 :
674 0 : int elpa_index_autotune_cardinality(elpa_index_t index, int autotune_level, int autotune_domain) {
675 0 : int N = 1;
676 :
677 0 : for (int i = 0; i < nelements(int_entries); i++) { \
678 0 : if (is_tunable(index, i, autotune_level, autotune_domain)) {
679 0 : N *= int_entries[i].cardinality();
680 : }
681 : }
682 0 : return N;
683 : }
684 :
685 0 : int elpa_index_set_autotune_parameters(elpa_index_t index, int autotune_level, int autotune_domain, int n) {
686 0 : int debug = elpa_index_get_int_value(index, "debug", NULL);
687 0 : for (int i = 0; i < nelements(int_entries); i++) {
688 0 : if (is_tunable(index, i, autotune_level, autotune_domain)) {
689 0 : int value = int_entries[i].enumerate(n % int_entries[i].cardinality());
690 : /* Try to set option i to that value */
691 0 : if (int_entries[i].valid(index, i, value)) {
692 0 : index->int_options.values[i] = value;
693 : } else {
694 0 : return 0;
695 : }
696 0 : n /= int_entries[i].cardinality();
697 : }
698 : }
699 0 : if (debug == 1) {
700 0 : for (int i = 0; i < nelements(int_entries); i++) {
701 0 : if (is_tunable(index, i, autotune_level, autotune_domain)) {
702 0 : fprintf(stderr, "%s = ", int_entries[i].base.name);
703 0 : if (int_entries[i].to_string) {
704 0 : fprintf(stderr, "%s\n", int_entries[i].to_string(index->int_options.values[i]));
705 : } else {
706 0 : fprintf(stderr, "%d\n", index->int_options.values[i]);
707 : }
708 : }
709 : }
710 0 : fprintf(stderr, "\n");
711 : }
712 :
713 : /* Could set all values */
714 0 : return 1;
715 : }
|