This source file includes the following definitions.
- mca_atomic_basic_startup
- mca_atomic_basic_finalize
- mca_atomic_basic_fop
- mca_atomic_basic_op
- mca_atomic_basic_add
- mca_atomic_basic_and
- mca_atomic_basic_or
- mca_atomic_basic_xor
- mca_atomic_basic_fadd
- mca_atomic_basic_fand
- mca_atomic_basic_for
- mca_atomic_basic_fxor
- mca_atomic_basic_swap
- mca_atomic_basic_query
- atomic_basic_lock
- atomic_basic_unlock
1
2
3
4
5
6
7
8
9
10
11 #include "oshmem_config.h"
12 #include <stdio.h>
13
14 #include "oshmem/constants.h"
15 #include "oshmem/mca/atomic/atomic.h"
16 #include "oshmem/mca/spml/spml.h"
17 #include "oshmem/mca/memheap/memheap.h"
18 #include "oshmem/proc/proc.h"
19 #include "oshmem/op/op.h"
20 #include "atomic_basic.h"
21
/*
 * Bookkeeping for the software lock implemented by atomic_basic_lock() and
 * atomic_basic_unlock().
 *
 * atomic_lock_sync / atomic_lock_turn are obtained from the memheap private
 * allocator in mca_atomic_basic_startup() and are the addresses the lock
 * routines put to / get from on the target PE.  local_lock_sync /
 * local_lock_turn are malloc'ed buffers that receive the fetched copies.
 */
static char *atomic_lock_sync = NULL;   /* one state byte per PE */
static int *atomic_lock_turn = NULL;    /* single int holding a PE index */
static char *local_lock_sync = NULL;    /* local snapshot of atomic_lock_sync */
static int *local_lock_turn = NULL;     /* local snapshot of atomic_lock_turn */
26
/*
 * Per-PE state values stored in the atomic_lock_sync array.
 * The numeric values are 0, 1 and 2 respectively.
 */
enum {
    ATOMIC_LOCK_IDLE = 0,   /* initial state; written back by unlock */
    ATOMIC_LOCK_WAITING,    /* written first by lock: the PE wants the lock */
    ATOMIC_LOCK_ACTIVE      /* written second by lock: the PE is claiming it */
};
32
33
34
35
36
37 int mca_atomic_basic_startup(bool enable_progress_threads, bool enable_threads)
38 {
39 int rc = OSHMEM_SUCCESS;
40 void* ptr = NULL;
41 int num_pe = oshmem_num_procs();
42
43 rc = MCA_MEMHEAP_CALL(private_alloc((num_pe * sizeof(char)), &ptr));
44 if (rc == OSHMEM_SUCCESS) {
45 atomic_lock_sync = (char*) ptr;
46 memset(atomic_lock_sync, ATOMIC_LOCK_IDLE, sizeof(char) * num_pe);
47
48 rc = MCA_MEMHEAP_CALL(private_alloc(sizeof(int), &ptr));
49 if (rc == OSHMEM_SUCCESS) {
50 atomic_lock_turn = (int*) ptr;
51 *atomic_lock_turn = 0;
52 if (rc == OSHMEM_SUCCESS) {
53 local_lock_sync = (char*) malloc(num_pe * sizeof(char));
54 local_lock_turn = (int*) malloc(sizeof(int));
55 if (!local_lock_sync || !local_lock_turn) {
56 rc = OSHMEM_ERR_OUT_OF_RESOURCE;
57 } else {
58 memcpy((void*) local_lock_sync,
59 (void*) atomic_lock_sync,
60 sizeof(char) * num_pe);
61 *local_lock_turn = *atomic_lock_turn;
62 }
63 }
64 }
65 }
66
67 return rc;
68 }
69
70 int mca_atomic_basic_finalize(void)
71 {
72 void* ptr = NULL;
73
74 ptr = (void*) atomic_lock_sync;
75 MCA_MEMHEAP_CALL(private_free(ptr));
76 atomic_lock_sync = NULL;
77
78 ptr = (void*) atomic_lock_turn;
79 MCA_MEMHEAP_CALL(private_free(ptr));
80 atomic_lock_turn = NULL;
81
82 if (local_lock_sync) {
83 free((void*) local_lock_sync);
84 local_lock_sync = NULL;
85 }
86
87 if (local_lock_turn) {
88 free((void*) local_lock_turn);
89 local_lock_turn = NULL;
90 }
91
92 return OSHMEM_SUCCESS;
93 }
94
95 static inline
96 int mca_atomic_basic_fop(shmem_ctx_t ctx,
97 void *target,
98 void *prev,
99 uint64_t value,
100 size_t size,
101 int pe,
102 struct oshmem_op_t *op)
103 {
104 int rc = OSHMEM_SUCCESS;
105 long long temp_value = 0;
106
107 atomic_basic_lock(ctx, pe);
108
109 rc = MCA_SPML_CALL(get(ctx, target, size, (void*)&temp_value, pe));
110
111 memcpy(prev, (void*) &temp_value, size);
112
113 op->o_func.c_fn((void*) value,
114 (void*) &temp_value,
115 size / op->dt_size);
116
117 if (rc == OSHMEM_SUCCESS) {
118 rc = MCA_SPML_CALL(put(ctx, target, size, (void*)&temp_value, pe));
119 shmem_quiet();
120 }
121
122 atomic_basic_unlock(ctx, pe);
123
124 return rc;
125 }
126
127 static inline
128 int mca_atomic_basic_op(shmem_ctx_t ctx,
129 void *target,
130 uint64_t value,
131 size_t size,
132 int pe,
133 struct oshmem_op_t *op)
134 {
135 long long prev;
136
137 return mca_atomic_basic_fop(ctx, target, &prev, value, size, pe, op);
138 }
139
140 static int mca_atomic_basic_add(shmem_ctx_t ctx, void *target, uint64_t value,
141 size_t size, int pe)
142 {
143 return mca_atomic_basic_op(ctx, target, value, size, pe,
144 MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_sum_int64));
145 }
146
147 static int mca_atomic_basic_and(shmem_ctx_t ctx,
148 void *target, uint64_t value,
149 size_t size, int pe)
150 {
151 return mca_atomic_basic_op(ctx, target, value, size, pe,
152 MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_and_int64));
153 }
154
155 static int mca_atomic_basic_or(shmem_ctx_t ctx, void *target, uint64_t value,
156 size_t size, int pe)
157 {
158 return mca_atomic_basic_op(ctx, target, value, size, pe,
159 MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_or_int64));
160 }
161
162 static int mca_atomic_basic_xor(shmem_ctx_t ctx,
163 void *target, uint64_t value,
164 size_t size, int pe)
165 {
166 return mca_atomic_basic_op(ctx, target, value, size, pe,
167 MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_xor_int64));
168 }
169
170 static int mca_atomic_basic_fadd(shmem_ctx_t ctx, void *target, void *prev, uint64_t value,
171 size_t size, int pe)
172 {
173 return mca_atomic_basic_fop(ctx, target, prev, value, size, pe,
174 MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_sum_int64));
175 }
176
177 static int mca_atomic_basic_fand(shmem_ctx_t ctx,
178 void *target, void *prev, uint64_t value,
179 size_t size, int pe)
180 {
181 return mca_atomic_basic_fop(ctx, target, prev, value, size, pe,
182 MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_and_int64));
183 }
184
185 static int mca_atomic_basic_for(shmem_ctx_t ctx, void *target, void *prev, uint64_t value,
186 size_t size, int pe)
187 {
188 return mca_atomic_basic_fop(ctx, target, prev, value, size, pe,
189 MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_or_int64));
190 }
191
192 static int mca_atomic_basic_fxor(shmem_ctx_t ctx, void *target, void *prev, uint64_t value,
193 size_t size, int pe)
194 {
195 return mca_atomic_basic_fop(ctx, target, prev, value, size, pe,
196 MCA_BASIC_OP(size, oshmem_op_sum_int32, oshmem_op_xor_int64));
197 }
198
199 static int mca_atomic_basic_swap(shmem_ctx_t ctx, void *target, void *prev, uint64_t value,
200 size_t size, int pe)
201 {
202 return mca_atomic_basic_fop(ctx, target, prev, value, size, pe,
203 MCA_BASIC_OP(size, oshmem_op_swap_int32, oshmem_op_swap_int64));
204 }
205
206 mca_atomic_base_module_t *
207 mca_atomic_basic_query(int *priority)
208 {
209 mca_atomic_basic_module_t *module;
210
211 *priority = mca_atomic_basic_component.priority;
212
213 module = OBJ_NEW(mca_atomic_basic_module_t);
214 if (module) {
215 module->super.atomic_add = mca_atomic_basic_add;
216 module->super.atomic_and = mca_atomic_basic_and;
217 module->super.atomic_or = mca_atomic_basic_or;
218 module->super.atomic_xor = mca_atomic_basic_xor;
219 module->super.atomic_fadd = mca_atomic_basic_fadd;
220 module->super.atomic_fand = mca_atomic_basic_fand;
221 module->super.atomic_for = mca_atomic_basic_for;
222 module->super.atomic_fxor = mca_atomic_basic_fxor;
223 module->super.atomic_swap = mca_atomic_basic_swap;
224 module->super.atomic_cswap = mca_atomic_basic_cswap;
225 return &(module->super);
226 }
227
228 return NULL ;
229 }
230
231 void atomic_basic_lock(shmem_ctx_t ctx, int pe)
232 {
233 int index = -1;
234 int me = oshmem_my_proc_id();
235 int num_pe = oshmem_num_procs();
236 char lock_required = ATOMIC_LOCK_WAITING;
237 char lock_active = ATOMIC_LOCK_ACTIVE;
238 int root_pe = pe;
239
240 do {
241
242 do {
243 MCA_SPML_CALL(put(ctx, (void*)(atomic_lock_sync + me), sizeof(lock_required), (void*)&lock_required, root_pe));
244 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
245 } while (local_lock_sync[me] != lock_required);
246
247 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe));
248 while (index != me) {
249 if (local_lock_sync[index] != ATOMIC_LOCK_IDLE) {
250 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe));
251 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
252 } else {
253 index = (index + 1) % num_pe;
254 }
255 }
256
257
258 do {
259 MCA_SPML_CALL(put(ctx, (void*)(atomic_lock_sync + me), sizeof(lock_active), (void*)&lock_active, root_pe));
260 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
261 } while (local_lock_sync[me] != lock_active);
262
263 index = 0;
264 while ((index < num_pe)
265 && ((index == me)
266 || (local_lock_sync[index] != ATOMIC_LOCK_ACTIVE))) {
267 index = index + 1;
268 }
269
270 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_turn, sizeof(*atomic_lock_turn), (void*)local_lock_turn, root_pe));
271 } while (!((index >= num_pe)
272 && ((*local_lock_turn == me)
273 || (local_lock_sync[*local_lock_turn] == ATOMIC_LOCK_IDLE))));
274
275 MCA_SPML_CALL(put(ctx, (void*)atomic_lock_turn, sizeof(me), (void*)&me, root_pe));
276 }
277
278 void atomic_basic_unlock(shmem_ctx_t ctx, int pe)
279 {
280 int index = -1;
281 int me = oshmem_my_proc_id();
282 int num_pe = oshmem_num_procs();
283 char lock_idle = ATOMIC_LOCK_IDLE;
284 int root_pe = pe;
285
286 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
287 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe));
288
289 do {
290 index = (index + 1) % num_pe;
291 } while (local_lock_sync[index] == ATOMIC_LOCK_IDLE);
292
293 MCA_SPML_CALL(put(ctx, (void*)atomic_lock_turn, sizeof(index), (void*)&index, root_pe));
294
295 do {
296 MCA_SPML_CALL(put(ctx, (void*)(atomic_lock_sync + me), sizeof(lock_idle), (void*)&lock_idle, root_pe));
297 MCA_SPML_CALL(get(ctx, (void*)atomic_lock_sync, num_pe * sizeof(*atomic_lock_sync), (void*)local_lock_sync, root_pe));
298 } while (local_lock_sync[me] != lock_idle);
299 }