This source file includes the following definitions.
- mca_btl_vader_fbox_set_header
- mca_btl_vader_fbox_read_header
- mca_btl_vader_fbox_sendi
- mca_btl_vader_check_fboxes
- mca_btl_vader_try_fbox_setup
1
2
3
4
5
6
7
8
9
10
11
12
13
14 #if !defined(MCA_BTL_VADER_FBOX_H)
15 #define MCA_BTL_VADER_FBOX_H
16
17 #include "btl_vader.h"
18
/**
 * Bound on the per-endpoint polling loop in mca_btl_vader_check_fboxes().
 * That loop runs while its counter is <= this value, so at most
 * MCA_BTL_VADER_POLL_COUNT + 1 fast-box entries are consumed from one
 * endpoint per call.
 */
#define MCA_BTL_VADER_POLL_COUNT 31
20
/**
 * Header placed in front of every entry written to a fast box.
 *
 * The same 64 bits are exposed through three views:
 *  - data:     the named fields (size, tag, seq);
 *  - data_i32: two 32-bit words, used when the header is written or read
 *              one word at a time with a memory barrier in between;
 *  - ival:     the whole header as one integer (written as 0 to clear it).
 */
typedef union mca_btl_vader_fbox_hdr_t {
    struct {
        /** number of data bytes following the header; overlays data_i32.value0 */
        uint32_t size;

        /** entry tag; a value of 0 is treated by the reader as "nothing here".
         *  tag and seq together overlay data_i32.value1, so both become
         *  visible with a single 32-bit store. */
        uint16_t tag;

        /** sequence number the reader compares against its expected value */
        uint16_t seq;
    } data;
    struct {
        /** first 32-bit word (same storage as data.size) */
        uint32_t value0;
        /** second 32-bit word (same storage as data.tag + data.seq) */
        uint32_t value1;
    } data_i32;
    /** all 64 bits at once */
    uint64_t ival;
} mca_btl_vader_fbox_hdr_t;
40
/** Reinterpret an address as a pointer to a fast-box header. */
#define MCA_BTL_VADER_FBOX_HDR(x) ((mca_btl_vader_fbox_hdr_t *) (x))

/** Low 31 bits of a start/end value: the byte offset into the fast-box buffer. */
#define MCA_BTL_VADER_FBOX_OFFSET_MASK 0x7fffffff
/** Top bit of a start/end value: flipped whenever the offset wraps to the front. */
#define MCA_BTL_VADER_FBOX_HB_MASK 0x80000000

/**
 * Contiguous space available for writing at offset e.
 *
 * @param s    read offset (high bit already masked off)
 * @param e    write offset (high bit already masked off)
 * @param hbm  non-zero when the high bits of the start and end values match
 * @param size total size of the fast-box buffer
 *
 * Results:
 *  - s > e:              s - e      (space up to the reader's position)
 *  - s == e and !hbm:    0          (buffer is full)
 *  - s == e and hbm:     size - e   (buffer is empty)
 *  - s < e:              size - e   (space up to the end of the buffer)
 *
 * Every argument is parenthesized so that expression arguments (for example
 * a comparison passed as hbm) expand with the intended precedence. Note that
 * s and e are each evaluated twice; do not pass expressions with side effects.
 */
#define BUFFER_FREE(s,e,hbm,size) ((((s) + !(hbm)) > (e)) ? (s) - (e) : ((size) - (e)))

/** Evaluates to 1 if the high (wrap) bit is set in v, 0 otherwise. */
#define MCA_BTL_VADER_FBOX_OFFSET_HBS(v) (!!((v) & MCA_BTL_VADER_FBOX_HB_MASK))
53
54 void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, mca_btl_base_endpoint_t *ep);
55
56 static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, uint16_t tag,
57 uint16_t seq, uint32_t size)
58 {
59 mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}};
60
61 hdr->data_i32.value1 = 0;
62 opal_atomic_wmb ();
63 hdr->data_i32.value0 = size;
64 opal_atomic_wmb ();
65 hdr->data_i32.value1 = tmp.data_i32.value1;
66 }
67
68 static inline mca_btl_vader_fbox_hdr_t mca_btl_vader_fbox_read_header (mca_btl_vader_fbox_hdr_t *hdr)
69 {
70 mca_btl_vader_fbox_hdr_t tmp = {.data_i32 = {.value1 = hdr->data_i32.value1}};;
71 opal_atomic_rmb ();
72 tmp.data_i32.value0 = hdr->data_i32.value0;
73 return tmp;
74 }
75
76
77 static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsigned char tag,
78 void * restrict header, const size_t header_size,
79 void * restrict payload, const size_t payload_size)
80 {
81 const unsigned int fbox_size = mca_btl_vader_component.fbox_size;
82 size_t size = header_size + payload_size;
83 unsigned int start, end, buffer_free;
84 size_t data_size = size;
85 unsigned char *dst, *data;
86 bool hbs, hbm;
87
88
89 if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer || size > (fbox_size >> 2))) {
90 return false;
91 }
92
93 OPAL_THREAD_LOCK(&ep->lock);
94
95
96 hbs = MCA_BTL_VADER_FBOX_OFFSET_HBS(ep->fbox_out.end);
97 hbm = MCA_BTL_VADER_FBOX_OFFSET_HBS(ep->fbox_out.start) == hbs;
98
99
100 start = ep->fbox_out.start & MCA_BTL_VADER_FBOX_OFFSET_MASK;
101 end = ep->fbox_out.end & MCA_BTL_VADER_FBOX_OFFSET_MASK;
102 buffer_free = BUFFER_FREE(start, end, hbm, fbox_size);
103
104
105 size = (size + sizeof (mca_btl_vader_fbox_hdr_t) + MCA_BTL_VADER_FBOX_ALIGNMENT_MASK) & ~MCA_BTL_VADER_FBOX_ALIGNMENT_MASK;
106
107 dst = ep->fbox_out.buffer + end;
108
109 if (OPAL_UNLIKELY(buffer_free < size)) {
110
111 BTL_VERBOSE(("not enough room for a fragment of size %u. in use buffer segment: {start: %x, end: %x, high bit matches: %d}",
112 (unsigned) size, start, end, (int) hbm));
113
114
115 start = ep->fbox_out.start = ep->fbox_out.startp[0];
116
117
118 start &= MCA_BTL_VADER_FBOX_OFFSET_MASK;
119 hbm = MCA_BTL_VADER_FBOX_OFFSET_HBS(ep->fbox_out.start) == hbs;
120 buffer_free = BUFFER_FREE(start, end, hbm, fbox_size);
121
122 opal_atomic_rmb ();
123
124
125
126 if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) {
127 BTL_VERBOSE(("message will not fit in remaining buffer space. skipping to beginning"));
128
129 mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0xff, ep->fbox_out.seq++,
130 buffer_free - sizeof (mca_btl_vader_fbox_hdr_t));
131
132 end = MCA_BTL_VADER_FBOX_ALIGNMENT;
133
134 hbs = !hbs;
135
136 buffer_free = BUFFER_FREE(start, end, !hbm, fbox_size);
137 dst = ep->fbox_out.buffer + end;
138 }
139
140 if (OPAL_UNLIKELY(buffer_free < size)) {
141 ep->fbox_out.end = (hbs << 31) | end;
142 opal_atomic_wmb ();
143 OPAL_THREAD_UNLOCK(&ep->lock);
144 return false;
145 }
146 }
147
148 BTL_VERBOSE(("writing fragment of size %u to offset %u {start: 0x%x, end: 0x%x (hbs: %d)} of peer's buffer. free = %u",
149 (unsigned int) size, end, start, end, hbs, buffer_free));
150
151 data = dst + sizeof (mca_btl_vader_fbox_hdr_t);
152
153 memcpy (data, header, header_size);
154 if (payload) {
155
156 memcpy (data + header_size, payload, payload_size);
157 }
158
159 end += size;
160
161 if (OPAL_UNLIKELY(fbox_size == end)) {
162
163 hbs = !hbs;
164
165 end = MCA_BTL_VADER_FBOX_ALIGNMENT;
166 } else if (buffer_free > size) {
167 MCA_BTL_VADER_FBOX_HDR(ep->fbox_out.buffer + end)->ival = 0;
168 }
169
170
171 mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), tag, ep->fbox_out.seq++, data_size);
172
173
174 ep->fbox_out.end = ((uint32_t) hbs << 31) | end;
175 opal_atomic_wmb ();
176 OPAL_THREAD_UNLOCK(&ep->lock);
177
178 return true;
179 }
180
181 static inline bool mca_btl_vader_check_fboxes (void)
182 {
183 const unsigned int fbox_size = mca_btl_vader_component.fbox_size;
184 bool processed = false;
185
186 for (unsigned int i = 0 ; i < mca_btl_vader_component.num_fbox_in_endpoints ; ++i) {
187 mca_btl_base_endpoint_t *ep = mca_btl_vader_component.fbox_in_endpoints[i];
188 unsigned int start = ep->fbox_in.start & MCA_BTL_VADER_FBOX_OFFSET_MASK;
189
190
191 bool hbs = MCA_BTL_VADER_FBOX_OFFSET_HBS(ep->fbox_in.start);
192 int poll_count;
193
194 for (poll_count = 0 ; poll_count <= MCA_BTL_VADER_POLL_COUNT ; ++poll_count) {
195 const mca_btl_vader_fbox_hdr_t hdr = mca_btl_vader_fbox_read_header (MCA_BTL_VADER_FBOX_HDR(ep->fbox_in.buffer + start));
196
197
198 if (0 == hdr.data.tag || hdr.data.seq != ep->fbox_in.seq) {
199 break;
200 }
201
202 ++ep->fbox_in.seq;
203
204
205 opal_atomic_rmb ();
206
207 BTL_VERBOSE(("got frag from %d with header {.tag = %d, .size = %d, .seq = %u} from offset %u",
208 ep->peer_smp_rank, hdr.data.tag, hdr.data.size, hdr.data.seq, start));
209
210
211 if (OPAL_LIKELY((0xfe & hdr.data.tag) != 0xfe)) {
212 mca_btl_base_segment_t segment;
213 mca_btl_base_descriptor_t desc = {.des_segments = &segment, .des_segment_count = 1};
214 const mca_btl_active_message_callback_t *reg =
215 mca_btl_base_active_message_trigger + hdr.data.tag;
216
217
218
219
220
221
222 segment.seg_len = hdr.data.size;
223 segment.seg_addr.pval = (void *) (ep->fbox_in.buffer + start + sizeof (hdr));
224
225
226 reg->cbfunc(&mca_btl_vader.super, hdr.data.tag, &desc, reg->cbdata);
227 } else if (OPAL_LIKELY(0xfe == hdr.data.tag)) {
228
229 fifo_value_t *value = (fifo_value_t *)(ep->fbox_in.buffer + start + sizeof (hdr));
230 mca_btl_vader_hdr_t *hdr = relative2virtual(*value);
231 mca_btl_vader_poll_handle_frag (hdr, ep);
232 }
233
234 start = (start + hdr.data.size + sizeof (hdr) + MCA_BTL_VADER_FBOX_ALIGNMENT_MASK) & ~MCA_BTL_VADER_FBOX_ALIGNMENT_MASK;
235 if (OPAL_UNLIKELY(fbox_size == start)) {
236
237 start = MCA_BTL_VADER_FBOX_ALIGNMENT;
238
239 hbs = !hbs;
240 }
241 }
242
243 if (poll_count) {
244 BTL_VERBOSE(("left off at offset %u (hbs: %d)", start, hbs));
245
246
247
248 opal_atomic_mb ();
249 ep->fbox_in.start = ep->fbox_in.startp[0] = ((uint32_t) hbs << 31) | start;
250 processed = true;
251 }
252 }
253
254
255 return processed;
256 }
257
258 static inline void mca_btl_vader_try_fbox_setup (mca_btl_base_endpoint_t *ep, mca_btl_vader_hdr_t *hdr)
259 {
260 if (OPAL_UNLIKELY(NULL == ep->fbox_out.buffer && mca_btl_vader_component.fbox_threshold == OPAL_THREAD_ADD_FETCH_SIZE_T (&ep->send_count, 1))) {
261
262 OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
263
264
265 if (0 <= opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, -1)) {
266 opal_free_list_item_t *fbox = opal_free_list_get (&mca_btl_vader_component.vader_fboxes);
267
268 if (NULL != fbox) {
269
270 memset (fbox->ptr, 0, mca_btl_vader_component.fbox_size);
271 mca_btl_vader_endpoint_setup_fbox_send (ep, fbox);
272
273 hdr->flags |= MCA_BTL_VADER_FLAG_SETUP_FBOX;
274 hdr->fbox_base = virtual2relative((char *) ep->fbox_out.buffer);
275 } else {
276 opal_atomic_add_fetch_32 (&ep->fifo->fbox_available, 1);
277 }
278
279 opal_atomic_wmb ();
280 }
281
282 OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
283 }
284 }
285
286 #endif