1 /*
2 * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2009 HNR Consulting. All rights reserved.
4 * Copyright (c) 2012 Lawrence Livermore National Lab. All rights reserved.
5 * Copyright (c) 2014 Mellanox Technologies LTD. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 *
35 */
36
37 /*
38 * Abstract:
39 * OSM Congestion Control configuration implementation
40 *
41 * Author:
42 * Albert Chu, LLNL
43 */
44
45 #if HAVE_CONFIG_H
46 # include <config.h>
47 #endif /* HAVE_CONFIG_H */
48
49 #include <stdlib.h>
50 #include <string.h>
51
52 #include <iba/ib_types.h>
53 #include <complib/cl_debug.h>
54 #include <opensm/osm_file_ids.h>
55 #define FILE_ID OSM_FILE_CONGESTION_CONTROL_C
56 #include <opensm/osm_subnet.h>
57 #include <opensm/osm_opensm.h>
58 #include <opensm/osm_log.h>
59 #include <opensm/osm_subnet.h>
60 #include <opensm/osm_congestion_control.h>
61
62 #define CONGESTION_CONTROL_INITIAL_TID_VALUE 0x7A93
63
/*
 * Fill in the common CC MAD header and GSI addressing for a Set()
 * request, then enqueue the MAD for transmission by the cc_poller
 * thread.
 *
 * The caller has already placed the attribute payload in the MAD;
 * this routine only writes the header, CC key, log data and address
 * fields.  outstanding_mads is incremented under mad_queue_lock
 * together with the queue insert so that
 * osm_congestion_control_wait_pending_transactions() never misses a
 * queued MAD, and the poller is woken afterwards.
 */
static void cc_mad_post(osm_congestion_control_t *p_cc,
			osm_madw_t *p_madw,
			osm_node_t *p_node,
			osm_physp_t *p_physp,
			ib_net16_t attr_id,
			ib_net32_t attr_mod)
{
	osm_subn_opt_t *p_opt = &p_cc->subn->opt;
	ib_cc_mad_t *p_cc_mad;
	uint8_t port;

	OSM_LOG_ENTER(p_cc->log);

	port = osm_physp_get_port_num(p_physp);

	p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);

	p_cc_mad->header.base_ver = 1;
	p_cc_mad->header.mgmt_class = IB_MCLASS_CC;
	p_cc_mad->header.class_ver = 2;
	p_cc_mad->header.method = IB_MAD_METHOD_SET;
	p_cc_mad->header.status = 0;
	p_cc_mad->header.class_spec = 0;
	/* TIDs are drawn from a 32-bit counter; a TID of 0 is reserved,
	 * so if the counter just wrapped to 0, draw one more value. */
	p_cc_mad->header.trans_id =
		cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
			  (uint64_t) (0xFFFFFFFF));
	if (p_cc_mad->header.trans_id == 0)
		p_cc_mad->header.trans_id =
			cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) &
				  (uint64_t) (0xFFFFFFFF));
	p_cc_mad->header.attr_id = attr_id;
	p_cc_mad->header.resv = 0;
	p_cc_mad->header.attr_mod = attr_mod;

	p_cc_mad->cc_key = p_opt->cc_key;

	memset(p_cc_mad->log_data, '\0', IB_CC_LOG_DATA_SIZE);

	/* GSI addressing: send to the port's base LID via QP1 with the
	 * well known QKey, expecting a GetResp. */
	p_madw->mad_addr.dest_lid = osm_node_get_base_lid(p_node, port);
	p_madw->mad_addr.addr_type.gsi.remote_qp = IB_QP1;
	p_madw->mad_addr.addr_type.gsi.remote_qkey =
		cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
	p_madw->resp_expected = TRUE;
	p_madw->fail_msg = CL_DISP_MSGID_NONE;

	/* Context is echoed back to cc_rcv_mad()/cc_mad_send_err_callback()
	 * so they can locate the port this MAD was aimed at. */
	p_madw->context.cc_context.node_guid = osm_node_get_node_guid(p_node);
	p_madw->context.cc_context.port_guid = osm_physp_get_port_guid(p_physp);
	p_madw->context.cc_context.port = port;
	p_madw->context.cc_context.mad_method = IB_MAD_METHOD_SET;
	p_madw->context.cc_context.attr_mod = attr_mod;

	cl_spinlock_acquire(&p_cc->mad_queue_lock);
	cl_atomic_inc(&p_cc->outstanding_mads);
	cl_qlist_insert_tail(&p_cc->mad_queue, &p_madw->list_item);
	cl_spinlock_release(&p_cc->mad_queue_lock);

	cl_event_signal(&p_cc->cc_poller_wakeup);

	OSM_LOG_EXIT(p_cc->log);
}
124
/*
 * Pre-build the congestion control MAD payload templates (switch
 * congestion setting, CA congestion setting and the congestion
 * control table blocks) from the configured subnet options.  The
 * templates are later compared against each port's cached state and
 * sent only when they differ.
 */
static void cc_setup_mad_data(osm_sm_t * p_sm)
{
	osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
	osm_subn_opt_t *p_opt = &p_sm->p_subn->opt;
	uint16_t ccti_limit;
	int i;

	/* Switch Congestion Setting */
	p_cc->sw_cong_setting.control_map = p_opt->cc_sw_cong_setting_control_map;

	memcpy(p_cc->sw_cong_setting.victim_mask,
	       p_opt->cc_sw_cong_setting_victim_mask,
	       IB_CC_PORT_MASK_DATA_SIZE);

	memcpy(p_cc->sw_cong_setting.credit_mask,
	       p_opt->cc_sw_cong_setting_credit_mask,
	       IB_CC_PORT_MASK_DATA_SIZE);

	/* threshold is 4 bits, takes up upper nibble of byte */
	p_cc->sw_cong_setting.threshold_resv = (p_opt->cc_sw_cong_setting_threshold << 4);

	p_cc->sw_cong_setting.packet_size = p_opt->cc_sw_cong_setting_packet_size;

	/* cs threshold is 4 bits, takes up upper nibble of short */
	p_cc->sw_cong_setting.cs_threshold_resv =
		cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_threshold << 12);

	/* return delay is packed as 2-bit shift | 14-bit multiplier */
	p_cc->sw_cong_setting.cs_return_delay =
		cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_return_delay.shift << 14
			  | p_opt->cc_sw_cong_setting_credit_starvation_return_delay.multiplier);

	p_cc->sw_cong_setting.marking_rate = p_opt->cc_sw_cong_setting_marking_rate;

	/* CA Congestion Setting */
	p_cc->ca_cong_setting.port_control = p_opt->cc_ca_cong_setting_port_control;
	p_cc->ca_cong_setting.control_map = p_opt->cc_ca_cong_setting_control_map;

	for (i = 0; i < IB_CA_CONG_ENTRY_DATA_SIZE; i++) {
		ib_ca_cong_entry_t *p_entry;

		p_entry = &p_cc->ca_cong_setting.entry_list[i];

		p_entry->ccti_timer = p_opt->cc_ca_cong_entries[i].ccti_timer;
		p_entry->ccti_increase = p_opt->cc_ca_cong_entries[i].ccti_increase;
		p_entry->trigger_threshold = p_opt->cc_ca_cong_entries[i].trigger_threshold;
		p_entry->ccti_min = p_opt->cc_ca_cong_entries[i].ccti_min;
		p_entry->resv0 = 0;
		p_entry->resv1 = 0;
	}

	/* Congestion Control Table */

	/* if no entries, we will always send at least 1 mad to set ccti_limit = 0 */
	if (!p_opt->cc_cct.entries_len)
		p_cc->cc_tbl_mads = 1;
	else {
		/* ceil(entries_len / IB_CC_TBL_ENTRY_LIST_MAX) */
		p_cc->cc_tbl_mads = p_opt->cc_cct.entries_len - 1;
		p_cc->cc_tbl_mads /= IB_CC_TBL_ENTRY_LIST_MAX;
		p_cc->cc_tbl_mads += 1;
	}

	CL_ASSERT(p_cc->cc_tbl_mads <= OSM_CCT_ENTRY_MAD_BLOCKS);

	/* ccti_limit is the index of the last valid table entry */
	if (!p_opt->cc_cct.entries_len)
		ccti_limit = 0;
	else
		ccti_limit = p_opt->cc_cct.entries_len - 1;

	for (i = 0; i < p_cc->cc_tbl_mads; i++) {
		int j;

		p_cc->cc_tbl[i].ccti_limit = cl_hton16(ccti_limit);
		p_cc->cc_tbl[i].resv = 0;

		memset(p_cc->cc_tbl[i].entry_list,
		       '\0',
		       sizeof(p_cc->cc_tbl[i].entry_list));

		/* empty table: the single MAD carries only ccti_limit = 0 */
		if (!ccti_limit)
			break;

		/* NOTE(review): for the last block, k may run past
		 * entries_len - 1; this presumes cc_cct.entries is a
		 * fixed-size array covering all MAD blocks — confirm
		 * against the osm_cct_t declaration. */
		for (j = 0; j < IB_CC_TBL_ENTRY_LIST_MAX; j++) {
			int k;

			k = (i * IB_CC_TBL_ENTRY_LIST_MAX) + j;
			p_cc->cc_tbl[i].entry_list[j].shift_multiplier =
				cl_hton16(p_opt->cc_cct.entries[k].shift << 14
					  | p_opt->cc_cct.entries[k].multiplier);
		}
	}
}
216
cc_send_sw_cong_setting(osm_sm_t * p_sm,osm_node_t * p_node)217 static ib_api_status_t cc_send_sw_cong_setting(osm_sm_t * p_sm,
218 osm_node_t *p_node)
219 {
220 osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
221 unsigned force_update;
222 osm_physp_t *p_physp;
223 osm_madw_t *p_madw = NULL;
224 ib_cc_mad_t *p_cc_mad = NULL;
225 ib_sw_cong_setting_t *p_sw_cong_setting = NULL;
226
227 OSM_LOG_ENTER(p_sm->p_log);
228
229 p_physp = osm_node_get_physp_ptr(p_node, 0);
230
231 force_update = p_physp->need_update || p_sm->p_subn->need_update;
232
233 if (!force_update
234 && !memcmp(&p_cc->sw_cong_setting,
235 &p_physp->cc.sw.sw_cong_setting,
236 sizeof(p_cc->sw_cong_setting)))
237 return IB_SUCCESS;
238
239 p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
240 MAD_BLOCK_SIZE, NULL);
241 if (p_madw == NULL) {
242 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C101: "
243 "failed to allocate mad\n");
244 return IB_INSUFFICIENT_MEMORY;
245 }
246
247 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
248
249 p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
250
251 memcpy(p_sw_cong_setting,
252 &p_cc->sw_cong_setting,
253 sizeof(p_cc->sw_cong_setting));
254
255 cc_mad_post(p_cc, p_madw, p_node, p_physp,
256 IB_MAD_ATTR_SW_CONG_SETTING, 0);
257
258 OSM_LOG_EXIT(p_sm->p_log);
259
260 return IB_SUCCESS;
261 }
262
cc_send_ca_cong_setting(osm_sm_t * p_sm,osm_node_t * p_node,osm_physp_t * p_physp)263 static ib_api_status_t cc_send_ca_cong_setting(osm_sm_t * p_sm,
264 osm_node_t *p_node,
265 osm_physp_t *p_physp)
266 {
267 osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
268 unsigned force_update;
269 osm_madw_t *p_madw = NULL;
270 ib_cc_mad_t *p_cc_mad = NULL;
271 ib_ca_cong_setting_t *p_ca_cong_setting = NULL;
272
273 OSM_LOG_ENTER(p_sm->p_log);
274
275 force_update = p_physp->need_update || p_sm->p_subn->need_update;
276
277 if (!force_update
278 && !memcmp(&p_cc->ca_cong_setting,
279 &p_physp->cc.ca.ca_cong_setting,
280 sizeof(p_cc->ca_cong_setting)))
281 return IB_SUCCESS;
282
283 p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
284 MAD_BLOCK_SIZE, NULL);
285 if (p_madw == NULL) {
286 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C102: "
287 "failed to allocate mad\n");
288 return IB_INSUFFICIENT_MEMORY;
289 }
290
291 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
292
293 p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
294
295 memcpy(p_ca_cong_setting,
296 &p_cc->ca_cong_setting,
297 sizeof(p_cc->ca_cong_setting));
298
299 cc_mad_post(p_cc, p_madw, p_node, p_physp,
300 IB_MAD_ATTR_CA_CONG_SETTING, 0);
301
302 OSM_LOG_EXIT(p_sm->p_log);
303
304 return IB_SUCCESS;
305 }
306
cc_send_cct(osm_sm_t * p_sm,osm_node_t * p_node,osm_physp_t * p_physp)307 static ib_api_status_t cc_send_cct(osm_sm_t * p_sm,
308 osm_node_t *p_node,
309 osm_physp_t *p_physp)
310 {
311 osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc;
312 unsigned force_update;
313 osm_madw_t *p_madw = NULL;
314 ib_cc_mad_t *p_cc_mad = NULL;
315 ib_cc_tbl_t *p_cc_tbl = NULL;
316 unsigned int index = 0;
317
318 OSM_LOG_ENTER(p_sm->p_log);
319
320 force_update = p_physp->need_update || p_sm->p_subn->need_update;
321
322 for (index = 0; index < p_cc->cc_tbl_mads; index++) {
323 if (!force_update
324 && !memcmp(&p_cc->cc_tbl[index],
325 &p_physp->cc.ca.cc_tbl[index],
326 sizeof(p_cc->cc_tbl[index])))
327 continue;
328
329 p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle,
330 MAD_BLOCK_SIZE, NULL);
331 if (p_madw == NULL) {
332 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C103: "
333 "failed to allocate mad\n");
334 return IB_INSUFFICIENT_MEMORY;
335 }
336
337 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);
338
339 p_cc_tbl = (ib_cc_tbl_t *)ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
340
341 memcpy(p_cc_tbl,
342 &p_cc->cc_tbl[index],
343 sizeof(p_cc->cc_tbl[index]));
344
345 cc_mad_post(p_cc, p_madw, p_node, p_physp,
346 IB_MAD_ATTR_CC_TBL, cl_hton32(index));
347 }
348
349 OSM_LOG_EXIT(p_sm->p_log);
350
351 return IB_SUCCESS;
352 }
353
osm_congestion_control_setup(struct osm_opensm * p_osm)354 int osm_congestion_control_setup(struct osm_opensm *p_osm)
355 {
356 cl_qmap_t *p_tbl;
357 cl_map_item_t *p_next;
358 int ret = 0;
359
360 if (!p_osm->subn.opt.congestion_control)
361 return 0;
362
363 OSM_LOG_ENTER(&p_osm->log);
364
365 /*
366 * Do nothing unless the most recent routing attempt was successful.
367 */
368 if (!p_osm->routing_engine_used)
369 return 0;
370
371 cc_setup_mad_data(&p_osm->sm);
372
373 cl_plock_acquire(&p_osm->lock);
374
375 p_tbl = &p_osm->subn.port_guid_tbl;
376 p_next = cl_qmap_head(p_tbl);
377 while (p_next != cl_qmap_end(p_tbl)) {
378 osm_port_t *p_port = (osm_port_t *) p_next;
379 osm_node_t *p_node = p_port->p_node;
380 ib_api_status_t status;
381
382 p_next = cl_qmap_next(p_next);
383
384 if (p_port->cc_unavailable_flag)
385 continue;
386
387 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) {
388 status = cc_send_sw_cong_setting(&p_osm->sm, p_node);
389 if (status != IB_SUCCESS)
390 ret = -1;
391 } else if (osm_node_get_type(p_node) == IB_NODE_TYPE_CA) {
392 status = cc_send_ca_cong_setting(&p_osm->sm,
393 p_node,
394 p_port->p_physp);
395 if (status != IB_SUCCESS)
396 ret = -1;
397
398 status = cc_send_cct(&p_osm->sm,
399 p_node,
400 p_port->p_physp);
401 if (status != IB_SUCCESS)
402 ret = -1;
403 }
404 }
405
406 cl_plock_release(&p_osm->lock);
407
408 OSM_LOG_EXIT(&p_osm->log);
409
410 return ret;
411 }
412
osm_congestion_control_wait_pending_transactions(struct osm_opensm * p_osm)413 int osm_congestion_control_wait_pending_transactions(struct osm_opensm *p_osm)
414 {
415 osm_congestion_control_t *cc = &p_osm->cc;
416
417 if (!p_osm->subn.opt.congestion_control)
418 return 0;
419
420 while (1) {
421 unsigned count = cc->outstanding_mads;
422 if (!count || osm_exit_flag)
423 break;
424 cl_event_wait_on(&cc->outstanding_mads_done_event,
425 EVENT_NO_TIMEOUT,
426 TRUE);
427 }
428
429 return osm_exit_flag;
430 }
431
/*
 * Account for one completed CC MAD (response received or send error).
 * Signals outstanding_mads_done_event when the overall count reaches
 * zero, and always signals sig_mads_on_wire_continue so that
 * cc_poller_send() may release another MAD onto the wire.
 */
static inline void decrement_outstanding_mads(osm_congestion_control_t *p_cc)
{
	uint32_t outstanding;

	outstanding = cl_atomic_dec(&p_cc->outstanding_mads);
	if (!outstanding)
		cl_event_signal(&p_cc->outstanding_mads_done_event);

	cl_atomic_dec(&p_cc->outstanding_mads_on_wire);
	cl_event_signal(&p_cc->sig_mads_on_wire_continue);
}
443
/*
 * Dispatcher callback for received CC MAD responses.
 *
 * Updates the per-port cached CC state from the response payload so
 * subsequent sweeps can skip unchanged attributes, and maintains the
 * port's cc_timeout_count / cc_unavailable_flag.  Always decrements
 * the outstanding-MAD counters and returns the wrapper to the pool.
 *
 * context is the osm_congestion_control_t, data the osm_madw_t posted
 * by cc_mad_recv_callback().
 */
static void cc_rcv_mad(void *context, void *data)
{
	osm_congestion_control_t *p_cc = context;
	osm_opensm_t *p_osm = p_cc->osm;
	osm_madw_t *p_madw = data;
	ib_cc_mad_t *p_cc_mad;
	osm_madw_context_t *p_mad_context = &p_madw->context;
	ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
	ib_net64_t node_guid = p_mad_context->cc_context.node_guid;
	ib_net64_t port_guid = p_mad_context->cc_context.port_guid;
	uint8_t port = p_mad_context->cc_context.port;
	osm_port_t *p_port;

	OSM_LOG_ENTER(p_cc->log);

	OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
		"Processing received MAD status 0x%x for "
		"attr ID %u mod 0x%x node 0x%" PRIx64 " port %u\n",
		cl_ntoh16(p_mad->status), cl_ntoh16(p_mad->attr_id),
		cl_ntoh32(p_mad_context->cc_context.attr_mod),
		cl_ntoh64(node_guid), port);

	p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw);

	cl_plock_acquire(&p_osm->lock);

	/* the port may have disappeared since the request was posted */
	p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
	if (!p_port) {
		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C109: "
			"Port GUID 0x%" PRIx64 " not in table\n",
			cl_ntoh64(port_guid));
		cl_plock_release(&p_osm->lock);
		goto Exit;
	}

	/* a response arrived, so the port is reachable again */
	p_port->cc_timeout_count = 0;

	if (p_cc_mad->header.status) {
		/* "unsupported" status codes mean the port has no CC
		 * support; remember that so future sweeps skip it */
		if (p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_CLASS_VER
		    || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD
		    || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD_ATTR)
			p_port->cc_unavailable_flag = TRUE;
		cl_plock_release(&p_osm->lock);
		goto Exit;
	}
	else
		p_port->cc_unavailable_flag = FALSE;

	/* cache the confirmed attribute so the next sweep's memcmp in
	 * the cc_send_* routines can detect "already configured" */
	if (p_cc_mad->header.attr_id == IB_MAD_ATTR_SW_CONG_SETTING) {
		ib_sw_cong_setting_t *p_sw_cong_setting;

		p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
		p_port->p_physp->cc.sw.sw_cong_setting = *p_sw_cong_setting;
	}
	else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CA_CONG_SETTING) {
		ib_ca_cong_setting_t *p_ca_cong_setting;

		p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
		p_port->p_physp->cc.ca.ca_cong_setting = *p_ca_cong_setting;
	}
	else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CC_TBL) {
		/* index comes from our own request context (set in
		 * cc_send_cct), so it stays within cc_tbl_mads */
		ib_net32_t attr_mod = p_mad_context->cc_context.attr_mod;
		uint32_t index = cl_ntoh32(attr_mod);
		ib_cc_tbl_t *p_cc_tbl;

		p_cc_tbl = ib_cc_mad_get_mgt_data_ptr(p_cc_mad);
		p_port->p_physp->cc.ca.cc_tbl[index] = *p_cc_tbl;
	}
	else
		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10A: "
			"Unexpected MAD attribute ID %u received\n",
			cl_ntoh16(p_cc_mad->header.attr_id));

	cl_plock_release(&p_osm->lock);

Exit:
	decrement_outstanding_mads(p_cc);
	osm_mad_pool_put(p_cc->mad_pool, p_madw);
	OSM_LOG_EXIT(p_cc->log);
}
524
/*
 * Hand one queued MAD to the vendor layer and throttle the poller:
 * after a successful send, block while the number of MADs on the wire
 * exceeds the configured cc_max_outstanding_mads limit
 * (sig_mads_on_wire_continue is signalled by
 * decrement_outstanding_mads() as completions arrive).
 *
 * The context is copied up front because the vendor layer owns the
 * MAD wrapper once osm_vendor_send() is called.
 *
 * Fixes: the error log was missing the space between the node GUID
 * and "port"; the goto-based wait retry is replaced by a structured
 * loop with identical behavior.
 */
static void cc_poller_send(osm_congestion_control_t *p_cc,
			   osm_madw_t *p_madw)
{
	osm_subn_opt_t *p_opt = &p_cc->subn->opt;
	ib_api_status_t status;
	cl_status_t sts;
	osm_madw_context_t mad_context = p_madw->context;

	status = osm_vendor_send(p_cc->bind_handle, p_madw, TRUE);
	if (status == IB_SUCCESS) {
		cl_atomic_inc(&p_cc->outstanding_mads_on_wire);
		while (p_cc->outstanding_mads_on_wire >
		       (int32_t)p_opt->cc_max_outstanding_mads) {
			/* retry the wait until it succeeds */
			do {
				sts = cl_event_wait_on(&p_cc->sig_mads_on_wire_continue,
						       EVENT_NO_TIMEOUT, TRUE);
			} while (sts != CL_SUCCESS);
		}
	} else
		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C104: "
			"send failed to node 0x%" PRIx64 " port %u\n",
			cl_ntoh64(mad_context.cc_context.node_guid),
			mad_context.cc_context.port);
}
550
cc_poller(void * p_ptr)551 static void cc_poller(void *p_ptr)
552 {
553 osm_congestion_control_t *p_cc = p_ptr;
554 osm_madw_t *p_madw;
555
556 OSM_LOG_ENTER(p_cc->log);
557
558 if (p_cc->thread_state == OSM_THREAD_STATE_NONE)
559 p_cc->thread_state = OSM_THREAD_STATE_RUN;
560
561 while (p_cc->thread_state == OSM_THREAD_STATE_RUN) {
562 cl_spinlock_acquire(&p_cc->mad_queue_lock);
563
564 p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
565
566 cl_spinlock_release(&p_cc->mad_queue_lock);
567
568 if (p_madw != (osm_madw_t *) cl_qlist_end(&p_cc->mad_queue))
569 cc_poller_send(p_cc, p_madw);
570 else
571 cl_event_wait_on(&p_cc->cc_poller_wakeup,
572 EVENT_NO_TIMEOUT, TRUE);
573 }
574
575 OSM_LOG_EXIT(p_cc->log);
576 }
577
osm_congestion_control_init(osm_congestion_control_t * p_cc,struct osm_opensm * p_osm,const osm_subn_opt_t * p_opt)578 ib_api_status_t osm_congestion_control_init(osm_congestion_control_t * p_cc,
579 struct osm_opensm *p_osm,
580 const osm_subn_opt_t * p_opt)
581 {
582 ib_api_status_t status = IB_SUCCESS;
583
584 OSM_LOG_ENTER(&p_osm->log);
585
586 memset(p_cc, 0, sizeof(*p_cc));
587
588 p_cc->osm = p_osm;
589 p_cc->subn = &p_osm->subn;
590 p_cc->sm = &p_osm->sm;
591 p_cc->log = &p_osm->log;
592 p_cc->mad_pool = &p_osm->mad_pool;
593 p_cc->trans_id = CONGESTION_CONTROL_INITIAL_TID_VALUE;
594 p_cc->vendor = p_osm->p_vendor;
595
596 p_cc->cc_disp_h = cl_disp_register(&p_osm->disp, OSM_MSG_MAD_CC,
597 cc_rcv_mad, p_cc);
598 if (p_cc->cc_disp_h == CL_DISP_INVALID_HANDLE)
599 goto Exit;
600
601 cl_qlist_init(&p_cc->mad_queue);
602
603 status = cl_spinlock_init(&p_cc->mad_queue_lock);
604 if (status != IB_SUCCESS)
605 goto Exit;
606
607 cl_event_construct(&p_cc->cc_poller_wakeup);
608 status = cl_event_init(&p_cc->cc_poller_wakeup, FALSE);
609 if (status != IB_SUCCESS)
610 goto Exit;
611
612 cl_event_construct(&p_cc->outstanding_mads_done_event);
613 status = cl_event_init(&p_cc->outstanding_mads_done_event, FALSE);
614 if (status != IB_SUCCESS)
615 goto Exit;
616
617 cl_event_construct(&p_cc->sig_mads_on_wire_continue);
618 status = cl_event_init(&p_cc->sig_mads_on_wire_continue, FALSE);
619 if (status != IB_SUCCESS)
620 goto Exit;
621
622 p_cc->thread_state = OSM_THREAD_STATE_NONE;
623
624 status = cl_thread_init(&p_cc->cc_poller, cc_poller, p_cc,
625 "cc poller");
626 if (status != IB_SUCCESS)
627 goto Exit;
628
629 status = IB_SUCCESS;
630 Exit:
631 OSM_LOG_EXIT(p_cc->log);
632 return status;
633 }
634
/*
 * Vendor-layer receive callback for CC MAD responses.
 *
 * Copies the request context onto the response wrapper, returns the
 * request wrapper to the pool, and posts the response to the
 * dispatcher for processing by cc_rcv_mad().  On dispatcher failure
 * the response wrapper is returned to the pool here.
 */
static void cc_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
				 osm_madw_t * p_req_madw)
{
	osm_congestion_control_t *p_cc = bind_context;

	OSM_LOG_ENTER(p_cc->log);

	CL_ASSERT(p_madw);

	/* HACK - should be extended when supporting CC traps */
	CL_ASSERT(p_req_madw != NULL);

	osm_madw_copy_context(p_madw, p_req_madw);
	osm_mad_pool_put(p_cc->mad_pool, p_req_madw);

	/* Do not decrement outstanding mads here, do it in the dispatcher */

	if (cl_disp_post(p_cc->cc_disp_h, OSM_MSG_MAD_CC,
			 p_madw, NULL, NULL) != CL_SUCCESS) {
		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C105: "
			"Congestion Control Dispatcher post failed\n");
		osm_mad_pool_put(p_cc->mad_pool, p_madw);
	}

	OSM_LOG_EXIT(p_cc->log);
}
661
/*
 * Vendor-layer send-error callback for CC MADs (timeouts and other
 * send failures).
 *
 * Logs the failure (suppressed for repeated timeouts on the same
 * port), tracks consecutive timeouts per port and flags the port as
 * CC-unavailable once OSM_CC_TIMEOUT_COUNT_THRESHOLD is exceeded.
 * Non-timeout errors mark the subnet initialization as failed so the
 * sweep is retried.  Always returns the wrapper to the pool and
 * decrements the outstanding-MAD counters.
 */
static void cc_mad_send_err_callback(void *bind_context,
				     osm_madw_t * p_madw)
{
	osm_congestion_control_t *p_cc = bind_context;
	osm_madw_context_t *p_madw_context = &p_madw->context;
	osm_opensm_t *p_osm = p_cc->osm;
	uint64_t node_guid = p_madw_context->cc_context.node_guid;
	uint64_t port_guid = p_madw_context->cc_context.port_guid;
	uint8_t port = p_madw_context->cc_context.port;
	osm_port_t *p_port;
	int log_flag = 1;

	OSM_LOG_ENTER(p_cc->log);

	cl_plock_acquire(&p_osm->lock);

	/* the port may have disappeared since the request was posted */
	p_port = osm_get_port_by_guid(p_cc->subn, port_guid);
	if (!p_port) {
		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10B: "
			"Port GUID 0x%" PRIx64 " not in table\n",
			cl_ntoh64(port_guid));
		cl_plock_release(&p_osm->lock);
		goto Exit;
	}

	/* If it timed out before, don't bother logging again;
	 * we assume the port has no CC support
	 */
	if (p_madw->status == IB_TIMEOUT
	    && p_port->cc_timeout_count)
		log_flag = 0;

	if (log_flag)
		OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106: MAD Error (%s): "
			"attr id = %u LID %u GUID 0x%016" PRIx64 " port %u "
			"TID 0x%" PRIx64 "\n",
			ib_get_err_str(p_madw->status),
			p_madw->p_mad->attr_id,
			cl_ntoh16(p_madw->mad_addr.dest_lid),
			cl_ntoh64(node_guid),
			port,
			cl_ntoh64(p_madw->p_mad->trans_id));

	if (p_madw->status == IB_TIMEOUT) {
		/* too many consecutive timeouts => assume no CC support */
		p_port->cc_timeout_count++;
		if (p_port->cc_timeout_count > OSM_CC_TIMEOUT_COUNT_THRESHOLD
		    && !p_port->cc_unavailable_flag) {
			p_port->cc_unavailable_flag = TRUE;
			p_port->cc_timeout_count = 0;
		}
	} else
		p_cc->subn->subnet_initialization_error = TRUE;

	cl_plock_release(&p_osm->lock);

Exit:
	osm_mad_pool_put(p_cc->mad_pool, p_madw);

	decrement_outstanding_mads(p_cc);

	OSM_LOG_EXIT(p_cc->log);
}
724
osm_congestion_control_bind(osm_congestion_control_t * p_cc,ib_net64_t port_guid)725 ib_api_status_t osm_congestion_control_bind(osm_congestion_control_t * p_cc,
726 ib_net64_t port_guid)
727 {
728 osm_bind_info_t bind_info;
729 ib_api_status_t status = IB_SUCCESS;
730
731 OSM_LOG_ENTER(p_cc->log);
732
733 bind_info.port_guid = p_cc->port_guid = port_guid;
734 bind_info.mad_class = IB_MCLASS_CC;
735 bind_info.class_version = 2;
736 bind_info.is_responder = FALSE;
737 bind_info.is_report_processor = FALSE;
738 bind_info.is_trap_processor = FALSE;
739 bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE;
740 bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE;
741 bind_info.timeout = p_cc->subn->opt.transaction_timeout;
742 bind_info.retries = p_cc->subn->opt.transaction_retries;
743
744 OSM_LOG(p_cc->log, OSM_LOG_VERBOSE,
745 "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
746
747 p_cc->bind_handle = osm_vendor_bind(p_cc->vendor, &bind_info,
748 p_cc->mad_pool,
749 cc_mad_recv_callback,
750 cc_mad_send_err_callback, p_cc);
751
752 if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) {
753 status = IB_ERROR;
754 OSM_LOG(p_cc->log, OSM_LOG_ERROR,
755 "ERR C107: Vendor specific bind failed (%s)\n",
756 ib_get_err_str(status));
757 goto Exit;
758 }
759
760 Exit:
761 OSM_LOG_EXIT(p_cc->log);
762 return status;
763 }
764
/*
 * Unregister the CC dispatcher handler.  Logs an error if
 * osm_congestion_control_bind() was never successfully called.
 */
void osm_congestion_control_shutdown(osm_congestion_control_t * p_cc)
{
	OSM_LOG_ENTER(p_cc->log);

	if (p_cc->bind_handle != OSM_BIND_INVALID_HANDLE)
		cl_disp_unregister(p_cc->cc_disp_h);
	else
		OSM_LOG(p_cc->log, OSM_LOG_ERROR,
			"ERR C108: No previous bind\n");

	OSM_LOG_EXIT(p_cc->log);
}
777
/*
 * Tear down the congestion control object: stop and join the poller
 * thread, return any still-queued MADs to the pool, and destroy the
 * queue lock and synchronization events.
 *
 * The thread state is set to EXIT before signalling both events so
 * the poller observes the new state regardless of which wait it is
 * blocked in.
 */
void osm_congestion_control_destroy(osm_congestion_control_t * p_cc)
{
	osm_madw_t *p_madw;

	OSM_LOG_ENTER(p_cc->log);

	p_cc->thread_state = OSM_THREAD_STATE_EXIT;

	/* wake the poller out of either blocking wait */
	cl_event_signal(&p_cc->sig_mads_on_wire_continue);
	cl_event_signal(&p_cc->cc_poller_wakeup);

	cl_thread_destroy(&p_cc->cc_poller);

	/* drain MADs that were queued but never sent */
	cl_spinlock_acquire(&p_cc->mad_queue_lock);

	while (!cl_is_qlist_empty(&p_cc->mad_queue)) {
		p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue);
		osm_mad_pool_put(p_cc->mad_pool, p_madw);
	}

	cl_spinlock_release(&p_cc->mad_queue_lock);

	cl_spinlock_destroy(&p_cc->mad_queue_lock);

	cl_event_destroy(&p_cc->cc_poller_wakeup);
	cl_event_destroy(&p_cc->outstanding_mads_done_event);
	cl_event_destroy(&p_cc->sig_mads_on_wire_continue);

	OSM_LOG_EXIT(p_cc->log);
}
808