1 /*
2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved.
6 * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
7 * Copyright (c) 2010 HNR Consulting. All rights reserved.
8 * Copyright (C) 2012-2013 Tokyo Institute of Technology. All rights reserved.
9 *
10 * This software is available to you under a choice of one of two
11 * licenses. You may choose to be licensed under the terms of the GNU
12 * General Public License (GPL) Version 2, available from the file
13 * COPYING in the main directory of this source tree, or the
14 * OpenIB.org BSD license below:
15 *
16 * Redistribution and use in source and binary forms, with or
17 * without modification, are permitted provided that the following
18 * conditions are met:
19 *
20 * - Redistributions of source code must retain the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer.
23 *
24 * - Redistributions in binary form must reproduce the above
25 * copyright notice, this list of conditions and the following
26 * disclaimer in the documentation and/or other materials
27 * provided with the distribution.
28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
33 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
34 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 * SOFTWARE.
37 *
38 */
39
40 /*
41 * Abstract:
42 * Implementation of osm_mcast_mgr_t.
43 * This file implements the Multicast Manager object.
44 */
45
46 #if HAVE_CONFIG_H
47 # include <config.h>
48 #endif /* HAVE_CONFIG_H */
49
50 #include <stdlib.h>
51 #include <string.h>
52 #include <iba/ib_types.h>
53 #include <complib/cl_debug.h>
54 #include <opensm/osm_file_ids.h>
55 #define FILE_ID OSM_FILE_MCAST_MGR_C
56 #include <opensm/osm_opensm.h>
57 #include <opensm/osm_sm.h>
58 #include <opensm/osm_multicast.h>
59 #include <opensm/osm_node.h>
60 #include <opensm/osm_switch.h>
61 #include <opensm/osm_helper.h>
62 #include <opensm/osm_msgdef.h>
63 #include <opensm/osm_mcast_mgr.h>
64
mcast_work_obj_new(IN osm_port_t * p_port)65 static osm_mcast_work_obj_t *mcast_work_obj_new(IN osm_port_t * p_port)
66 {
67 osm_mcast_work_obj_t *p_obj;
68
69 /*
70 clean allocated memory to avoid assertion when trying to insert to
71 qlist.
72 see cl_qlist_insert_tail(): CL_ASSERT(p_list_item->p_list != p_list)
73 */
74 p_obj = malloc(sizeof(*p_obj));
75 if (p_obj) {
76 memset(p_obj, 0, sizeof(*p_obj));
77 p_obj->p_port = p_port;
78 }
79
80 return p_obj;
81 }
82
static void mcast_work_obj_delete(IN osm_mcast_work_obj_t * p_wobj)
{
	/* Release a work object created by mcast_work_obj_new().
	   free(NULL) is a no-op, so no guard is needed. */
	free(p_wobj);
}
87
/*
 * Build the flat list (and dedup map keyed by port GUID) of all member
 * ports of every group sharing this mgrp box / MLID.
 *
 * On success returns 0 with 'list' holding one work object per unique
 * member port and 'map' indexing them by GUID.  On allocation failure
 * returns -1; any partially built list/map is torn down here so the
 * caller is not left holding leaked work objects.
 */
int osm_mcast_make_port_list_and_map(cl_qlist_t * list, cl_qmap_t * map,
				     osm_mgrp_box_t * mbox)
{
	cl_map_item_t *map_item;
	cl_list_item_t *list_item;
	osm_mgrp_t *mgrp;
	osm_mcm_port_t *mcm_port;
	osm_mcast_work_obj_t *wobj;

	cl_qmap_init(map);
	cl_qlist_init(list);

	for (list_item = cl_qlist_head(&mbox->mgrp_list);
	     list_item != cl_qlist_end(&mbox->mgrp_list);
	     list_item = cl_qlist_next(list_item)) {
		mgrp = cl_item_obj(list_item, mgrp, list_item);
		for (map_item = cl_qmap_head(&mgrp->mcm_port_tbl);
		     map_item != cl_qmap_end(&mgrp->mcm_port_tbl);
		     map_item = cl_qmap_next(map_item)) {
			/* Acquire the port object for this port guid, then
			   create the new worker object to build the list. */
			mcm_port = cl_item_obj(map_item, mcm_port, map_item);
			/* Skip a port already collected via another group
			   that shares this MLID. */
			if (cl_qmap_get(map, mcm_port->port->guid) !=
			    cl_qmap_end(map))
				continue;
			wobj = mcast_work_obj_new(mcm_port->port);
			if (!wobj) {
				/* Undo partial work so the caller is not
				   left with a half-built list to leak.
				   Detach map items first, then free the
				   objects through the list. */
				cl_qmap_remove_all(map);
				osm_mcast_drop_port_list(list);
				return -1;
			}
			cl_qlist_insert_tail(list, &wobj->list_item);
			cl_qmap_insert(map, mcm_port->port->guid,
				       &wobj->map_item);
		}
	}
	return 0;
}
123
void osm_mcast_drop_port_list(cl_qlist_t * list)
{
	/* Drain the list, freeing each work object as it comes off. */
	osm_mcast_work_obj_t *wobj;

	while (cl_qlist_count(list)) {
		wobj = (osm_mcast_work_obj_t *) cl_qlist_remove_head(list);
		mcast_work_obj_delete(wobj);
	}
}
130
void osm_purge_mtree(osm_sm_t * sm, IN osm_mgrp_box_t * mbox)
{
	OSM_LOG_ENTER(sm->p_log);

	/* Tear down the existing spanning tree (if any) and reset the
	   root pointer so the tree can be rebuilt from scratch. */
	if (mbox->root) {
		osm_mtree_destroy(mbox->root);
		mbox->root = NULL;
	}

	OSM_LOG_EXIT(sm->p_log);
}
141
/*
 * Build a map (keyed by switch node GUID) of every switch that either
 * is itself a group member or has member ports attached, updating each
 * switch's is_mc_member / num_of_mcm bookkeeping along the way.
 * Pair with destroy_mgrp_switch_map() to reset the bookkeeping.
 */
static void create_mgrp_switch_map(cl_qmap_t * m, cl_qlist_t * port_list)
{
	osm_mcast_work_obj_t *wobj;
	osm_port_t *port;
	osm_switch_t *sw;
	ib_net64_t guid;
	cl_list_item_t *i;

	cl_qmap_init(m);
	for (i = cl_qlist_head(port_list); i != cl_qlist_end(port_list);
	     i = cl_qlist_next(i)) {
		wobj = cl_item_obj(i, wobj, list_item);
		port = wobj->p_port;
		if (port->p_node->sw) {
			/* The member is the switch itself. */
			sw = port->p_node->sw;
			sw->is_mc_member = 1;
		} else if (port->p_physp->p_remote_physp) {
			sw = port->p_physp->p_remote_physp->p_node->sw;
			/* Guard: the remote node may not be a switch
			   (e.g. back-to-back connected end ports); in
			   that case there is no switch to account for. */
			if (!sw)
				continue;
			sw->num_of_mcm++;
		} else
			continue;
		guid = osm_node_get_node_guid(sw->p_node);
		if (cl_qmap_get(m, guid) == cl_qmap_end(m))
			cl_qmap_insert(m, guid, &sw->mgrp_item);
	}
}
168
static void destroy_mgrp_switch_map(cl_qmap_t * m)
{
	/* Reset the per-switch MC bookkeeping set up by
	   create_mgrp_switch_map(), then empty the map itself. */
	cl_map_item_t *item;
	osm_switch_t *sw;

	for (item = cl_qmap_head(m); item != cl_qmap_end(m);
	     item = cl_qmap_next(item)) {
		sw = cl_item_obj(item, sw, mgrp_item);
		sw->is_mc_member = 0;
		sw->num_of_mcm = 0;
	}
	cl_qmap_remove_all(m);
}
181
182 /**********************************************************************
183 Calculate the maximal "min hops" from the given switch to any
184 of the group HCAs
185 **********************************************************************/
186 #ifdef OSM_VENDOR_INTF_ANAFA
/*
 * Compute the average hop count from this_sw to all group member ports.
 * 'm' is the map built by create_mgrp_switch_map(), so its items are
 * the switches' mgrp_item members.
 */
static float mcast_mgr_compute_avg_hops(osm_sm_t * sm, cl_qmap_t * m,
					const osm_switch_t * this_sw)
{
	float avg_hops = 0;
	uint32_t hops = 0;
	uint32_t num_ports = 0;
	uint16_t lid;
	uint32_t least_hops;
	cl_map_item_t *i;
	osm_switch_t *sw;

	OSM_LOG_ENTER(sm->p_log);

	for (i = cl_qmap_head(m); i != cl_qmap_end(m); i = cl_qmap_next(i)) {
		/* The map was populated with &sw->mgrp_item (see
		   create_mgrp_switch_map), so recover the switch from
		   mgrp_item — using any other member would yield a
		   mis-offset pointer. */
		sw = cl_item_obj(i, sw, mgrp_item);
		lid = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
		least_hops = osm_switch_get_least_hops(this_sw, lid);
		/* for all host that are MC members and attached to the switch,
		   we should add the (least_hops + 1) * number_of_such_hosts.
		   If switch itself is in the MC, we should add the least_hops only */
		hops += (least_hops + 1) * sw->num_of_mcm +
		    least_hops * sw->is_mc_member;
		num_ports += sw->num_of_mcm + sw->is_mc_member;
	}

	/* We shouldn't be here if there aren't any ports in the group. */
	CL_ASSERT(num_ports);

	/* Divide in floating point so the fractional part of the average
	   is kept; integer division here would truncate and flatten the
	   metric this function exists to provide. */
	avg_hops = (float)hops / (float)num_ports;

	OSM_LOG_EXIT(sm->p_log);
	return avg_hops;
}
220 #else
static float mcast_mgr_compute_max_hops(osm_sm_t * sm, cl_qmap_t * m,
					const osm_switch_t * this_sw)
{
	uint32_t worst = 0;
	uint32_t dist;
	uint16_t lid;
	cl_map_item_t *item;
	osm_switch_t *sw;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   Walk every switch that has group members (or is a member
	   itself) and track the largest hop count from this_sw to that
	   switch's base LID.
	 */
	for (item = cl_qmap_head(m); item != cl_qmap_end(m);
	     item = cl_qmap_next(item)) {
		sw = cl_item_obj(item, sw, mgrp_item);
		lid = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
		dist = osm_switch_get_least_hops(this_sw, lid);
		/* Members hanging off the switch cost one extra hop;
		   a switch that is itself the member does not. */
		if (!sw->is_mc_member)
			dist++;
		if (dist > worst)
			worst = dist;
	}

	/* (worst == 0) is possible when the only member of the mcast
	   group is the current switch itself. */

	OSM_LOG_EXIT(sm->p_log);
	return (float)worst;
}
252 #endif
253
254 /**********************************************************************
255 This function attempts to locate the optimal switch for the
256 center of the spanning tree. The current algorithm chooses
257 a switch with the lowest average hop count to the members
258 of the multicast group.
259 **********************************************************************/
mcast_mgr_find_optimal_switch(osm_sm_t * sm,cl_qlist_t * list)260 static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
261 cl_qlist_t * list)
262 {
263 cl_qmap_t mgrp_sw_map;
264 cl_qmap_t *p_sw_tbl;
265 osm_switch_t *p_sw, *p_best_sw = NULL;
266 float hops = 0;
267 float best_hops = 10000; /* any big # will do */
268
269 OSM_LOG_ENTER(sm->p_log);
270
271 p_sw_tbl = &sm->p_subn->sw_guid_tbl;
272
273 create_mgrp_switch_map(&mgrp_sw_map, list);
274 for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
275 p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
276 p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
277 if (!osm_switch_supports_mcast(p_sw))
278 continue;
279
280 #ifdef OSM_VENDOR_INTF_ANAFA
281 hops = mcast_mgr_compute_avg_hops(sm, &mgrp_sw_map, p_sw);
282 #else
283 hops = mcast_mgr_compute_max_hops(sm, &mgrp_sw_map, p_sw);
284 #endif
285
286 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
287 "Switch 0x%016" PRIx64 ", hops = %f\n",
288 cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)), hops);
289
290 if (hops < best_hops) {
291 p_best_sw = p_sw;
292 best_hops = hops;
293 }
294 }
295
296 if (p_best_sw)
297 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
298 "Best switch is 0x%" PRIx64 " (%s), hops = %f\n",
299 cl_ntoh64(osm_node_get_node_guid(p_best_sw->p_node)),
300 p_best_sw->p_node->print_desc, best_hops);
301 else
302 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
303 "No multicast capable switches detected\n");
304
305 destroy_mgrp_switch_map(&mgrp_sw_map);
306 OSM_LOG_EXIT(sm->p_log);
307 return p_best_sw;
308 }
309
310 /**********************************************************************
311 This function returns the existing or optimal root switch for the tree.
312 **********************************************************************/
osm_mcast_mgr_find_root_switch(osm_sm_t * sm,cl_qlist_t * list)313 osm_switch_t *osm_mcast_mgr_find_root_switch(osm_sm_t * sm, cl_qlist_t *list)
314 {
315 osm_switch_t *p_sw = NULL;
316
317 OSM_LOG_ENTER(sm->p_log);
318
319 /*
320 We always look for the best multicast tree root switch.
321 Otherwise since we always start with a a single join
322 the root will be always on the first switch attached to it.
323 - Very bad ...
324 */
325 p_sw = mcast_mgr_find_optimal_switch(sm, list);
326
327 OSM_LOG_EXIT(sm->p_log);
328 return p_sw;
329 }
330
/*
 * Push a single multicast forwarding table block to a switch.
 *
 * If the switch's shadow MFT (osm_mcast_tbl_t) reports that block
 * (block_num, position) needs writing, issue a directed-route
 * Set(MulticastForwardingTable) request for it.
 *
 * Returns 0 on success or when nothing needed sending, -1 if the Set
 * request could not be issued.
 */
static int mcast_mgr_set_mft_block(osm_sm_t * sm, IN osm_switch_t * p_sw,
				   uint32_t block_num, uint32_t position)
{
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_dr_path_t *p_path;
	osm_madw_context_t context;
	ib_api_status_t status;
	uint32_t block_id_ho;
	osm_mcast_tbl_t *p_tbl;
	ib_net16_t block[IB_MCAST_BLOCK_SIZE];
	int ret = 0;

	CL_ASSERT(sm);

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_sw);

	p_node = p_sw->p_node;

	CL_ASSERT(p_node);

	/* Route the request via the switch's port 0 directed path. */
	p_physp = osm_node_get_physp_ptr(p_node, 0);
	p_path = osm_physp_get_dr_path_ptr(p_physp);

	/*
	   Send multicast forwarding table blocks to the switch
	   as long as the switch indicates it has blocks needing
	   configuration.
	 */

	context.mft_context.node_guid = osm_node_get_node_guid(p_node);
	context.mft_context.set_method = TRUE;

	p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);

	/* get_block fills 'block' and returns nonzero only when this
	   block is dirty and must be written to the switch. */
	if (osm_mcast_tbl_get_block(p_tbl, (uint16_t) block_num,
				    (uint8_t) position, block)) {
		/* Attribute modifier: block number in the low bits,
		   port-mask position packed into bits 28-31. */
		block_id_ho = block_num + (position << 28);

		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Writing MFT block %u position %u to switch 0x%" PRIx64
			"\n", block_num, position,
			cl_ntoh64(context.mft_context.node_guid));

		status = osm_req_set(sm, p_path, (void *)block, sizeof(block),
				     IB_MAD_ATTR_MCAST_FWD_TBL,
				     cl_hton32(block_id_ho), FALSE,
				     ib_port_info_get_m_key(&p_physp->port_info),
				     CL_DISP_MSGID_NONE, &context);
		if (status != IB_SUCCESS) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A02: "
				"Sending multicast fwd. tbl. block 0x%X to %s "
				"failed (%s)\n", block_id_ho,
				p_node->print_desc, ib_get_err_str(status));
			ret = -1;
		}
	}

	OSM_LOG_EXIT(sm->p_log);
	return ret;
}
394
395 /**********************************************************************
396 This is part of the recursive function to compute the paths in the
397 spanning tree that emanate from this switch. On input, the p_list
398 contains the group members that must be routed from this switch.
399 **********************************************************************/
/*
 * Part of the recursive spanning-tree computation: partition the group
 * members by egress port.
 *
 * Each work object is removed from p_list and appended to the sub-list
 * in list_array indexed by the switch port that should carry it (per
 * osm_switch_recommend_mcast_path).  Members with no usable path, or
 * whose recommended port is out of range for list_array, are logged
 * and discarded.  p_list is empty on return.
 */
static void mcast_mgr_subdivide(osm_sm_t * sm, uint16_t mlid_ho,
				osm_switch_t * p_sw, cl_qlist_t * p_list,
				cl_qlist_t * list_array, uint8_t array_size)
{
	uint8_t port_num;
	boolean_t ignore_existing;
	osm_mcast_work_obj_t *p_wobj;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   For Multicast Groups, we don't want to count on previous
	   configurations - since we can easily generate a storm
	   by loops.
	 */
	ignore_existing = TRUE;

	/*
	   Subdivide the set of ports into non-overlapping subsets
	   that will be routed to other switches.
	 */
	/* NOTE: cl_qlist_remove_head() on an empty list returns the list
	   end item, which is the loop termination condition here. */
	while ((p_wobj =
		(osm_mcast_work_obj_t *) cl_qlist_remove_head(p_list)) !=
	       (osm_mcast_work_obj_t *) cl_qlist_end(p_list)) {
		port_num =
		    osm_switch_recommend_mcast_path(p_sw, p_wobj->p_port,
						    mlid_ho, ignore_existing);
		if (port_num == OSM_NO_PATH) {
			/*
			   This typically occurs if the switch does not support
			   multicast and the multicast tree must branch at this
			   switch.
			 */
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A03: "
				"Error routing MLID 0x%X through switch 0x%"
				PRIx64 " %s\n"
				"\t\t\t\tNo multicast paths from this switch "
				"for port with LID %u\n", mlid_ho,
				cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)),
				p_sw->p_node->print_desc,
				cl_ntoh16(osm_port_get_base_lid
					  (p_wobj->p_port)));
			mcast_work_obj_delete(p_wobj);
			continue;
		}

		/* Recommended port must fit within the caller's array of
		   per-port sub-lists (one slot per switch port). */
		if (port_num >= array_size) {
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A04: "
				"Error routing MLID 0x%X through switch 0x%"
				PRIx64 " %s\n"
				"\t\t\t\tNo multicast paths from this switch "
				"to port with LID %u\n", mlid_ho,
				cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)),
				p_sw->p_node->print_desc,
				cl_ntoh16(osm_port_get_base_lid
					  (p_wobj->p_port)));
			mcast_work_obj_delete(p_wobj);
			continue;
		}

		cl_qlist_insert_tail(&list_array[port_num], &p_wobj->list_item);
	}

	OSM_LOG_EXIT(sm->p_log);
}
465
static void mcast_mgr_purge_list(osm_sm_t * sm, uint16_t mlid, cl_qlist_t * list)
{
	/* Log (at ERROR level, if enabled) every port we failed to route,
	   then free the whole work-object list. */
	if (OSM_LOG_IS_ACTIVE_V2(sm->p_log, OSM_LOG_ERROR)) {
		cl_list_item_t *item;
		osm_mcast_work_obj_t *wobj;

		for (item = cl_qlist_head(list); item != cl_qlist_end(list);
		     item = cl_qlist_next(item)) {
			wobj = cl_item_obj(item, wobj, list_item);
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A06: "
				"Unable to route MLID 0x%X for port 0x%" PRIx64 "\n",
				mlid, cl_ntoh64(osm_port_get_guid(wobj->p_port)));
		}
	}
	osm_mcast_drop_port_list(list);
}
481
482 /**********************************************************************
483 This is the recursive function to compute the paths in the spanning
484 tree that emanate from this switch. On input, the p_list contains
485 the group members that must be routed from this switch.
486
487 The function returns the newly created mtree node element.
488 **********************************************************************/
/*
 * Recursively build the multicast spanning tree rooted at p_sw for
 * MLID mlid_ho.
 *
 * On input, p_list holds the work objects for every group member that
 * must be reached through this switch.  The list is subdivided per
 * egress port, the switch's mcast table bits are set, and the function
 * recurses into each downstream switch.
 *
 * depth         - recursion depth of this call (root invoked with 0).
 * upstream_port - port on p_sw leading back toward the root (0 at the
 *                 root itself).
 * p_max_depth   - in/out: updated with the deepest level reached.
 *
 * Returns the newly created mtree node, or NULL on failure (in which
 * case all work objects on this branch have already been freed).
 */
static osm_mtree_node_t *mcast_mgr_branch(osm_sm_t * sm, uint16_t mlid_ho,
					  osm_switch_t * p_sw,
					  cl_qlist_t * p_list, uint8_t depth,
					  uint8_t upstream_port,
					  uint8_t * p_max_depth)
{
	uint8_t max_children;
	osm_mtree_node_t *p_mtn = NULL;
	cl_qlist_t *list_array = NULL;
	uint8_t i;
	ib_net64_t node_guid;
	osm_mcast_work_obj_t *p_wobj;
	cl_qlist_t *p_port_list;
	size_t count;
	osm_mcast_tbl_t *p_tbl;

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_sw);
	CL_ASSERT(p_list);
	CL_ASSERT(p_max_depth);

	node_guid = osm_node_get_node_guid(p_sw->p_node);

	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Routing MLID 0x%X through switch 0x%" PRIx64
		" %s, %u nodes at depth %u\n",
		mlid_ho, cl_ntoh64(node_guid), p_sw->p_node->print_desc,
		cl_qlist_count(p_list), depth);

	CL_ASSERT(cl_qlist_count(p_list) > 0);

	depth++;

	/* Recursing past 64 levels means a routing loop or corrupt
	   min-hop tables, so bail out rather than loop forever. */
	if (depth >= 64) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A21: "
			"Maximal hops number is reached for MLID 0x%x."
			" Break processing\n", mlid_ho);
		mcast_mgr_purge_list(sm, mlid_ho, p_list);
		goto Exit;
	}

	if (depth > *p_max_depth) {
		CL_ASSERT(depth == *p_max_depth + 1);
		*p_max_depth = depth;
	}

	if (osm_switch_supports_mcast(p_sw) == FALSE) {
		/*
		   This switch doesn't do multicast. Clean-up.
		 */
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A14: "
			"Switch 0x%" PRIx64 " %s does not support multicast\n",
			cl_ntoh64(node_guid), p_sw->p_node->print_desc);

		/*
		   Deallocate all the work objects on this branch of the tree.
		 */
		mcast_mgr_purge_list(sm, mlid_ho, p_list);
		goto Exit;
	}

	p_mtn = osm_mtree_node_new(p_sw);
	if (p_mtn == NULL) {
		/*
		   We are unable to continue routing down this
		   leg of the tree. Clean-up.
		 */
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A15: "
			"Insufficient memory to build multicast tree\n");

		/*
		   Deallocate all the work objects on this branch of the tree.
		 */
		mcast_mgr_purge_list(sm, mlid_ho, p_list);
		goto Exit;
	}

	max_children = osm_mtree_node_get_max_children(p_mtn);

	CL_ASSERT(max_children > 1);

	/*
	   Prepare an empty list for each port in the switch.
	   TO DO - this list array could probably be moved
	   inside the switch element to save on malloc thrashing.
	 */
	list_array = malloc(sizeof(cl_qlist_t) * max_children);
	if (list_array == NULL) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A16: "
			"Unable to allocate list array\n");
		mcast_mgr_purge_list(sm, mlid_ho, p_list);
		osm_mtree_destroy(p_mtn);
		p_mtn = NULL;
		goto Exit;
	}

	memset(list_array, 0, sizeof(cl_qlist_t) * max_children);

	for (i = 0; i < max_children; i++)
		cl_qlist_init(&list_array[i]);

	/* Partition the members by the egress port each should use;
	   p_list is empty after this call. */
	mcast_mgr_subdivide(sm, mlid_ho, p_sw, p_list, list_array, max_children);

	p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);

	/*
	   Add the upstream port to the forwarding table unless
	   we're at the root of the spanning tree.
	 */
	if (depth > 1) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Adding upstream port %u\n", upstream_port);

		CL_ASSERT(upstream_port);
		osm_mcast_tbl_set(p_tbl, mlid_ho, upstream_port);
	}

	/*
	   For each port that was allocated some routes,
	   recurse into this function to continue building the tree
	   if the node on the other end of that port is another switch.
	   Otherwise, the node is an endpoint, and we've found a leaf
	   of the tree. Mark leaves with our special pointer value.
	 */

	for (i = 0; i < max_children; i++) {
		const osm_physp_t *p_physp;
		const osm_physp_t *p_remote_physp;
		osm_node_t *p_node;
		const osm_node_t *p_remote_node;

		p_port_list = &list_array[i];

		count = cl_qlist_count(p_port_list);

		/*
		   There should be no children routed through the upstream port!
		 */
		CL_ASSERT(upstream_port == 0 || i != upstream_port ||
			  (i == upstream_port && count == 0));

		if (count == 0)
			continue;	/* No routes down this port. */

		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Routing %zu destinations via switch port %u\n",
			count, i);

		if (i == 0) {
			/* This means we are adding the switch to the MC group.
			   We do not need to continue looking at the remote
			   port, just needed to add the port to the table */
			CL_ASSERT(count == 1);

			osm_mcast_tbl_set(p_tbl, mlid_ho, i);

			p_wobj = (osm_mcast_work_obj_t *)
			    cl_qlist_remove_head(p_port_list);
			mcast_work_obj_delete(p_wobj);
			continue;
		}

		p_node = p_sw->p_node;
		p_remote_node = osm_node_get_remote_node(p_node, i, NULL);
		if (!p_remote_node) {
			/*
			 * If we reached here, it means the minhop table has
			 * invalid entries that leads to disconnected ports.
			 *
			 * A possible reason for the code to reach here is
			 * that ucast cache is enabled, and a leaf switch that
			 * is used as a non-leaf switch in a multicast has been
			 * removed from the fabric.
			 *
			 * When it happens, we should invalidate the cache
			 * and force rerouting of the fabric.
			 */

			OSM_LOG(sm->p_log, OSM_LOG_ERROR,
				"ERR 0A1E: Tried to route MLID 0x%X through "
				"disconnected switch 0x%" PRIx64 " port %d\n",
				mlid_ho, cl_ntoh64(node_guid), i);

			/* Free memory */
			mcast_mgr_purge_list(sm, mlid_ho, p_port_list);

			/* Invalidate ucast cache */
			if (sm->ucast_mgr.p_subn->opt.use_ucast_cache &&
			    sm->ucast_mgr.cache_valid) {
				OSM_LOG(sm->p_log, OSM_LOG_INFO,
					"Unicast Cache will be invalidated due "
					"to multicast routing errors\n");
				osm_ucast_cache_invalidate(&sm->ucast_mgr);
				sm->p_subn->force_heavy_sweep = TRUE;
			}

			continue;
		}

		/*
		   This port routes frames for this mcast group. Therefore,
		   set the appropriate bit in the multicast forwarding
		   table for this switch.
		 */
		osm_mcast_tbl_set(p_tbl, mlid_ho, i);

		if (osm_node_get_type(p_remote_node) == IB_NODE_TYPE_SWITCH) {
			/*
			   Acquire a pointer to the remote switch then recurse.
			 */
			CL_ASSERT(p_remote_node->sw);

			p_physp = osm_node_get_physp_ptr(p_node, i);
			CL_ASSERT(p_physp);

			p_remote_physp = osm_physp_get_remote(p_physp);
			CL_ASSERT(p_remote_physp);

			p_mtn->child_array[i] =
			    mcast_mgr_branch(sm, mlid_ho, p_remote_node->sw,
					     p_port_list, depth,
					     osm_physp_get_port_num
					     (p_remote_physp), p_max_depth);
		} else {
			/*
			   The neighbor node is not a switch, so this
			   must be a leaf.
			 */
			CL_ASSERT(count == 1);

			p_mtn->child_array[i] = OSM_MTREE_LEAF;
			p_wobj = (osm_mcast_work_obj_t *)
			    cl_qlist_remove_head(p_port_list);

			CL_ASSERT(cl_is_qlist_empty(p_port_list));

			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
				"Found leaf for port 0x%016" PRIx64
				" on switch port %u\n",
				cl_ntoh64(osm_port_get_guid(p_wobj->p_port)),
				i);
			mcast_work_obj_delete(p_wobj);
		}
	}

	free(list_array);
Exit:
	OSM_LOG_EXIT(sm->p_log);
	return p_mtn;
}
740
/*
 * Build (rebuild) the single spanning tree for the multicast group box.
 *
 * Destroys any previous tree, collects all member ports, chooses a root
 * switch, and recursively constructs the tree, setting the multicast
 * table bits on every switch along the way.
 *
 * Returns IB_SUCCESS on success (including the trivial <2-member case),
 * IB_ERROR on allocation failure or when no suitable root switch exists.
 */
static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm,
						     osm_mgrp_box_t * mbox)
{
	cl_qlist_t port_list;
	cl_qmap_t port_map;
	uint32_t num_ports;
	osm_switch_t *p_sw;
	ib_api_status_t status = IB_SUCCESS;
	uint8_t max_depth = 0;

	OSM_LOG_ENTER(sm->p_log);

	/*
	   TO DO - for now, just blow away the old tree.
	   In the future we'll need to construct the tree based
	   on multicast forwarding table information if the user wants to
	   preserve existing multicast routes.
	 */
	osm_purge_mtree(sm, mbox);

	/* build the first "subset" containing all member ports */
	if (osm_mcast_make_port_list_and_map(&port_list, &port_map, mbox)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A10: "
			"Insufficient memory to make port list\n");
		status = IB_ERROR;
		goto Exit;
	}

	num_ports = cl_qlist_count(&port_list);
	/* A group with fewer than 2 members needs no tree at all. */
	if (num_ports < 2) {
		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
			"MLID 0x%X has %u members - nothing to do\n",
			mbox->mlid, num_ports);
		osm_mcast_drop_port_list(&port_list);
		goto Exit;
	}

	/*
	   This function builds the single spanning tree recursively.
	   At each stage, the ports to be reached are divided into
	   non-overlapping subsets of member ports that can be reached through
	   a given switch port. Construction then moves down each
	   branch, and the process starts again with each branch computing
	   for its own subset of the member ports.

	   The maximum recursion depth is at worst the maximum hop count in the
	   subnet, which is spec limited to 64.
	 */

	/*
	   Locate the switch around which to create the spanning
	   tree for this multicast group.
	 */
	p_sw = osm_mcast_mgr_find_root_switch(sm, &port_list);
	if (p_sw == NULL) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A08: "
			"Unable to locate a suitable switch for group 0x%X\n",
			mbox->mlid);
		osm_mcast_drop_port_list(&port_list);
		status = IB_ERROR;
		goto Exit;
	}

	/* mcast_mgr_branch consumes port_list (frees every work object)
	   whether it succeeds or fails. */
	mbox->root = mcast_mgr_branch(sm, mbox->mlid, p_sw, &port_list, 0, 0,
				      &max_depth);

	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Configured MLID 0x%X for %u ports, max tree depth = %u\n",
		mbox->mlid, num_ports, max_depth);
Exit:
	OSM_LOG_EXIT(sm->p_log);
	return status;
}
814
815 #if 0
816 /* unused */
/*
 * Program one switch's mcast table row (for the group's MLID) from an
 * mtree node: clear the row, then set a bit for every non-NULL child.
 * Currently compiled out (#if 0).
 * NOTE(review): the node GUID is logged below without cl_ntoh64(),
 * unlike the rest of this file - confirm byte order if re-enabled.
 */
void osm_mcast_mgr_set_table(osm_sm_t * sm, IN const osm_mgrp_t * p_mgrp,
			     IN const osm_mtree_node_t * p_mtn)
{
	uint8_t i;
	uint8_t max_children;
	osm_mtree_node_t *p_child_mtn;
	uint16_t mlid_ho;
	osm_mcast_tbl_t *p_tbl;
	osm_switch_t *p_sw;

	OSM_LOG_ENTER(sm->p_log);

	mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp));
	p_sw = osm_mtree_node_get_switch_ptr(p_mtn);

	CL_ASSERT(p_sw);

	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
		"Configuring MLID 0x%X on switch 0x%" PRIx64 "\n",
		mlid_ho, osm_node_get_node_guid(p_sw->p_node));

	/*
	   For every child of this tree node, set the corresponding
	   bit in the switch's mcast table.
	 */
	p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
	max_children = osm_mtree_node_get_max_children(p_mtn);

	CL_ASSERT(max_children <= osm_switch_get_num_ports(p_sw));

	osm_mcast_tbl_clear_mlid(p_tbl, mlid_ho);

	for (i = 0; i < max_children; i++) {
		p_child_mtn = osm_mtree_node_get_child(p_mtn, i);
		if (p_child_mtn == NULL)
			continue;

		osm_mcast_tbl_set(p_tbl, mlid_ho, i);
	}

	OSM_LOG_EXIT(sm->p_log);
}
859 #endif
860
static void mcast_mgr_clear(osm_sm_t * sm, uint16_t mlid)
{
	cl_qmap_t *p_sw_tbl;
	osm_switch_t *p_sw;
	osm_mcast_tbl_t *p_mcast_tbl;

	OSM_LOG_ENTER(sm->p_log);

	/* Walk the switches and clear the routing entries for this MLID. */
	p_sw_tbl = &sm->p_subn->sw_guid_tbl;
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
		p_mcast_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
		osm_mcast_tbl_clear_mlid(p_mcast_tbl, mlid);
	}

	OSM_LOG_EXIT(sm->p_log);
}
880
881 #if 0
882 /* TO DO - make this real -- at least update spanning tree */
883 /**********************************************************************
884 Lock must be held on entry.
885 **********************************************************************/
/*
 * Add a single port to an existing multicast tree for the given MLID.
 * Only succeeds when the switch the port is directly attached to is
 * already part of the tree; otherwise nothing is changed.
 * Currently compiled out (#if 0).
 */
ib_api_status_t osm_mcast_mgr_process_single(osm_sm_t * sm,
					     IN ib_net16_t const mlid,
					     IN ib_net64_t const port_guid,
					     IN uint8_t const join_state)
{
	uint8_t port_num;
	uint16_t mlid_ho;
	ib_net64_t sw_guid;
	osm_port_t *p_port;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_node_t *p_remote_node;
	osm_mcast_tbl_t *p_mcast_tbl;
	ib_api_status_t status = IB_SUCCESS;

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(mlid);
	CL_ASSERT(port_guid);

	mlid_ho = cl_ntoh16(mlid);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Attempting to add port 0x%" PRIx64 " to MLID 0x%X, "
		"\n\t\t\t\tjoin state = 0x%X\n",
		cl_ntoh64(port_guid), mlid_ho, join_state);

	/*
	   Acquire the Port object.
	 */
	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (!p_port) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A01: "
			"Unable to acquire port object for 0x%" PRIx64 "\n",
			cl_ntoh64(port_guid));
		status = IB_ERROR;
		goto Exit;
	}

	/* NOTE(review): the "phsyical" typos in the two log strings below
	   are runtime strings and are intentionally left untouched. */
	p_physp = p_port->p_physp;
	if (p_physp == NULL) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A05: "
			"Unable to acquire phsyical port object for 0x%" PRIx64
			"\n", cl_ntoh64(port_guid));
		status = IB_ERROR;
		goto Exit;
	}

	p_remote_physp = osm_physp_get_remote(p_physp);
	if (p_remote_physp == NULL) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A11: "
			"Unable to acquire remote phsyical port object "
			"for 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
		status = IB_ERROR;
		goto Exit;
	}

	p_remote_node = osm_physp_get_node_ptr(p_remote_physp);

	CL_ASSERT(p_remote_node);

	sw_guid = osm_node_get_node_guid(p_remote_node);

	/* The port must be attached directly to a switch. */
	if (osm_node_get_type(p_remote_node) != IB_NODE_TYPE_SWITCH) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A22: "
			"Remote node not a switch node 0x%" PRIx64 "\n",
			cl_ntoh64(sw_guid));
		status = IB_ERROR;
		goto Exit;
	}

	if (!p_remote_node->sw) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A12: "
			"No switch object 0x%" PRIx64 "\n", cl_ntoh64(sw_guid));
		status = IB_ERROR;
		goto Exit;
	}

	if (osm_switch_is_in_mcast_tree(p_remote_node->sw, mlid_ho)) {
		/*
		   We're in luck. The switch attached to this port
		   is already in the multicast group, so we can just
		   add the specified port as a new leaf of the tree.
		 */
		if (join_state & (IB_JOIN_STATE_FULL | IB_JOIN_STATE_NON)) {
			/*
			   This node wants to receive multicast frames.
			   Get the switch port number to which the new member port
			   is attached, then configure this single mcast table.
			 */
			port_num = osm_physp_get_port_num(p_remote_physp);
			CL_ASSERT(port_num);

			p_mcast_tbl =
			    osm_switch_get_mcast_tbl_ptr(p_remote_node->sw);
			osm_mcast_tbl_set(p_mcast_tbl, mlid_ho, port_num);
		} else {
			/* Send-only members need no receive path; any
			   other join state here is an error. */
			if (join_state & IB_JOIN_STATE_SEND_ONLY)
				OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
					"Success. Nothing to do for send"
					"only member\n");
			else {
				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A13: "
					"Unknown join state 0x%X\n",
					join_state);
				status = IB_ERROR;
				goto Exit;
			}
		}
	} else
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "Unable to add port\n");

Exit:
	OSM_LOG_EXIT(sm->p_log);
	return status;
}
1002 #endif
1003
1004 /**********************************************************************
1005 Process the entire group.
1006 NOTE : The lock should be held externally!
1007 **********************************************************************/
mcast_mgr_process_mlid(osm_sm_t * sm,uint16_t mlid)1008 static ib_api_status_t mcast_mgr_process_mlid(osm_sm_t * sm, uint16_t mlid)
1009 {
1010 ib_api_status_t status = IB_SUCCESS;
1011 struct osm_routing_engine *re = sm->p_subn->p_osm->routing_engine_used;
1012 osm_mgrp_box_t *mbox;
1013
1014 OSM_LOG_ENTER(sm->p_log);
1015
1016 OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
1017 "Processing multicast group with mlid 0x%X\n", mlid);
1018
1019 /* Clear the multicast tables to start clean, then build
1020 the spanning tree which sets the mcast table bits for each
1021 port in the group. */
1022 mcast_mgr_clear(sm, mlid);
1023
1024 mbox = osm_get_mbox_by_mlid(sm->p_subn, cl_hton16(mlid));
1025 if (mbox) {
1026 if (re && re->mcast_build_stree)
1027 status = re->mcast_build_stree(re->context, mbox);
1028 else
1029 status = mcast_mgr_build_spanning_tree(sm, mbox);
1030
1031 if (status != IB_SUCCESS)
1032 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A17: "
1033 "Unable to create spanning tree (%s) for mlid "
1034 "0x%x\n", ib_get_err_str(status), mlid);
1035 }
1036
1037 OSM_LOG_EXIT(sm->p_log);
1038 return status;
1039 }
1040
/*
 * Program the MulticastFDBTop of one switch (SwitchInfo Set), when
 * use_mfttop is enabled and the switch advertises the McastFDBTop
 * capability.  No-op if the computed top already matches the cached
 * SwitchInfo.
 *
 * Fix: OSM_LOG_ENTER was never paired with OSM_LOG_EXIT (the early
 * return skipped it and the fall-through end lacked it), which breaks
 * the function-trace nesting every sibling function in this file
 * maintains.  Route all exits through an Exit label.
 */
static void mcast_mgr_set_mfttop(IN osm_sm_t * sm, IN osm_switch_t * p_sw)
{
	osm_node_t *p_node;
	osm_dr_path_t *p_path;
	osm_physp_t *p_physp;
	osm_mcast_tbl_t *p_tbl;
	osm_madw_context_t context;
	ib_api_status_t status;
	ib_switch_info_t si;
	ib_net16_t mcast_top;

	OSM_LOG_ENTER(sm->p_log);

	CL_ASSERT(p_sw);

	p_node = p_sw->p_node;

	CL_ASSERT(p_node);

	/* Physp 0 is the switch management port; its DR path addresses the
	   SwitchInfo Set below. */
	p_physp = osm_node_get_physp_ptr(p_node, 0);
	p_path = osm_physp_get_dr_path_ptr(p_physp);
	p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);

	if (sm->p_subn->opt.use_mfttop &&
	    p_physp->port_info.capability_mask & IB_PORT_CAP_HAS_MCAST_FDB_TOP) {
		/*
		   Set the top of the multicast forwarding table.
		 */
		si = p_sw->switch_info;
		if (sm->p_subn->first_time_master_sweep == TRUE)
			/* First master sweep: cover all MLIDs seen at init. */
			mcast_top = cl_hton16(sm->mlids_init_max);
		else {
			if (p_tbl->max_block_in_use == -1)
				/* No MFT blocks in use: top below MC range. */
				mcast_top = cl_hton16(IB_LID_MCAST_START_HO - 1);
			else
				/* Last MLID of the highest block in use. */
				mcast_top = cl_hton16(IB_LID_MCAST_START_HO +
				    (p_tbl->max_block_in_use + 1) * IB_MCAST_BLOCK_SIZE - 1);
		}
		if (mcast_top == si.mcast_top)
			goto Exit;	/* already current; skip the Set */

		si.mcast_top = mcast_top;

		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"Setting switch MFT top to MLID 0x%x\n",
			cl_ntoh16(si.mcast_top));

		context.si_context.light_sweep = FALSE;
		context.si_context.node_guid = osm_node_get_node_guid(p_node);
		context.si_context.set_method = TRUE;
		context.si_context.lft_top_change = FALSE;

		status = osm_req_set(sm, p_path, (uint8_t *) & si,
				     sizeof(si), IB_MAD_ATTR_SWITCH_INFO,
				     0, FALSE,
				     ib_port_info_get_m_key(&p_physp->port_info),
				     CL_DISP_MSGID_NONE, &context);

		if (status != IB_SUCCESS)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A1B: "
				"Sending SwitchInfo attribute failed (%s)\n",
				ib_get_err_str(status));
	}

Exit:
	OSM_LOG_EXIT(sm->p_log);
}
1105
/*
 * Push every switch's multicast forwarding table to the hardware.
 * Returns 0 on success, -1 if any block send failed.
 */
static int mcast_mgr_set_mftables(osm_sm_t * sm)
{
	cl_qmap_t *p_sw_tbl = &sm->p_subn->sw_guid_tbl;
	osm_switch_t *p_sw;
	osm_mcast_tbl_t *p_mcast_tbl;
	int pending, ret = 0;
	int16_t block, max_block = -1;

	/*
	 * Pass 1: rewind every switch's MFT send cursor, remember the
	 * largest block index in use anywhere, and refresh each switch's
	 * MulticastFDBTop.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
		p_sw->mft_block_num = 0;
		p_sw->mft_position = 0;
		p_mcast_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
		if (osm_mcast_tbl_get_max_block_in_use(p_mcast_tbl) > max_block)
			max_block =
			    osm_mcast_tbl_get_max_block_in_use(p_mcast_tbl);
		mcast_mgr_set_mfttop(sm, p_sw);
	}

	/*
	 * Pass 2: stripe the MFT blocks across the switches -- each sweep
	 * of the map sends one (block, position) chunk per switch still
	 * working on the current block, advancing its cursor afterwards.
	 */
	for (block = 0; block <= max_block; block++) {
		do {
			pending = 0;
			for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
			     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
			     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
				if (p_sw->mft_block_num != block)
					continue;
				pending = 1;
				if (mcast_mgr_set_mft_block(sm, p_sw,
							    p_sw->mft_block_num,
							    p_sw->mft_position))
					ret = -1;
				p_mcast_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
				if (++p_sw->mft_position > p_mcast_tbl->max_position) {
					/* block fully sent: move to the next one */
					p_sw->mft_position = 0;
					p_sw->mft_block_num++;
				}
			}
		} while (pending);
	}

	return ret;
}
1151
/*
 * (Re)size every switch's multicast table to cover the highest MLID
 * offset that currently has a group box.  Returns 0 on success (also
 * when there are no groups at all), -1 on allocation failure.
 */
static int alloc_mfts(osm_sm_t * sm)
{
	cl_map_item_t *p_item;
	int top = sm->p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO;

	/* Scan down for the highest occupied group box. */
	while (top >= 0 && !sm->p_subn->mboxes[top])
		top--;
	if (top < 0)
		return 0;	/* no multicast groups: nothing to allocate */

	/* Now, walk switches and (re)allocate multicast tables */
	for (p_item = cl_qmap_head(&sm->p_subn->sw_guid_tbl);
	     p_item != cl_qmap_end(&sm->p_subn->sw_guid_tbl);
	     p_item = cl_qmap_next(p_item))
		if (osm_mcast_tbl_realloc(&((osm_switch_t *) p_item)->mcast_tbl,
					  top))
			return -1;

	return 0;
}
1175
1176 /**********************************************************************
1177 This is the function that is invoked during idle time and sweep to
1178 handle the process request for mcast groups where join/leave/delete
1179 was required.
1180 **********************************************************************/
int osm_mcast_mgr_process(osm_sm_t * sm, boolean_t config_all)
{
	int ret = 0;
	unsigned idx;
	unsigned last_mlid;

	OSM_LOG_ENTER(sm->p_log);

	CL_PLOCK_EXCL_ACQUIRE(sm->p_lock);

	/* Without switches there are no multicast tables to program. */
	if (cl_qmap_count(&sm->p_subn->sw_guid_tbl) == 0) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
			"No switches in subnet. Nothing to do\n");
		goto exit;
	}

	if (alloc_mfts(sm)) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 0A09: alloc_mfts failed\n");
		ret = -1;
		goto exit;
	}

	/* Full sweep walks every possible MLID offset; otherwise only the
	   range that saw join/leave/delete requests since last time. */
	last_mlid = config_all ? sm->p_subn->max_mcast_lid_ho
	    - IB_LID_MCAST_START_HO : sm->mlids_req_max;
	for (idx = 0; idx <= last_mlid; idx++) {
		if (!sm->mlids_req[idx] &&
		    !(config_all && sm->p_subn->mboxes[idx]))
			continue;
		sm->mlids_req[idx] = 0;
		/* per-MLID failures are logged inside; keep going */
		mcast_mgr_process_mlid(sm, idx + IB_LID_MCAST_START_HO);
	}

	sm->mlids_req_max = 0;

	ret = mcast_mgr_set_mftables(sm);

	osm_dump_mcast_routes(sm->p_subn->p_osm);

exit:
	CL_PLOCK_RELEASE(sm->p_lock);
	OSM_LOG_EXIT(sm->p_log);
	return ret;
}
1226