1 /*
2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2002-2007,2009 Mellanox Technologies LTD. All rights reserved.
4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
6 * Copyright (c) 2009 Battelle Memorial Institue. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 */
37
38 /*
39 * Abstract:
40 * Implementation of Up Down Algorithm using ranking & Min Hop
41 * Calculation functions
42 */
43
44 #if HAVE_CONFIG_H
45 # include <config.h>
46 #endif /* HAVE_CONFIG_H */
47
48 #include <stdlib.h>
49 #include <ctype.h>
50 #include <complib/cl_debug.h>
51 #include <complib/cl_qmap.h>
52 #include <opensm/osm_file_ids.h>
53 #define FILE_ID OSM_FILE_UCAST_DNUP_C
54 #include <opensm/osm_switch.h>
55 #include <opensm/osm_opensm.h>
56 #include <opensm/osm_ucast_mgr.h>
57
58 /* //////////////////////////// */
59 /* Local types */
60 /* //////////////////////////// */
61
62 /* direction */
63 typedef enum dnup_switch_dir {
64 UP = 0,
65 DOWN,
66 EQUAL
67 } dnup_switch_dir_t;
68
69 /* dnup structure */
70 typedef struct dnup {
71 osm_opensm_t *p_osm;
72 } dnup_t;
73
74 struct dnup_node {
75 cl_list_item_t list;
76 osm_switch_t *sw;
77 dnup_switch_dir_t dir;
78 unsigned rank;
79 unsigned visited;
80 };
81
82 /* This function returns direction based on rank and guid info of current &
83 remote ports */
dnup_get_dir(unsigned cur_rank,unsigned rem_rank)84 static dnup_switch_dir_t dnup_get_dir(unsigned cur_rank, unsigned rem_rank)
85 {
86 /* HACK: comes to solve root nodes connection, in a classic subnet root nodes do not connect
87 directly, but in case they are we assign to root node an UP direction to allow DNUP to discover
88 the subnet correctly (and not from the point of view of the last root node).
89 */
90 if (!cur_rank && !rem_rank)
91 return EQUAL;
92
93 if (cur_rank < rem_rank)
94 return DOWN;
95 else if (cur_rank > rem_rank)
96 return UP;
97 else
98 return EQUAL;
99 }
100
101 /**********************************************************************
102 * This function does the bfs of min hop table calculation by guid index
103 * as a starting point.
104 **********************************************************************/
dnup_bfs_by_node(IN osm_log_t * p_log,IN osm_subn_t * p_subn,IN osm_switch_t * p_sw,IN uint8_t prune_weight,OUT uint8_t * max_hops)105 static int dnup_bfs_by_node(IN osm_log_t * p_log, IN osm_subn_t * p_subn,
106 IN osm_switch_t * p_sw, IN uint8_t prune_weight,
107 OUT uint8_t * max_hops)
108 {
109 uint8_t pn, pn_rem;
110 cl_qlist_t list;
111 uint16_t lid;
112 struct dnup_node *u;
113 dnup_switch_dir_t next_dir, current_dir;
114
115 OSM_LOG_ENTER(p_log);
116
117 lid = osm_node_get_base_lid(p_sw->p_node, 0);
118 lid = cl_ntoh16(lid);
119 osm_switch_set_hops(p_sw, lid, 0, 0);
120
121 OSM_LOG(p_log, OSM_LOG_DEBUG,
122 "Starting from switch - port GUID 0x%" PRIx64 " lid %u\n",
123 cl_ntoh64(p_sw->p_node->node_info.port_guid), lid);
124
125 u = p_sw->priv;
126 u->dir = DOWN;
127
128 /* Update list with the new element */
129 cl_qlist_init(&list);
130 cl_qlist_insert_tail(&list, &u->list);
131
132 /* BFS the list till no next element */
133 while (!cl_is_qlist_empty(&list)) {
134 u = (struct dnup_node *)cl_qlist_remove_head(&list);
135 u->visited = 0; /* cleanup */
136 current_dir = u->dir;
137 /* Go over all ports of the switch and find unvisited remote nodes */
138 for (pn = 1; pn < u->sw->num_ports; pn++) {
139 osm_node_t *p_remote_node;
140 struct dnup_node *rem_u;
141 uint8_t current_min_hop, remote_min_hop,
142 set_hop_return_value;
143 osm_switch_t *p_remote_sw;
144
145 p_remote_node =
146 osm_node_get_remote_node(u->sw->p_node, pn,
147 &pn_rem);
148 /* If no remote node OR remote node is not a SWITCH
149 continue to next pn */
150 if (!p_remote_node || !p_remote_node->sw)
151 continue;
152 /* Fetch remote guid only after validation of remote node */
153 p_remote_sw = p_remote_node->sw;
154 rem_u = p_remote_sw->priv;
155 /* Decide which direction to mark it (UP/DOWN) */
156 next_dir = dnup_get_dir(u->rank, rem_u->rank);
157
158 /* Set MinHop value for the current lid */
159 current_min_hop = osm_switch_get_least_hops(u->sw, lid);
160 /* Check hop count if better insert into list && update
161 the remote node Min Hop Table */
162 remote_min_hop =
163 osm_switch_get_hop_count(p_remote_sw, lid, pn_rem);
164
165 /* Check if this is a legal step : the only illegal step is going
166 from UP to DOWN */
167 if ((current_dir == UP) && (next_dir == DOWN)) {
168 OSM_LOG(p_log, OSM_LOG_DEBUG,
169 "Avoiding move from 0x%016" PRIx64
170 " to 0x%016" PRIx64 "\n",
171 cl_ntoh64(osm_node_get_node_guid(u->sw->p_node)),
172 cl_ntoh64(osm_node_get_node_guid(p_remote_node)));
173 /* Illegal step. If prune_weight is set, allow it with an
174 * additional weight
175 */
176 if(prune_weight) {
177 current_min_hop+=prune_weight;
178 if(current_min_hop >= 64) {
179 OSM_LOG(p_log, OSM_LOG_ERROR,
180 "ERR AE02: Too many hops on subnet,"
181 " can't relax illegal Dn/Up transition.");
182 osm_switch_set_hops(p_remote_sw, lid,
183 pn_rem, OSM_NO_PATH);
184 }
185 } else {
186 continue;
187 }
188 }
189 if (current_min_hop + 1 < remote_min_hop) {
190 set_hop_return_value =
191 osm_switch_set_hops(p_remote_sw, lid,
192 pn_rem,
193 current_min_hop + 1);
194 if(max_hops && current_min_hop + 1 > *max_hops) {
195 *max_hops = current_min_hop + 1;
196 }
197 if (set_hop_return_value) {
198 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AE01: "
199 "Invalid value returned from set min hop is: %d\n",
200 set_hop_return_value);
201 }
202 /* Check if remote port has already been visited */
203 if (!rem_u->visited) {
204 /* Insert dnup_switch item into the list */
205 rem_u->dir = next_dir;
206 rem_u->visited = 1;
207 cl_qlist_insert_tail(&list,
208 &rem_u->list);
209 }
210 }
211 }
212 }
213
214 OSM_LOG_EXIT(p_log);
215 return 0;
216 }
217
218 /* NOTE : PLS check if we need to decide that the first */
219 /* rank is a SWITCH for BFS purpose */
dnup_subn_rank(IN dnup_t * p_dnup)220 static int dnup_subn_rank(IN dnup_t * p_dnup)
221 {
222 osm_switch_t *p_sw;
223 osm_physp_t *p_physp, *p_remote_physp;
224 cl_qlist_t list;
225 cl_map_item_t *item;
226 struct dnup_node *u, *remote_u;
227 uint8_t num_ports, port_num;
228 osm_log_t *p_log = &p_dnup->p_osm->log;
229 unsigned max_rank = 0;
230
231 OSM_LOG_ENTER(p_log);
232 cl_qlist_init(&list);
233
234 /* add all node level switches to the list */
235 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
236 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
237 item = cl_qmap_next(item)) {
238 p_sw = (osm_switch_t *)item;
239 u = p_sw->priv;
240 if (u->rank == 0)
241 cl_qlist_insert_tail(&list, &u->list);
242 }
243
244 /* BFS the list till it's empty */
245 while (!cl_is_qlist_empty(&list)) {
246 u = (struct dnup_node *)cl_qlist_remove_head(&list);
247 /* Go over all remote nodes and rank them (if not already visited) */
248 p_sw = u->sw;
249 num_ports = p_sw->num_ports;
250 OSM_LOG(p_log, OSM_LOG_DEBUG,
251 "Handling switch GUID 0x%" PRIx64 "\n",
252 cl_ntoh64(osm_node_get_node_guid(p_sw->p_node)));
253 for (port_num = 1; port_num < num_ports; port_num++) {
254 ib_net64_t port_guid;
255
256 /* Current port fetched in order to get remote side */
257 p_physp =
258 osm_node_get_physp_ptr(p_sw->p_node, port_num);
259
260 if (!p_physp)
261 continue;
262
263 p_remote_physp = p_physp->p_remote_physp;
264
265 /*
266 make sure that all the following occur on p_remote_physp:
267 1. The port isn't NULL
268 2. It is a switch
269 */
270 if (p_remote_physp && p_remote_physp->p_node->sw) {
271 remote_u = p_remote_physp->p_node->sw->priv;
272 port_guid = p_remote_physp->port_guid;
273
274 if (remote_u->rank > u->rank + 1) {
275 remote_u->rank = u->rank + 1;
276 max_rank = remote_u->rank;
277 cl_qlist_insert_tail(&list,
278 &remote_u->list);
279 OSM_LOG(p_log, OSM_LOG_DEBUG,
280 "Rank of port GUID 0x%" PRIx64
281 " = %u\n", cl_ntoh64(port_guid),
282 remote_u->rank);
283 }
284 }
285 }
286 }
287
288 /* Print Summary of ranking */
289 OSM_LOG(p_log, OSM_LOG_VERBOSE,
290 "Subnet ranking completed. Max Node Rank = %d\n", max_rank);
291 OSM_LOG_EXIT(p_log);
292 return 0;
293 }
294
dnup_set_min_hop_table(IN dnup_t * p_dnup)295 static int dnup_set_min_hop_table(IN dnup_t * p_dnup)
296 {
297 osm_subn_t *p_subn = &p_dnup->p_osm->subn;
298 osm_log_t *p_log = &p_dnup->p_osm->log;
299 osm_switch_t *p_sw;
300 struct dnup_node *u;
301 cl_map_item_t *item;
302 uint8_t max_hops = 0;
303
304 OSM_LOG_ENTER(p_log);
305
306 /* Go over all the switches in the subnet - for each init their Min Hop
307 Table */
308 OSM_LOG(p_log, OSM_LOG_VERBOSE,
309 "Init Min Hop Table of all switches [\n");
310
311 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
312 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
313 item = cl_qmap_next(item)) {
314 p_sw = (osm_switch_t *)item;
315 /* Clear Min Hop Table */
316 osm_switch_clear_hops(p_sw);
317 }
318
319 OSM_LOG(p_log, OSM_LOG_VERBOSE,
320 "Init Min Hop Table of all switches ]\n");
321
322 /* Now do the BFS for each port in the subnet */
323 OSM_LOG(p_log, OSM_LOG_VERBOSE,
324 "BFS through all port guids in the subnet [\n");
325
326 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
327 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
328 item = cl_qmap_next(item)) {
329 p_sw = (osm_switch_t *)item;
330 dnup_bfs_by_node(p_log, p_subn, p_sw, 0, &max_hops);
331 }
332 if(p_subn->opt.connect_roots) {
333 /*This is probably not necessary, by I am more comfortable
334 * clearing any possible side effects from the previous
335 * dnup routing pass
336 */
337 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
338 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
339 item = cl_qmap_next(item)) {
340 p_sw = (osm_switch_t *)item;
341 osm_switch_clear_hops(p_sw);
342 u = (struct dnup_node *) p_sw->priv;
343 u->visited = 0;
344 }
345 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
346 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
347 item = cl_qmap_next(item)) {
348 p_sw = (osm_switch_t *)item;
349 dnup_bfs_by_node(p_log, p_subn, p_sw, max_hops + 1, NULL);
350 }
351 }
352
353 OSM_LOG(p_log, OSM_LOG_VERBOSE,
354 "BFS through all port guids in the subnet ]\n");
355 /* Cleanup */
356 OSM_LOG_EXIT(p_log);
357 return 0;
358 }
359
dnup_build_lid_matrices(IN dnup_t * p_dnup)360 static int dnup_build_lid_matrices(IN dnup_t * p_dnup)
361 {
362 int status;
363
364 OSM_LOG_ENTER(&p_dnup->p_osm->log);
365
366 OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_VERBOSE,
367 "Ranking all port guids in the list\n");
368 /* Check if it's not a switched subnet */
369 if (cl_is_qmap_empty(&p_dnup->p_osm->subn.sw_guid_tbl)) {
370 OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_ERROR, "ERR AEOB: "
371 "This is not a switched subnet, cannot perform DNUP algorithm\n");
372 status = -1;
373 goto _exit;
374 }
375
376 /* Rank the subnet switches */
377 dnup_subn_rank(p_dnup);
378
379 /* After multiple ranking need to set Min Hop Table by DnUp algorithm */
380 OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_VERBOSE,
381 "Setting all switches' Min Hop Table\n");
382 status = dnup_set_min_hop_table(p_dnup);
383
384 _exit:
385 OSM_LOG_EXIT(&p_dnup->p_osm->log);
386 return status;
387 }
388
create_dnup_node(osm_switch_t * sw)389 static struct dnup_node *create_dnup_node(osm_switch_t * sw)
390 {
391 struct dnup_node *u;
392
393 u = malloc(sizeof(*u));
394 if (!u)
395 return NULL;
396 memset(u, 0, sizeof(*u));
397 u->sw = sw;
398 u->rank = 0xffffffff;
399 return u;
400 }
401
delete_dnup_node(struct dnup_node * u)402 static void delete_dnup_node(struct dnup_node *u)
403 {
404 u->sw->priv = NULL;
405 free(u);
406 }
407
408 /* DNUP callback function */
dnup_lid_matrices(void * ctx)409 static int dnup_lid_matrices(void *ctx)
410 {
411 dnup_t *p_dnup = ctx;
412 cl_map_item_t *item;
413 osm_switch_t *p_sw;
414 int ret = 0;
415 int num_leafs = 0;
416 uint8_t pn, pn_rem;
417
418 OSM_LOG_ENTER(&p_dnup->p_osm->log);
419
420 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
421 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
422 item = cl_qmap_next(item)) {
423 p_sw = (osm_switch_t *)item;
424 p_sw->priv = create_dnup_node(p_sw);
425 if (!p_sw->priv) {
426 OSM_LOG(&(p_dnup->p_osm->log), OSM_LOG_ERROR, "ERR AE0C: "
427 "cannot create dnup node\n");
428 OSM_LOG_EXIT(&p_dnup->p_osm->log);
429 return -1;
430 }
431 }
432
433
434 /* First setup node level nodes */
435 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
436 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
437 item = cl_qmap_next(item)) {
438 p_sw = (osm_switch_t *)item;
439
440 for (pn = 0; pn < p_sw->num_ports; pn++) {
441 osm_node_t *p_remote_node;
442 p_remote_node = osm_node_get_remote_node(p_sw->p_node, pn, &pn_rem);
443 if(p_remote_node && !p_remote_node->sw) {
444 struct dnup_node *u = p_sw->priv;
445 u->rank = 0;
446 OSM_LOG(&(p_dnup->p_osm->log),
447 OSM_LOG_VERBOSE, "(%s) rank 0 leaf switch\n",
448 p_sw->p_node->print_desc);
449 num_leafs++;
450 break;
451 }
452 }
453 }
454
455 if(num_leafs == 0) {
456 OSM_LOG(&(p_dnup->p_osm->log),
457 OSM_LOG_ERROR, "ERR AE0D: No leaf switches found, DnUp routing failed\n");
458 OSM_LOG_EXIT(&p_dnup->p_osm->log);
459 return -1;
460 }
461
462 ret = dnup_build_lid_matrices(p_dnup);
463
464 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl);
465 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl);
466 item = cl_qmap_next(item)) {
467 p_sw = (osm_switch_t *) item;
468 delete_dnup_node(p_sw->priv);
469 }
470
471 OSM_LOG_EXIT(&p_dnup->p_osm->log);
472 return ret;
473 }
474
dnup_delete(void * context)475 static void dnup_delete(void *context)
476 {
477 free(context);
478 }
479
osm_ucast_dnup_setup(struct osm_routing_engine * r,osm_opensm_t * osm)480 int osm_ucast_dnup_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
481 {
482 dnup_t *dnup;
483
484 OSM_LOG_ENTER(&osm->log);
485
486 dnup = malloc(sizeof(dnup_t));
487 if (!dnup)
488 return -1;
489 memset(dnup, 0, sizeof(dnup_t));
490
491 dnup->p_osm = osm;
492
493 r->context = dnup;
494 r->destroy = dnup_delete;
495 r->build_lid_matrices = dnup_lid_matrices;
496
497 OSM_LOG_EXIT(&osm->log);
498 return 0;
499 }
500