1 /* 2 * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2007,2009 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * Copyright (c) 2009 HNR Consulting. All rights reserved. 6 * Copyright (c) 2009 Battelle Memorial Institue. All rights reserved. 7 * 8 * This software is available to you under a choice of one of two 9 * licenses. You may choose to be licensed under the terms of the GNU 10 * General Public License (GPL) Version 2, available from the file 11 * COPYING in the main directory of this source tree, or the 12 * OpenIB.org BSD license below: 13 * 14 * Redistribution and use in source and binary forms, with or 15 * without modification, are permitted provided that the following 16 * conditions are met: 17 * 18 * - Redistributions of source code must retain the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer. 21 * 22 * - Redistributions in binary form must reproduce the above 23 * copyright notice, this list of conditions and the following 24 * disclaimer in the documentation and/or other materials 25 * provided with the distribution. 26 * 27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 * SOFTWARE. 35 * 36 */ 37 38 /* 39 * Abstract: 40 * Implementation of Up Down Algorithm using ranking & Min Hop 41 * Calculation functions 42 */ 43 44 #if HAVE_CONFIG_H 45 # include <config.h> 46 #endif /* HAVE_CONFIG_H */ 47 48 #include <stdlib.h> 49 #include <ctype.h> 50 #include <complib/cl_debug.h> 51 #include <complib/cl_qmap.h> 52 #include <opensm/osm_file_ids.h> 53 #define FILE_ID OSM_FILE_UCAST_DNUP_C 54 #include <opensm/osm_switch.h> 55 #include <opensm/osm_opensm.h> 56 #include <opensm/osm_ucast_mgr.h> 57 58 /* //////////////////////////// */ 59 /* Local types */ 60 /* //////////////////////////// */ 61 62 /* direction */ 63 typedef enum dnup_switch_dir { 64 UP = 0, 65 DOWN, 66 EQUAL 67 } dnup_switch_dir_t; 68 69 /* dnup structure */ 70 typedef struct dnup { 71 osm_opensm_t *p_osm; 72 } dnup_t; 73 74 struct dnup_node { 75 cl_list_item_t list; 76 osm_switch_t *sw; 77 dnup_switch_dir_t dir; 78 unsigned rank; 79 unsigned visited; 80 }; 81 82 /* This function returns direction based on rank and guid info of current & 83 remote ports */ 84 static dnup_switch_dir_t dnup_get_dir(unsigned cur_rank, unsigned rem_rank) 85 { 86 /* HACK: comes to solve root nodes connection, in a classic subnet root nodes do not connect 87 directly, but in case they are we assign to root node an UP direction to allow DNUP to discover 88 the subnet correctly (and not from the point of view of the last root node). 89 */ 90 if (!cur_rank && !rem_rank) 91 return EQUAL; 92 93 if (cur_rank < rem_rank) 94 return DOWN; 95 else if (cur_rank > rem_rank) 96 return UP; 97 else 98 return EQUAL; 99 } 100 101 /********************************************************************** 102 * This function does the bfs of min hop table calculation by guid index 103 * as a starting point. 104 **********************************************************************/ 105 static int dnup_bfs_by_node(IN osm_log_t * p_log, IN osm_subn_t * p_subn, 106 IN osm_switch_t * p_sw, IN uint8_t prune_weight, 107 OUT uint8_t * max_hops) 108 { 109 uint8_t pn, pn_rem; 110 cl_qlist_t list; 111 uint16_t lid; 112 struct dnup_node *u; 113 dnup_switch_dir_t next_dir, current_dir; 114 115 OSM_LOG_ENTER(p_log); 116 117 lid = osm_node_get_base_lid(p_sw->p_node, 0); 118 lid = cl_ntoh16(lid); 119 osm_switch_set_hops(p_sw, lid, 0, 0); 120 121 OSM_LOG(p_log, OSM_LOG_DEBUG, 122 "Starting from switch - port GUID 0x%" PRIx64 " lid %u\n", 123 cl_ntoh64(p_sw->p_node->node_info.port_guid), lid); 124 125 u = p_sw->priv; 126 u->dir = DOWN; 127 128 /* Update list with the new element */ 129 cl_qlist_init(&list); 130 cl_qlist_insert_tail(&list, &u->list); 131 132 /* BFS the list till no next element */ 133 while (!cl_is_qlist_empty(&list)) { 134 u = (struct dnup_node *)cl_qlist_remove_head(&list); 135 u->visited = 0; /* cleanup */ 136 current_dir = u->dir; 137 /* Go over all ports of the switch and find unvisited remote nodes */ 138 for (pn = 1; pn < u->sw->num_ports; pn++) { 139 osm_node_t *p_remote_node; 140 struct dnup_node *rem_u; 141 uint8_t current_min_hop, remote_min_hop, 142 set_hop_return_value; 143 osm_switch_t *p_remote_sw; 144 145 p_remote_node = 146 osm_node_get_remote_node(u->sw->p_node, pn, 147 &pn_rem); 148 /* If no remote node OR remote node is not a SWITCH 149 continue to next pn */ 150 if (!p_remote_node || !p_remote_node->sw) 151 continue; 152 /* Fetch remote guid only after validation of remote node */ 153 p_remote_sw = p_remote_node->sw; 154 rem_u = p_remote_sw->priv; 155 /* Decide which direction to mark it (UP/DOWN) */ 156 next_dir = dnup_get_dir(u->rank, rem_u->rank); 157 158 /* Set MinHop value for the current lid */ 159 current_min_hop = osm_switch_get_least_hops(u->sw, lid); 160 /* Check hop count if better insert into list && update 161 the remote node Min Hop Table */ 162 remote_min_hop = 163 osm_switch_get_hop_count(p_remote_sw, lid, pn_rem); 164 165 /* Check if this is a legal step : the only illegal step is going 166 from UP to DOWN */ 167 if ((current_dir == UP) && (next_dir == DOWN)) { 168 OSM_LOG(p_log, OSM_LOG_DEBUG, 169 "Avoiding move from 0x%016" PRIx64 170 " to 0x%016" PRIx64 "\n", 171 cl_ntoh64(osm_node_get_node_guid(u->sw->p_node)), 172 cl_ntoh64(osm_node_get_node_guid(p_remote_node))); 173 /* Illegal step. If prune_weight is set, allow it with an 174 * additional weight 175 */ 176 if(prune_weight) { 177 current_min_hop+=prune_weight; 178 if(current_min_hop >= 64) { 179 OSM_LOG(p_log, OSM_LOG_ERROR, 180 "ERR AE02: Too many hops on subnet," 181 " can't relax illegal Dn/Up transition."); 182 osm_switch_set_hops(p_remote_sw, lid, 183 pn_rem, OSM_NO_PATH); 184 } 185 } else { 186 continue; 187 } 188 } 189 if (current_min_hop + 1 < remote_min_hop) { 190 set_hop_return_value = 191 osm_switch_set_hops(p_remote_sw, lid, 192 pn_rem, 193 current_min_hop + 1); 194 if(max_hops && current_min_hop + 1 > *max_hops) { 195 *max_hops = current_min_hop + 1; 196 } 197 if (set_hop_return_value) { 198 OSM_LOG(p_log, OSM_LOG_ERROR, "ERR AE01: " 199 "Invalid value returned from set min hop is: %d\n", 200 set_hop_return_value); 201 } 202 /* Check if remote port has already been visited */ 203 if (!rem_u->visited) { 204 /* Insert dnup_switch item into the list */ 205 rem_u->dir = next_dir; 206 rem_u->visited = 1; 207 cl_qlist_insert_tail(&list, 208 &rem_u->list); 209 } 210 } 211 } 212 } 213 214 OSM_LOG_EXIT(p_log); 215 return 0; 216 } 217 218 /* NOTE : PLS check if we need to decide that the first */ 219 /* rank is a SWITCH for BFS purpose */ 220 static int dnup_subn_rank(IN dnup_t * p_dnup) 221 { 222 osm_switch_t *p_sw; 223 osm_physp_t *p_physp, *p_remote_physp; 224 cl_qlist_t list; 225 cl_map_item_t *item; 226 struct dnup_node *u, *remote_u; 227 uint8_t num_ports, port_num; 228 osm_log_t *p_log = &p_dnup->p_osm->log; 229 unsigned max_rank = 0; 230 231 OSM_LOG_ENTER(p_log); 232 cl_qlist_init(&list); 233 234 /* add all node level switches to the list */ 235 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); 236 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); 237 item = cl_qmap_next(item)) { 238 p_sw = (osm_switch_t *)item; 239 u = p_sw->priv; 240 if (u->rank == 0) 241 cl_qlist_insert_tail(&list, &u->list); 242 } 243 244 /* BFS the list till it's empty */ 245 while (!cl_is_qlist_empty(&list)) { 246 u = (struct dnup_node *)cl_qlist_remove_head(&list); 247 /* Go over all remote nodes and rank them (if not already visited) */ 248 p_sw = u->sw; 249 num_ports = p_sw->num_ports; 250 OSM_LOG(p_log, OSM_LOG_DEBUG, 251 "Handling switch GUID 0x%" PRIx64 "\n", 252 cl_ntoh64(osm_node_get_node_guid(p_sw->p_node))); 253 for (port_num = 1; port_num < num_ports; port_num++) { 254 ib_net64_t port_guid; 255 256 /* Current port fetched in order to get remote side */ 257 p_physp = 258 osm_node_get_physp_ptr(p_sw->p_node, port_num); 259 260 if (!p_physp) 261 continue; 262 263 p_remote_physp = p_physp->p_remote_physp; 264 265 /* 266 make sure that all the following occur on p_remote_physp: 267 1. The port isn't NULL 268 2. It is a switch 269 */ 270 if (p_remote_physp && p_remote_physp->p_node->sw) { 271 remote_u = p_remote_physp->p_node->sw->priv; 272 port_guid = p_remote_physp->port_guid; 273 274 if (remote_u->rank > u->rank + 1) { 275 remote_u->rank = u->rank + 1; 276 max_rank = remote_u->rank; 277 cl_qlist_insert_tail(&list, 278 &remote_u->list); 279 OSM_LOG(p_log, OSM_LOG_DEBUG, 280 "Rank of port GUID 0x%" PRIx64 281 " = %u\n", cl_ntoh64(port_guid), 282 remote_u->rank); 283 } 284 } 285 } 286 } 287 288 /* Print Summary of ranking */ 289 OSM_LOG(p_log, OSM_LOG_VERBOSE, 290 "Subnet ranking completed. Max Node Rank = %d\n", max_rank); 291 OSM_LOG_EXIT(p_log); 292 return 0; 293 } 294 295 static int dnup_set_min_hop_table(IN dnup_t * p_dnup) 296 { 297 osm_subn_t *p_subn = &p_dnup->p_osm->subn; 298 osm_log_t *p_log = &p_dnup->p_osm->log; 299 osm_switch_t *p_sw; 300 struct dnup_node *u; 301 cl_map_item_t *item; 302 uint8_t max_hops = 0; 303 304 OSM_LOG_ENTER(p_log); 305 306 /* Go over all the switches in the subnet - for each init their Min Hop 307 Table */ 308 OSM_LOG(p_log, OSM_LOG_VERBOSE, 309 "Init Min Hop Table of all switches [\n"); 310 311 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); 312 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); 313 item = cl_qmap_next(item)) { 314 p_sw = (osm_switch_t *)item; 315 /* Clear Min Hop Table */ 316 osm_switch_clear_hops(p_sw); 317 } 318 319 OSM_LOG(p_log, OSM_LOG_VERBOSE, 320 "Init Min Hop Table of all switches ]\n"); 321 322 /* Now do the BFS for each port in the subnet */ 323 OSM_LOG(p_log, OSM_LOG_VERBOSE, 324 "BFS through all port guids in the subnet [\n"); 325 326 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); 327 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); 328 item = cl_qmap_next(item)) { 329 p_sw = (osm_switch_t *)item; 330 dnup_bfs_by_node(p_log, p_subn, p_sw, 0, &max_hops); 331 } 332 if(p_subn->opt.connect_roots) { 333 /*This is probably not necessary, by I am more comfortable 334 * clearing any possible side effects from the previous 335 * dnup routing pass 336 */ 337 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); 338 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); 339 item = cl_qmap_next(item)) { 340 p_sw = (osm_switch_t *)item; 341 osm_switch_clear_hops(p_sw); 342 u = (struct dnup_node *) p_sw->priv; 343 u->visited = 0; 344 } 345 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); 346 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); 347 item = cl_qmap_next(item)) { 348 p_sw = (osm_switch_t *)item; 349 dnup_bfs_by_node(p_log, p_subn, p_sw, max_hops + 1, NULL); 350 } 351 } 352 353 OSM_LOG(p_log, OSM_LOG_VERBOSE, 354 "BFS through all port guids in the subnet ]\n"); 355 /* Cleanup */ 356 OSM_LOG_EXIT(p_log); 357 return 0; 358 } 359 360 static int dnup_build_lid_matrices(IN dnup_t * p_dnup) 361 { 362 int status; 363 364 OSM_LOG_ENTER(&p_dnup->p_osm->log); 365 366 OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_VERBOSE, 367 "Ranking all port guids in the list\n"); 368 /* Check if it's not a switched subnet */ 369 if (cl_is_qmap_empty(&p_dnup->p_osm->subn.sw_guid_tbl)) { 370 OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_ERROR, "ERR AEOB: " 371 "This is not a switched subnet, cannot perform DNUP algorithm\n"); 372 status = -1; 373 goto _exit; 374 } 375 376 /* Rank the subnet switches */ 377 dnup_subn_rank(p_dnup); 378 379 /* After multiple ranking need to set Min Hop Table by DnUp algorithm */ 380 OSM_LOG(&p_dnup->p_osm->log, OSM_LOG_VERBOSE, 381 "Setting all switches' Min Hop Table\n"); 382 status = dnup_set_min_hop_table(p_dnup); 383 384 _exit: 385 OSM_LOG_EXIT(&p_dnup->p_osm->log); 386 return status; 387 } 388 389 static struct dnup_node *create_dnup_node(osm_switch_t * sw) 390 { 391 struct dnup_node *u; 392 393 u = malloc(sizeof(*u)); 394 if (!u) 395 return NULL; 396 memset(u, 0, sizeof(*u)); 397 u->sw = sw; 398 u->rank = 0xffffffff; 399 return u; 400 } 401 402 static void delete_dnup_node(struct dnup_node *u) 403 { 404 u->sw->priv = NULL; 405 free(u); 406 } 407 408 /* DNUP callback function */ 409 static int dnup_lid_matrices(void *ctx) 410 { 411 dnup_t *p_dnup = ctx; 412 cl_map_item_t *item; 413 osm_switch_t *p_sw; 414 int ret = 0; 415 int num_leafs = 0; 416 uint8_t pn, pn_rem; 417 418 OSM_LOG_ENTER(&p_dnup->p_osm->log); 419 420 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); 421 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); 422 item = cl_qmap_next(item)) { 423 p_sw = (osm_switch_t *)item; 424 p_sw->priv = create_dnup_node(p_sw); 425 if (!p_sw->priv) { 426 OSM_LOG(&(p_dnup->p_osm->log), OSM_LOG_ERROR, "ERR AE0C: " 427 "cannot create dnup node\n"); 428 OSM_LOG_EXIT(&p_dnup->p_osm->log); 429 return -1; 430 } 431 } 432 433 434 /* First setup node level nodes */ 435 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); 436 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); 437 item = cl_qmap_next(item)) { 438 p_sw = (osm_switch_t *)item; 439 440 for (pn = 0; pn < p_sw->num_ports; pn++) { 441 osm_node_t *p_remote_node; 442 p_remote_node = osm_node_get_remote_node(p_sw->p_node, pn, &pn_rem); 443 if(p_remote_node && !p_remote_node->sw) { 444 struct dnup_node *u = p_sw->priv; 445 u->rank = 0; 446 OSM_LOG(&(p_dnup->p_osm->log), 447 OSM_LOG_VERBOSE, "(%s) rank 0 leaf switch\n", 448 p_sw->p_node->print_desc); 449 num_leafs++; 450 break; 451 } 452 } 453 } 454 455 if(num_leafs == 0) { 456 OSM_LOG(&(p_dnup->p_osm->log), 457 OSM_LOG_ERROR, "ERR AE0D: No leaf switches found, DnUp routing failed\n"); 458 OSM_LOG_EXIT(&p_dnup->p_osm->log); 459 return -1; 460 } 461 462 ret = dnup_build_lid_matrices(p_dnup); 463 464 for (item = cl_qmap_head(&p_dnup->p_osm->subn.sw_guid_tbl); 465 item != cl_qmap_end(&p_dnup->p_osm->subn.sw_guid_tbl); 466 item = cl_qmap_next(item)) { 467 p_sw = (osm_switch_t *) item; 468 delete_dnup_node(p_sw->priv); 469 } 470 471 OSM_LOG_EXIT(&p_dnup->p_osm->log); 472 return ret; 473 } 474 475 static void dnup_delete(void *context) 476 { 477 free(context); 478 } 479 480 int osm_ucast_dnup_setup(struct osm_routing_engine *r, osm_opensm_t *osm) 481 { 482 dnup_t *dnup; 483 484 OSM_LOG_ENTER(&osm->log); 485 486 dnup = malloc(sizeof(dnup_t)); 487 if (!dnup) 488 return -1; 489 memset(dnup, 0, sizeof(dnup_t)); 490 491 dnup->p_osm = osm; 492 493 r->context = dnup; 494 r->destroy = dnup_delete; 495 r->build_lid_matrices = dnup_lid_matrices; 496 497 OSM_LOG_EXIT(&osm->log); 498 return 0; 499 } 500