1 /*
2 * Copyright (c) 2008-2009 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2008-2009 Mellanox Technologies LTD. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35 /*
36 * Abstract:
37 * Implementation of OpenSM Cached Unicast Routing
38 *
39 * Environment:
40 * Linux User Mode
41 *
42 */
43
44 #if HAVE_CONFIG_H
45 # include <config.h>
46 #endif
47
48 #include <stdlib.h>
49 #include <string.h>
50 #include <ctype.h>
51 #include <errno.h>
52 #include <iba/ib_types.h>
53 #include <complib/cl_qmap.h>
54 #include <complib/cl_pool.h>
55 #include <complib/cl_debug.h>
56 #include <opensm/osm_file_ids.h>
57 #define FILE_ID OSM_FILE_UCAST_CACHE_C
58 #include <opensm/osm_opensm.h>
59 #include <opensm/osm_ucast_mgr.h>
60 #include <opensm/osm_ucast_cache.h>
61 #include <opensm/osm_switch.h>
62 #include <opensm/osm_node.h>
63 #include <opensm/osm_port.h>
64
/*
 * Cached state of one switch port.
 *
 * For port numbers >= 1 the entry describes a link that is currently
 * missing from the fabric; remote_lid_ho == 0 means that nothing is
 * cached for the port.
 * Entry 0 is special: it holds the details of the switch itself
 * (its own LID and its leaf flag) - see cache_sw_new().
 */
typedef struct cache_port {
	/* ports >= 1: remote node was a CA/RTR; port 0: switch is a leaf */
	boolean_t is_leaf;
	/* ports >= 1: LID (host order) of the remote node; port 0: own LID */
	uint16_t remote_lid_ho;
} cache_port_t;
69
70 typedef struct cache_switch {
71 cl_map_item_t map_item;
72 boolean_t dropped;
73 uint16_t max_lid_ho;
74 uint16_t num_hops;
75 uint8_t **hops;
76 uint8_t *lft;
77 uint8_t num_ports;
78 cache_port_t ports[0];
79 } cache_switch_t;
80
/*
 * Return the base LID (host order) of a cached switch.
 * The switch's own LID is kept in the remote_lid_ho field of port entry 0.
 */
static uint16_t cache_sw_get_base_lid_ho(cache_switch_t * p_sw)
{
	const cache_port_t *p_self = p_sw->ports;

	return p_self->remote_lid_ho;
}
85
cache_sw_is_leaf(cache_switch_t * p_sw)86 static boolean_t cache_sw_is_leaf(cache_switch_t * p_sw)
87 {
88 return p_sw->ports[0].is_leaf;
89 }
90
/*
 * Mark a cached switch as a leaf switch by raising the is_leaf
 * flag of port entry 0.
 */
static void cache_sw_set_leaf(cache_switch_t * p_sw)
{
	cache_port_t *p_self = p_sw->ports;

	p_self->is_leaf = TRUE;
}
95
cache_sw_new(uint16_t lid_ho,unsigned num_ports)96 static cache_switch_t *cache_sw_new(uint16_t lid_ho, unsigned num_ports)
97 {
98 cache_switch_t *p_cache_sw = malloc(sizeof(cache_switch_t) +
99 num_ports * sizeof(cache_port_t));
100 if (!p_cache_sw)
101 return NULL;
102
103 memset(p_cache_sw, 0,
104 sizeof(*p_cache_sw) + num_ports * sizeof(cache_port_t));
105
106 p_cache_sw->num_ports = num_ports;
107
108 /* port[0] fields represent this switch details - lid and type */
109 p_cache_sw->ports[0].remote_lid_ho = lid_ho;
110 p_cache_sw->ports[0].is_leaf = FALSE;
111
112 return p_cache_sw;
113 }
114
/*
 * Release a cached switch object together with the routing data it
 * owns (the LFT and every row of the hops matrix).
 * Passing NULL is allowed and does nothing.
 */
static void cache_sw_destroy(cache_switch_t * p_sw)
{
	unsigned hop_idx;

	if (!p_sw)
		return;

	/* free(NULL) is a no-op, so the members need no individual guards */
	free(p_sw->lft);

	if (p_sw->hops) {
		for (hop_idx = 0; hop_idx < p_sw->num_hops; hop_idx++)
			free(p_sw->hops[hop_idx]);
		free(p_sw->hops);
	}

	free(p_sw);
}
132
cache_get_sw(osm_ucast_mgr_t * p_mgr,uint16_t lid_ho)133 static cache_switch_t *cache_get_sw(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho)
134 {
135 cache_switch_t *p_cache_sw = (cache_switch_t *)
136 cl_qmap_get(&p_mgr->cache_sw_tbl, lid_ho);
137 if (p_cache_sw == (cache_switch_t *)
138 cl_qmap_end(&p_mgr->cache_sw_tbl))
139 p_cache_sw = NULL;
140
141 return p_cache_sw;
142 }
143
/*
 * Record a lost link of a switch port in the cache.
 *
 * p_mgr         - unicast manager owning the cache
 * p             - physical port on the switch side of the lost link
 * remote_lid_ho - LID (host order) of the node on the other side
 * is_ca         - TRUE when the remote node is a CA/RTR; the switch is
 *                 then also marked as a leaf switch
 *
 * The cached switch object is created on demand. Nothing is cached when
 * the cache is already invalid, when either LID is 0, or for port 0.
 * On allocation failure or when the port number does not fit the cached
 * switch, the whole cache is invalidated.
 */
static void cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p,
			      uint16_t remote_lid_ho, boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0));

	OSM_LOG_ENTER(p_mgr->p_log);

	/*
	 * The cache may have been invalidated by a previous call for the
	 * other side of the same link. Adding an entry now would leave
	 * stale data in the emptied table, so skip it like the other
	 * cache helpers do.
	 */
	if (!p_mgr->cache_valid)
		goto Exit;

	if (!lid_ho || !remote_lid_ho || !p->port_num)
		goto Exit;

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Caching switch port: lid %u [port %u] -> lid %u (%s)\n",
		lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW");

	p_cache_sw = cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		p_cache_sw = cache_sw_new(lid_ho, p->p_node->sw->num_ports);
		if (!p_cache_sw) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD01: Out of memory - cache is invalid\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}
		cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho,
			       &p_cache_sw->map_item);
	}

	if (p->port_num >= p_cache_sw->num_ports) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"ERR AD02: Wrong switch? - cache is invalid\n");
		/* this also destroys p_cache_sw - do not touch it afterwards */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if (is_ca)
		cache_sw_set_leaf(p_cache_sw);

	if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) {
		/* cache this link only if it hasn't been already cached */
		p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho;
		p_cache_sw->ports[p->port_num].is_leaf = is_ca;
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
190
/*
 * Remove from the cache (and destroy) every cached switch that has no
 * cached link left on any of its ports 1..num_ports-1.
 * Does nothing when the cache is not valid.
 */
static void cache_cleanup_switches(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *p_tbl = &p_mgr->cache_sw_tbl;
	cache_switch_t *p_cur;
	cache_switch_t *p_following;
	unsigned port_num;

	if (!p_mgr->cache_valid)
		return;

	for (p_cur = (cache_switch_t *) cl_qmap_head(p_tbl);
	     p_cur != (cache_switch_t *) cl_qmap_end(p_tbl);
	     p_cur = p_following) {
		/* fetch the successor first - p_cur may be removed below */
		p_following =
		    (cache_switch_t *) cl_qmap_next(&p_cur->map_item);

		/* stop at the first port that still has a cached link */
		for (port_num = 1; port_num < p_cur->num_ports; port_num++)
			if (p_cur->ports[port_num].remote_lid_ho)
				break;

		if (port_num < p_cur->num_ports)
			continue;

		cl_qmap_remove_item(p_tbl, &p_cur->map_item);
		cache_sw_destroy(p_cur);
	}
}
219
220 static void
cache_check_link_change(osm_ucast_mgr_t * p_mgr,osm_physp_t * p_physp_1,osm_physp_t * p_physp_2)221 cache_check_link_change(osm_ucast_mgr_t * p_mgr,
222 osm_physp_t * p_physp_1, osm_physp_t * p_physp_2)
223 {
224 OSM_LOG_ENTER(p_mgr->p_log);
225 CL_ASSERT(p_physp_1 && p_physp_2);
226
227 if (!p_mgr->cache_valid)
228 goto Exit;
229
230 if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp)
231 /* both ports were down - new link */
232 goto Exit;
233
234 /* unicast cache cannot tolerate any link location change */
235
236 if ((p_physp_1->p_remote_physp &&
237 p_physp_1->p_remote_physp->p_remote_physp) ||
238 (p_physp_2->p_remote_physp &&
239 p_physp_2->p_remote_physp->p_remote_physp)) {
240 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
241 "Link location change discovered\n");
242 osm_ucast_cache_invalidate(p_mgr);
243 goto Exit;
244 }
245 Exit:
246 OSM_LOG_EXIT(p_mgr->p_log);
247 }
248
/*
 * Clean a link that has reappeared in the fabric from the cache.
 *
 * lid_ho / port_num - switch side of the link
 * remote_lid_ho     - LID (host order) of the node on the other side
 * is_ca             - TRUE when the remote node is a CA/RTR
 *
 * The link must match what was cached for that port (same remote LID,
 * same remote node kind). Any mismatch, as well as a link that was not
 * cached at all, invalidates the whole cache.
 */
static void cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho,
			      uint8_t port_num, uint16_t remote_lid_ho,
			      boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	cache_port_t *p_cached_port;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	p_cache_sw = cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Found uncached switch/link (lid %u, port %u)\n",
			lid_ho, port_num);
		goto Invalidate;
	}

	if (port_num >= p_cache_sw->num_ports ||
	    !p_cache_sw->ports[port_num].remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Found uncached switch link (lid %u, port %u)\n",
			lid_ho, port_num);
		goto Invalidate;
	}

	/* port_num is known to be in range from here on */
	p_cached_port = &p_cache_sw->ports[port_num];

	if (p_cached_port->remote_lid_ho != remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Remote lid change on switch lid %u, port %u "
			"(was %u, now %u)\n", lid_ho, port_num,
			p_cached_port->remote_lid_ho, remote_lid_ho);
		goto Invalidate;
	}

	/* the cached and the discovered remote node kinds must agree */
	if (!p_cached_port->is_leaf != !is_ca) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Remote node type change on switch lid %u, port %u\n",
			lid_ho, port_num);
		goto Invalidate;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"New link from lid %u, port %u to lid %u - "
		"found in cache\n", lid_ho, port_num, remote_lid_ho);

	/* the new link was cached - clean it from the cache */
	p_cached_port->remote_lid_ho = 0;
	p_cached_port->is_leaf = FALSE;
	goto Exit;

Invalidate:
	osm_ucast_cache_invalidate(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* cache_remove_port() */
308
309 static void
cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,cache_switch_t * p_cache_sw,osm_switch_t * p_sw)310 cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,
311 cache_switch_t * p_cache_sw, osm_switch_t * p_sw)
312 {
313 if (!p_mgr->cache_valid)
314 return;
315
316 /* when seting unicast info, the cached port
317 should have all the required info */
318 CL_ASSERT(p_cache_sw->max_lid_ho && p_cache_sw->lft &&
319 p_cache_sw->num_hops && p_cache_sw->hops);
320
321 p_sw->max_lid_ho = p_cache_sw->max_lid_ho;
322
323 if (p_sw->new_lft)
324 free(p_sw->new_lft);
325 p_sw->new_lft = p_cache_sw->lft;
326 p_cache_sw->lft = NULL;
327
328 p_sw->num_hops = p_cache_sw->num_hops;
329 p_cache_sw->num_hops = 0;
330 if (p_sw->hops)
331 free(p_sw->hops);
332 p_sw->hops = p_cache_sw->hops;
333 p_cache_sw->hops = NULL;
334
335 p_sw->need_update = 2;
336 }
337
/*
 * Write the cache contents (missing switches and links) to the log.
 * Produces output only when the debug log level is active.
 */
static void ucast_cache_dump(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *p_tbl = &p_mgr->cache_sw_tbl;
	cache_switch_t *p_cached;
	unsigned port_num;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG))
		goto Exit;

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Dumping missing nodes/links as logged by unicast cache:\n");

	p_cached = (cache_switch_t *) cl_qmap_head(p_tbl);
	while (p_cached != (cache_switch_t *) cl_qmap_end(p_tbl)) {

		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"\t Switch lid %u %s%s\n",
			cache_sw_get_base_lid_ho(p_cached),
			cache_sw_is_leaf(p_cached) ? "[leaf switch] " : "",
			p_cached->dropped ? "[whole switch missing]" : "");

		/* entry 0 describes the switch itself - list ports 1 and up */
		for (port_num = 1; port_num < p_cached->num_ports; port_num++) {
			if (!p_cached->ports[port_num].remote_lid_ho)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"\t - port %u -> lid %u %s\n",
				port_num,
				p_cached->ports[port_num].remote_lid_ho,
				p_cached->ports[port_num].is_leaf ?
				"[remote node is leaf]" : "");
		}

		p_cached = (cache_switch_t *) cl_qmap_next(&p_cached->map_item);
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
372
/*
 * Invalidate the unicast cache: clear the cache_valid flag, destroy all
 * the cached switches and empty the cache table.
 * Does nothing when the cache is already invalid.
 */
void osm_ucast_cache_invalidate(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *p_tbl = &p_mgr->cache_sw_tbl;
	cache_switch_t *p_cur;
	cache_switch_t *p_following;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	p_mgr->cache_valid = FALSE;

	for (p_cur = (cache_switch_t *) cl_qmap_head(p_tbl);
	     p_cur != (cache_switch_t *) cl_qmap_end(p_tbl);
	     p_cur = p_following) {
		/* the successor must be read before the item is freed */
		p_following =
		    (cache_switch_t *) cl_qmap_next(&p_cur->map_item);
		cache_sw_destroy(p_cur);
	}
	cl_qmap_remove_all(p_tbl);

	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "Unicast Cache invalidated\n");
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
398
/*
 * Decide whether the unicast cache may be used for the current subnet.
 *
 * The check is done in two passes:
 *  1. Every physical switch port of the subnet is scanned. Leaf switches
 *     are marked in the cache along the way. For each port whose
 *     need_update flag is set, the real link is compared against the
 *     cache: a matching cached link is cleaned from the cache (restoring
 *     the routing data of a switch that came back), an uncached change
 *     is tolerated only if it does not involve a new switch or CA/RTR.
 *  2. Every switch that is still cached is scanned. Whatever remains in
 *     the cache is missing from the fabric, and only missing leaf
 *     switches, links to CA/RTR and links to missing leaf switches are
 *     tolerated.
 *
 * Any violation invalidates the cache via osm_ucast_cache_invalidate().
 * Callers check p_mgr->cache_valid afterwards for the outcome.
 */
static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cache_sw;
	cache_switch_t *p_remote_cache_sw;
	unsigned port_num;
	unsigned max_ports;
	uint8_t remote_node_type;
	uint16_t lid_ho;
	uint16_t remote_lid_ho;
	osm_switch_t *p_sw;
	osm_switch_t *p_remote_sw;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_port_t *p_remote_port;
	cl_qmap_t *p_sw_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);
	if (!p_mgr->cache_valid)
		goto Exit;

	/* If there are no switches in the subnet, we are done */
	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	if (cl_qmap_count(p_sw_tbl) == 0) {
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/*
	 * Scan all the physical switch ports in the subnet.
	 * If the port need_update flag is on, check whether
	 * it's just some node/port reset or a cached topology
	 * change. Otherwise the cache is invalid.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {

		p_node = p_sw->p_node;

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
		p_cache_sw = cache_get_sw(p_mgr, lid_ho);

		max_ports = osm_node_get_num_physp(p_node);

		/* skip port 0 */
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);

			if (!p_physp || !p_physp->p_remote_physp ||
			    !osm_physp_link_exists(p_physp,
						   p_physp->p_remote_physp))
				/* no valid link */
				continue;

			/*
			 * While scanning all the physical ports in the subnet,
			 * mark corresponding leaf switches in the cache.
			 */
			if (p_cache_sw &&
			    !p_cache_sw->dropped &&
			    !cache_sw_is_leaf(p_cache_sw) &&
			    p_physp->p_remote_physp->p_node &&
			    osm_node_get_type(p_physp->p_remote_physp->
					      p_node) != IB_NODE_TYPE_SWITCH)
				cache_sw_set_leaf(p_cache_sw);

			if (!p_physp->need_update)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Checking switch lid %u, port %u\n",
				lid_ho, port_num);

			p_remote_physp = osm_physp_get_remote(p_physp);
			remote_node_type =
			    osm_node_get_type(p_remote_physp->p_node);

			/* a switch is identified by its port 0 LID, any other
			   node by the LID of the port the link ends on */
			if (remote_node_type == IB_NODE_TYPE_SWITCH)
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node, 0));
			else
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node,
					       osm_physp_get_port_num
					       (p_remote_physp)));

			if (!p_cache_sw ||
			    port_num >= p_cache_sw->num_ports ||
			    !p_cache_sw->ports[port_num].remote_lid_ho) {
				/*
				 * There is some uncached change on the port.
				 * In general, the reasons might be as follows:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - quick connection location change
				 *  - new link (or new switch)
				 *
				 * First two reasons allow cache usage, while
				 * the last two reasons should invalidate cache.
				 *
				 * In case of quick connection location change,
				 * cache would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * In case of new link between two known nodes,
				 * cache also would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * Another reason is cached link between two
				 * known switches went back. In this case the
				 * osm_ucast_cache_check_new_link() function would
				 * clear both sides of the link from the cache
				 * during the discovery process, so effectively
				 * this would be equivalent to port reset.
				 *
				 * So three possible reasons remain:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - link of a new switch
				 *
				 * To validate cache, we need to check only the
				 * third reason - link of a new node/switch:
				 *  - If this is the local switch that is new,
				 *    then it should have (p_sw->need_update == 2).
				 *  - If the remote node is switch and it's new,
				 *    then it also should have
				 *    (p_sw->need_update == 2).
				 *  - If the remote node is CA/RTR and it's new,
				 *    then its port should have is_new flag on.
				 */
				if (p_sw->need_update == 2) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"New switch found (lid %u)\n",
						lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (remote_node_type == IB_NODE_TYPE_SWITCH) {

					p_remote_sw =
					    p_remote_physp->p_node->sw;
					if (p_remote_sw->need_update == 2) {
						/* this could also be case of
						   switch coming back with an
						   additional link that it
						   didn't have before */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New switch/link found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				} else {
					/*
					 * Remote node is CA/RTR.
					 * Get p_port of the remote node and
					 * check its p_port->is_new flag.
					 */
					p_remote_port =
					    osm_get_port_by_guid(p_mgr->p_subn,
								 osm_physp_get_port_guid
								 (p_remote_physp));
					if (!p_remote_port) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_ERROR,
							"ERR AD04: No port was found for "
							"port GUID 0x%" PRIx64 "\n",
							cl_ntoh64(osm_physp_get_port_guid
								  (p_remote_physp)));
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
					if (p_remote_port->is_new) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New CA/RTR found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				}
			} else {
				/*
				 * The change on the port is cached.
				 * In general, the reasons might be as follows:
				 *  - link between two known nodes went back
				 *  - one or more nodes went back, causing all
				 *    the links to reappear
				 *
				 * If it was link that went back, then this case
				 * would have been taken care of during the
				 * discovery by osm_ucast_cache_check_new_link(),
				 * so it's some node that went back.
				 */
				if ((p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
				    (!p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote node type change on switch lid %u, port %u\n",
						lid_ho, port_num);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (p_cache_sw->ports[port_num].remote_lid_ho !=
				    remote_lid_ho) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote lid change on switch lid %u, port %u "
						"(was %u, now %u)\n",
						lid_ho, port_num,
						p_cache_sw->ports[port_num].
						remote_lid_ho, remote_lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				/*
				 * We don't care who is the node that has
				 * reappeared in the subnet (local or remote).
				 * What's important that the cached link matches
				 * the real fabrics link.
				 * Just clean it from cache.
				 */

				p_cache_sw->ports[port_num].remote_lid_ho = 0;
				p_cache_sw->ports[port_num].is_leaf = FALSE;
				if (p_cache_sw->dropped) {
					cache_restore_ucast_info(p_mgr,
								 p_cache_sw,
								 p_sw);
					p_cache_sw->dropped = FALSE;
				}

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Restored link from cache: lid %u, port %u to lid %u\n",
					lid_ho, port_num, remote_lid_ho);
			}
		}
	}

	/* Remove all the cached switches that
	   have all their ports restored */
	cache_cleanup_switches(p_mgr);

	/*
	 * Done scanning all the physical switch ports in the subnet.
	 * Now we need to check the other side:
	 * Scan all the cached switches and their ports:
	 *  - If the cached switch is missing in the subnet
	 *    (dropped flag is on), check that it's a leaf switch.
	 *    If it's not a leaf, the cache is invalid, because
	 *    cache can tolerate only leaf switch removal.
	 *  - If the cached switch exists in fabric, check all
	 *    its cached ports. These cached ports represent
	 *    missing link in the fabric.
	 *    The missing links that can be tolerated are:
	 *      + link to missing CA/RTR
	 *      + link to missing leaf switch
	 */
	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cache_sw =
	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {

		if (p_cache_sw->dropped) {
			if (!cache_sw_is_leaf(p_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Missing non-leaf switch (lid %u)\n",
					cache_sw_get_base_lid_ho(p_cache_sw));
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Missing leaf switch (lid %u) - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw));
			continue;
		}

		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
			if (!p_cache_sw->ports[port_num].remote_lid_ho)
				continue;

			if (p_cache_sw->ports[port_num].is_leaf) {
				CL_ASSERT(cache_sw_is_leaf(p_cache_sw));
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to CA/RTR - "
					"continuing validation\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				continue;
			}

			p_remote_cache_sw = cache_get_sw(p_mgr,
							 p_cache_sw->
							 ports[port_num].
							 remote_lid_ho);

			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to existing switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			if (!cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to non-leaf switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			/*
			 * At this point we know that the missing link is to
			 * a leaf switch. However, one case deserves a special
			 * treatment. If there was a link between two leaf
			 * switches, then missing leaf switch might break
			 * routing. It is possible that there are routes
			 * that use leaf switches to get from switch to switch
			 * and not just to get to the CAs behind the leaf switch.
			 */
			if (cache_sw_is_leaf(p_cache_sw) &&
			    cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing leaf-2-leaf link\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Switch lid %u, port %u: missing remote leaf switch - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw),
				port_num);
		}
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
	ucast_cache_dump(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* ucast_cache_validate() */
758
/*
 * Process a link discovered between (p_node_1, port_num_1) and
 * (p_node_2, port_num_2) while the cache is valid.
 *
 * The cache is invalidated when the link indicates a link location
 * change or connects two non-switch nodes. If both ends already have a
 * LID, the link is expected to be found in the cache and is cleaned
 * from it (on both sides for a switch-to-switch link). If a LID is
 * still missing, nothing more is done here.
 */
void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr,
				    osm_node_t * p_node_1, uint8_t port_num_1,
				    osm_node_t * p_node_2, uint8_t port_num_2)
{
	uint16_t lid_ho_1;
	uint16_t lid_ho_2;
	boolean_t node_2_is_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	cache_check_link_change(p_mgr,
				osm_node_get_physp_ptr(p_node_1, port_num_1),
				osm_node_get_physp_ptr(p_node_2, port_num_2));

	/* the link change check may have invalidated the cache */
	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) {
		osm_node_t *p_swapped_node;
		uint8_t swapped_port_num;

		if (osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Found CA-2-CA link\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}

		/* for code simplicity, we want the first node to be switch */
		p_swapped_node = p_node_1;
		swapped_port_num = port_num_1;
		p_node_1 = p_node_2;
		port_num_1 = port_num_2;
		p_node_2 = p_swapped_node;
		port_num_2 = swapped_port_num;
	}

	node_2_is_sw = (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH);

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	/* a switch is addressed by port 0, any other node by the link port */
	if (node_2_is_sw)
		lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));
	else
		lid_ho_2 =
		    cl_ntoh16(osm_node_get_base_lid(p_node_2, port_num_2));

	if (!lid_ho_1 || !lid_ho_2) {
		/*
		 * No lid assigned, which means that one of the nodes is new.
		 * Need to wait for lid manager to process this node.
		 * The switches and their links will be checked later when
		 * the whole cache validity will be verified.
		 */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Link port %u <-> %u reveals new node - cache will "
			"be validated later\n", port_num_1, port_num_2);
		goto Exit;
	}

	cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2,
			  !node_2_is_sw);

	/* if node_2 is a switch, the link should be cleaned from its cache */
	if (node_2_is_sw)
		cache_remove_port(p_mgr, lid_ho_2,
				  port_num_2, lid_ho_1, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_check_new_link() */
829
/*
 * Add a lost link between two physical ports to the cache.
 *
 * A switch-to-switch link is cached on both switches, a link between a
 * switch and a CA/RTR only on the switch side. A link between two
 * non-switch nodes, or a switch node without switch info, invalidates
 * the cache. Nothing is cached when port 0 of a switch on either end
 * is no longer available.
 */
void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr,
			      osm_physp_t * p_physp1, osm_physp_t * p_physp2)
{
	osm_node_t *p_node_1 = p_physp1->p_node;
	osm_node_t *p_node_2 = p_physp2->p_node;
	boolean_t node_1_is_sw;
	boolean_t node_2_is_sw;
	uint16_t lid_ho_1, lid_ho_2;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	node_1_is_sw = (osm_node_get_type(p_node_1) == IB_NODE_TYPE_SWITCH);
	node_2_is_sw = (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH);

	if (!node_1_is_sw && !node_2_is_sw) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Dropping CA-2-CA link\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if ((node_1_is_sw && !osm_node_get_physp_ptr(p_node_1, 0)) ||
	    (node_2_is_sw && !osm_node_get_physp_ptr(p_node_2, 0))) {
		/* we're caching a link when one of the nodes
		   has already been dropped and cached */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Port %u <-> port %u: port0 on one of the nodes "
			"has already been dropped and cached\n",
			p_physp1->port_num, p_physp2->port_num);
		goto Exit;
	}

	/* One of the nodes is switch. Just for code
	   simplicity, make sure that it's the first node. */
	if (!node_1_is_sw) {
		osm_physp_t *p_swapped = p_physp1;

		p_physp1 = p_physp2;
		p_physp2 = p_swapped;
		p_node_1 = p_physp1->p_node;
		p_node_2 = p_physp2->p_node;
		/* node 1 was not a switch, so the other end must be one */
		node_1_is_sw = TRUE;
		node_2_is_sw = FALSE;
	}

	if (!p_node_1->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	if (!node_2_is_sw) {
		lid_ho_2 = cl_ntoh16(osm_physp_get_base_lid(p_physp2));

		/* lost link to CA/RTR - cache only switch side */
		cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, TRUE);
		goto Exit;
	}

	if (!p_node_2->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));

	/* lost switch-2-switch link - cache both sides */
	cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, FALSE);
	cache_add_sw_link(p_mgr, p_physp2, lid_ho_1, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_link() */
903
/*
 * Add a dropped node to the cache.
 *
 * For a CA/RTR, every link of the node is cached on the switch side.
 * For a switch, all its links are cached, the cached switch is marked
 * as dropped and the switch's routing data (LID matrix, LFT, max LID)
 * is moved from the switch object into the cache. Only leaf switches
 * can be cached this way. A switch without a LID, without switch info,
 * without LID matrices, or a non-leaf switch invalidates the cache.
 */
void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node)
{
	uint16_t lid_ho;
	uint8_t max_ports;
	uint8_t port_num;
	osm_physp_t *p_physp;
	osm_switch_t *p_dropped_sw;
	cache_switch_t *p_cache_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) {
		/* dropping CA/RTR: add to cache all the ports of this node */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			CL_ASSERT(osm_node_get_type
				  (p_physp->p_remote_physp->p_node) ==
				  IB_NODE_TYPE_SWITCH);

			osm_ucast_cache_add_link(p_mgr,
						 p_physp->p_remote_physp,
						 p_physp);
		}
		goto Exit;
	}

	/* the rest of the function handles a dropped switch */

	lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));

	if (!lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
			"Skip caching. Switch dropped before "
			"it gets a valid lid.\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Caching dropped switch lid %u\n", lid_ho);

	p_dropped_sw = p_node->sw;
	if (!p_dropped_sw) {
		/* something is wrong - forget about cache */
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"ERR AD03: no switch info for node lid %u - "
			"clearing cache\n", lid_ho);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* unlink (add to cache) all the ports of this switch */
	max_ports = osm_node_get_num_physp(p_node);
	for (port_num = 1; port_num < max_ports; port_num++) {

		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp || !p_physp->p_remote_physp)
			continue;

		osm_ucast_cache_add_link(p_mgr, p_physp,
					 p_physp->p_remote_physp);
	}

	/*
	 * All the ports have been dropped (cached).
	 * If one of the ports was connected to CA/RTR,
	 * then the cached switch would be marked as leaf.
	 * If it isn't, then the dropped switch isn't a leaf,
	 * and cache can't handle it.
	 */
	p_cache_sw = cache_get_sw(p_mgr, lid_ho);

	/* p_cache_sw could be NULL if it has no remote phys ports */
	if (!p_cache_sw || !cache_sw_is_leaf(p_cache_sw)) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Dropped non-leaf switch (lid %u)\n", lid_ho);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	p_cache_sw->dropped = TRUE;

	if (!p_dropped_sw->num_hops || !p_dropped_sw->hops) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"No LID matrices for switch lid %u\n", lid_ho);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* lid matrices: ownership moves from the switch to the cache */
	p_cache_sw->num_hops = p_dropped_sw->num_hops;
	p_dropped_sw->num_hops = 0;
	p_cache_sw->hops = p_dropped_sw->hops;
	p_dropped_sw->hops = NULL;

	/* linear forwarding table */
	if (p_dropped_sw->new_lft) {
		/* LFT buffer exists - we use it, because
		   it is more updated than the switch's LFT */
		p_cache_sw->lft = p_dropped_sw->new_lft;
		p_dropped_sw->new_lft = NULL;
	} else {
		/* no LFT buffer, so we use the switch's LFT */
		p_cache_sw->lft = p_dropped_sw->lft;
		p_dropped_sw->lft = NULL;
		p_dropped_sw->lft_size = 0;
	}
	p_cache_sw->max_lid_ho = p_dropped_sw->max_lid_ho;

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_node() */
1022
/*
 * Configure the switch forwarding tables from the unicast cache.
 *
 * Returns 1 when the cache cannot be used (the use_ucast_cache option
 * is off, or validation left the cache invalid), IB_INSUFFICIENT_MEMORY
 * when an LFT buffer cannot be allocated, and 0 after the forwarding
 * tables have been set from the cached routing.
 */
int osm_ucast_cache_process(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	cl_map_item_t *p_item;
	osm_switch_t *p_sw;
	uint16_t lft_size;

	if (!p_mgr->p_subn->opt.use_ucast_cache)
		return 1;

	ucast_cache_validate(p_mgr);
	if (!p_mgr->cache_valid)
		return 1;

	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
		"Configuring switch tables using cached routing\n");

	p_item = cl_qmap_head(p_sw_tbl);
	while (p_item != cl_qmap_end(p_sw_tbl)) {
		p_sw = (osm_switch_t *) p_item;
		p_item = cl_qmap_next(p_item);

		CL_ASSERT(p_sw->new_lft);
		if (p_sw->lft)
			continue;

		/* switch has no LFT yet: allocate one, rounded up to a
		   whole number of IB_SMP_DATA_SIZE blocks, with no paths */
		lft_size = (p_sw->max_lid_ho / IB_SMP_DATA_SIZE + 1)
		    * IB_SMP_DATA_SIZE;
		p_sw->lft = malloc(lft_size);
		if (!p_sw->lft)
			return IB_INSUFFICIENT_MEMORY;
		p_sw->lft_size = lft_size;
		memset(p_sw->lft, OSM_NO_PATH, p_sw->lft_size);
	}

	osm_ucast_mgr_set_fwd_tables(p_mgr);

	return 0;
}
1060