xref: /freebsd/contrib/ofed/opensm/opensm/osm_ucast_cache.c (revision 87181516ef48be852d5e5fee53c6e0dbfc62f21e)
1 /*
2  * Copyright (c) 2008-2009 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2008-2009 Mellanox Technologies LTD. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  *
33  */
34 
35 /*
36  * Abstract:
37  *    Implementation of OpenSM Cached Unicast Routing
38  *
39  * Environment:
40  *    Linux User Mode
41  *
42  */
43 
44 #if HAVE_CONFIG_H
45 #  include <config.h>
46 #endif
47 
48 #include <stdlib.h>
49 #include <string.h>
50 #include <ctype.h>
51 #include <errno.h>
52 #include <iba/ib_types.h>
53 #include <complib/cl_qmap.h>
54 #include <complib/cl_pool.h>
55 #include <complib/cl_debug.h>
56 #include <opensm/osm_file_ids.h>
57 #define FILE_ID OSM_FILE_UCAST_CACHE_C
58 #include <opensm/osm_opensm.h>
59 #include <opensm/osm_ucast_mgr.h>
60 #include <opensm/osm_ucast_cache.h>
61 #include <opensm/osm_switch.h>
62 #include <opensm/osm_node.h>
63 #include <opensm/osm_port.h>
64 
65 typedef struct cache_port {
66 	boolean_t is_leaf;
67 	uint16_t remote_lid_ho;
68 } cache_port_t;
69 
70 typedef struct cache_switch {
71 	cl_map_item_t map_item;
72 	boolean_t dropped;
73 	uint16_t max_lid_ho;
74 	uint16_t num_hops;
75 	uint8_t **hops;
76 	uint8_t *lft;
77 	uint8_t num_ports;
78 	cache_port_t ports[0];
79 } cache_switch_t;
80 
/* Return the base LID (host order) of a cached switch, kept in slot 0. */
static uint16_t cache_sw_get_base_lid_ho(cache_switch_t * p_sw)
{
	const cache_port_t *p_self = &p_sw->ports[0];

	return p_self->remote_lid_ho;
}
85 
cache_sw_is_leaf(cache_switch_t * p_sw)86 static boolean_t cache_sw_is_leaf(cache_switch_t * p_sw)
87 {
88 	return p_sw->ports[0].is_leaf;
89 }
90 
/* Mark a cached switch as a leaf by raising the flag kept in slot 0. */
static void cache_sw_set_leaf(cache_switch_t * p_sw)
{
	cache_port_t *p_self = &p_sw->ports[0];

	p_self->is_leaf = TRUE;
}
95 
cache_sw_new(uint16_t lid_ho,unsigned num_ports)96 static cache_switch_t *cache_sw_new(uint16_t lid_ho, unsigned num_ports)
97 {
98 	cache_switch_t *p_cache_sw = malloc(sizeof(cache_switch_t) +
99 					    num_ports * sizeof(cache_port_t));
100 	if (!p_cache_sw)
101 		return NULL;
102 
103 	memset(p_cache_sw, 0,
104 	       sizeof(*p_cache_sw) + num_ports * sizeof(cache_port_t));
105 
106 	p_cache_sw->num_ports = num_ports;
107 
108 	/* port[0] fields represent this switch details - lid and type */
109 	p_cache_sw->ports[0].remote_lid_ho = lid_ho;
110 	p_cache_sw->ports[0].is_leaf = FALSE;
111 
112 	return p_cache_sw;
113 }
114 
/*
 * Release a cache entry together with the forwarding table and the
 * LID matrices it still owns.  A NULL entry is accepted and ignored.
 */
static void cache_sw_destroy(cache_switch_t * p_sw)
{
	unsigned row;

	if (!p_sw)
		return;

	/* free(NULL) is a no-op, so no per-pointer guards are needed */
	free(p_sw->lft);

	if (p_sw->hops) {
		for (row = 0; row < p_sw->num_hops; row++)
			free(p_sw->hops[row]);
		free(p_sw->hops);
	}

	free(p_sw);
}
132 
cache_get_sw(osm_ucast_mgr_t * p_mgr,uint16_t lid_ho)133 static cache_switch_t *cache_get_sw(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho)
134 {
135 	cache_switch_t *p_cache_sw = (cache_switch_t *)
136 	    cl_qmap_get(&p_mgr->cache_sw_tbl, lid_ho);
137 	if (p_cache_sw == (cache_switch_t *)
138 	    cl_qmap_end(&p_mgr->cache_sw_tbl))
139 		p_cache_sw = NULL;
140 
141 	return p_cache_sw;
142 }
143 
/*
 * Record in the cache that the link behind switch port p has been lost.
 *
 * p             - switch-side physical port of the lost link
 * remote_lid_ho - LID (host order) of the node on the other side
 * is_ca         - TRUE when the other side is a CA/RTR, which also marks
 *                 the cached switch as a leaf
 *
 * The cache entry of the switch is created on first use.  Nothing is
 * recorded when either LID is 0 or when p is port 0.  An allocation
 * failure or an out-of-range port number invalidates the whole cache.
 */
static void cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p,
			      uint16_t remote_lid_ho, boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	cache_port_t *p_cache_port;
	uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0));

	OSM_LOG_ENTER(p_mgr->p_log);

	/*
	 * A caller may invoke this twice in a row (once per side of a
	 * switch-2-switch link).  If the first call invalidated the cache,
	 * the second one must not repopulate the table that was just
	 * cleared: a later invalidate would skip it and the entry would
	 * stay behind.
	 */
	if (!p_mgr->cache_valid)
		goto Exit;

	if (!lid_ho || !remote_lid_ho || !p->port_num)
		goto Exit;

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Caching switch port: lid %u [port %u] -> lid %u (%s)\n",
		lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW");

	p_cache_sw = cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		p_cache_sw = cache_sw_new(lid_ho, p->p_node->sw->num_ports);
		if (!p_cache_sw) {
			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
				"ERR AD01: Out of memory - cache is invalid\n");
			osm_ucast_cache_invalidate(p_mgr);
			goto Exit;
		}
		cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho,
			       &p_cache_sw->map_item);
	}

	if (p->port_num >= p_cache_sw->num_ports) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"ERR AD02: Wrong switch? - cache is invalid\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if (is_ca)
		cache_sw_set_leaf(p_cache_sw);

	p_cache_port = &p_cache_sw->ports[p->port_num];
	if (p_cache_port->remote_lid_ho == 0) {
		/* cache this link only if it hasn't been already cached */
		p_cache_port->remote_lid_ho = remote_lid_ho;
		p_cache_port->is_leaf = is_ca;
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
190 
/*
 * Drop from the cache every switch that no longer has any cached link
 * on ports 1..num_ports-1.  Does nothing when the cache is invalid.
 */
static void cache_cleanup_switches(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *p_tbl = &p_mgr->cache_sw_tbl;
	cl_map_item_t *p_item;
	cl_map_item_t *p_next;
	cache_switch_t *p_sw;
	unsigned port_num;

	if (!p_mgr->cache_valid)
		return;

	for (p_item = cl_qmap_head(p_tbl);
	     p_item != cl_qmap_end(p_tbl); p_item = p_next) {
		/* fetch the successor first: p_item may be removed below */
		p_next = cl_qmap_next(p_item);
		p_sw = (cache_switch_t *) p_item;

		/* stop at the first port that still has a cached link */
		for (port_num = 1; port_num < p_sw->num_ports; port_num++)
			if (p_sw->ports[port_num].remote_lid_ho)
				break;

		if (port_num < p_sw->num_ports)
			continue;

		cl_qmap_remove_item(p_tbl, p_item);
		cache_sw_destroy(p_sw);
	}
}
219 
220 static void
cache_check_link_change(osm_ucast_mgr_t * p_mgr,osm_physp_t * p_physp_1,osm_physp_t * p_physp_2)221 cache_check_link_change(osm_ucast_mgr_t * p_mgr,
222 			osm_physp_t * p_physp_1, osm_physp_t * p_physp_2)
223 {
224 	OSM_LOG_ENTER(p_mgr->p_log);
225 	CL_ASSERT(p_physp_1 && p_physp_2);
226 
227 	if (!p_mgr->cache_valid)
228 		goto Exit;
229 
230 	if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp)
231 		/* both ports were down - new link */
232 		goto Exit;
233 
234 	/* unicast cache cannot tolerate any link location change */
235 
236 	if ((p_physp_1->p_remote_physp &&
237 	     p_physp_1->p_remote_physp->p_remote_physp) ||
238 	    (p_physp_2->p_remote_physp &&
239 	     p_physp_2->p_remote_physp->p_remote_physp)) {
240 		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
241 			"Link location change discovered\n");
242 		osm_ucast_cache_invalidate(p_mgr);
243 		goto Exit;
244 	}
245 Exit:
246 	OSM_LOG_EXIT(p_mgr->p_log);
247 }
248 
/*
 * A link on switch lid_ho / port_num to remote_lid_ho has (re)appeared.
 * If exactly this link is cached, clear it from the cache; any mismatch
 * (switch or port not cached, different remote LID, different remote
 * node kind) invalidates the whole cache.
 *
 * is_ca - TRUE when the remote side of the link is a CA/RTR.
 */
static void cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho,
			      uint8_t port_num, uint16_t remote_lid_ho,
			      boolean_t is_ca)
{
	cache_switch_t *p_cache_sw;
	cache_port_t *p_cached;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	p_cache_sw = cache_get_sw(p_mgr, lid_ho);
	if (!p_cache_sw) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Found uncached switch/link (lid %u, port %u)\n",
			lid_ho, port_num);
		goto Invalidate;
	}

	if (port_num >= p_cache_sw->num_ports ||
	    !p_cache_sw->ports[port_num].remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Found uncached switch link (lid %u, port %u)\n",
			lid_ho, port_num);
		goto Invalidate;
	}

	/* port_num is known to be in range from here on */
	p_cached = &p_cache_sw->ports[port_num];

	if (p_cached->remote_lid_ho != remote_lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Remote lid change on switch lid %u, port %u "
			"(was %u, now %u)\n", lid_ho, port_num,
			p_cached->remote_lid_ho, remote_lid_ho);
		goto Invalidate;
	}

	/* the cached and the reported remote node kinds must agree */
	if (!p_cached->is_leaf != !is_ca) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Remote node type change on switch lid %u, port %u\n",
			lid_ho, port_num);
		goto Invalidate;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"New link from lid %u, port %u to lid %u - "
		"found in cache\n", lid_ho, port_num, remote_lid_ho);

	/* the new link was cached - clean it from the cache */
	p_cached->remote_lid_ho = 0;
	p_cached->is_leaf = FALSE;
	goto Exit;

Invalidate:
	osm_ucast_cache_invalidate(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* cache_remove_port() */
308 
309 static void
cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,cache_switch_t * p_cache_sw,osm_switch_t * p_sw)310 cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr,
311 			 cache_switch_t * p_cache_sw, osm_switch_t * p_sw)
312 {
313 	if (!p_mgr->cache_valid)
314 		return;
315 
316 	/* when seting unicast info, the cached port
317 	   should have all the required info */
318 	CL_ASSERT(p_cache_sw->max_lid_ho && p_cache_sw->lft &&
319 		  p_cache_sw->num_hops && p_cache_sw->hops);
320 
321 	p_sw->max_lid_ho = p_cache_sw->max_lid_ho;
322 
323 	if (p_sw->new_lft)
324 		free(p_sw->new_lft);
325 	p_sw->new_lft = p_cache_sw->lft;
326 	p_cache_sw->lft = NULL;
327 
328 	p_sw->num_hops = p_cache_sw->num_hops;
329 	p_cache_sw->num_hops = 0;
330 	if (p_sw->hops)
331 		free(p_sw->hops);
332 	p_sw->hops = p_cache_sw->hops;
333 	p_cache_sw->hops = NULL;
334 
335 	p_sw->need_update = 2;
336 }
337 
/*
 * Write the cache contents (cached switches and their cached links)
 * to the log.  Skipped entirely unless debug logging is active.
 */
static void ucast_cache_dump(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *p_tbl = &p_mgr->cache_sw_tbl;
	cl_map_item_t *p_item;
	cache_switch_t *p_sw;
	unsigned port_num;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG))
		goto Exit;

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Dumping missing nodes/links as logged by unicast cache:\n");

	for (p_item = cl_qmap_head(p_tbl); p_item != cl_qmap_end(p_tbl);
	     p_item = cl_qmap_next(p_item)) {
		p_sw = (cache_switch_t *) p_item;

		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"\t Switch lid %u %s%s\n",
			cache_sw_get_base_lid_ho(p_sw),
			cache_sw_is_leaf(p_sw) ? "[leaf switch] " : "",
			p_sw->dropped ? "[whole switch missing]" : "");

		/* slot 0 is the switch itself; links start at port 1 */
		for (port_num = 1; port_num < p_sw->num_ports; port_num++) {
			if (!p_sw->ports[port_num].remote_lid_ho)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"\t     - port %u -> lid %u %s\n",
				port_num,
				p_sw->ports[port_num].remote_lid_ho,
				p_sw->ports[port_num].is_leaf ?
				"[remote node is leaf]" : "");
		}
	}
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
372 
/*
 * Mark the unicast cache invalid and release every cached switch.
 * Calling this on an already invalid cache is a no-op.
 */
void osm_ucast_cache_invalidate(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *p_tbl = &p_mgr->cache_sw_tbl;
	cl_map_item_t *p_item;
	cl_map_item_t *p_next;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	p_mgr->cache_valid = FALSE;

	for (p_item = cl_qmap_head(p_tbl);
	     p_item != cl_qmap_end(p_tbl); p_item = p_next) {
		/* read the successor before the entry is freed */
		p_next = cl_qmap_next(p_item);
		cache_sw_destroy((cache_switch_t *) p_item);
	}
	cl_qmap_remove_all(p_tbl);

	OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "Unicast Cache invalidated\n");
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}
398 
/*
 * Decide whether the cached routing can still be used for the current
 * subnet.  On any change the cache cannot account for, the cache is
 * invalidated (p_mgr->cache_valid becomes FALSE); otherwise it is left
 * valid and its remaining contents are dumped to the debug log.
 *
 * Two passes are made:
 *  1. over the real switch ports of the subnet, matching ports flagged
 *     need_update against the cache and clearing links that came back;
 *  2. over what is still cached (i.e. still missing), checking that
 *     only tolerable losses remain.
 */
static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr)
{
	cache_switch_t *p_cache_sw;
	cache_switch_t *p_remote_cache_sw;
	unsigned port_num;
	unsigned max_ports;
	uint8_t remote_node_type;
	uint16_t lid_ho;
	uint16_t remote_lid_ho;
	osm_switch_t *p_sw;
	osm_switch_t *p_remote_sw;
	osm_node_t *p_node;
	osm_physp_t *p_physp;
	osm_physp_t *p_remote_physp;
	osm_port_t *p_remote_port;
	cl_qmap_t *p_sw_tbl;

	OSM_LOG_ENTER(p_mgr->p_log);
	if (!p_mgr->cache_valid)
		goto Exit;

	/* If there are no switches in the subnet, we are done */
	p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl;
	if (cl_qmap_count(p_sw_tbl) == 0) {
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/*
	 * Scan all the physical switch ports in the subnet.
	 * If the port need_update flag is on, check whether
	 * it's just some node/port reset or a cached topology
	 * change. Otherwise the cache is invalid.
	 */
	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {

		p_node = p_sw->p_node;

		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
		p_cache_sw = cache_get_sw(p_mgr, lid_ho);

		max_ports = osm_node_get_num_physp(p_node);

		/* skip port 0 */
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);

			if (!p_physp || !p_physp->p_remote_physp ||
			    !osm_physp_link_exists(p_physp,
						   p_physp->p_remote_physp))
				/* no valid link */
				continue;

			/*
			 * While scanning all the physical ports in the subnet,
			 * mark corresponding leaf switches in the cache.
			 */
			if (p_cache_sw &&
			    !p_cache_sw->dropped &&
			    !cache_sw_is_leaf(p_cache_sw) &&
			    p_physp->p_remote_physp->p_node &&
			    osm_node_get_type(p_physp->p_remote_physp->
					      p_node) != IB_NODE_TYPE_SWITCH)
				cache_sw_set_leaf(p_cache_sw);

			if (!p_physp->need_update)
				continue;

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Checking switch lid %u, port %u\n",
				lid_ho, port_num);

			p_remote_physp = osm_physp_get_remote(p_physp);
			remote_node_type =
			    osm_node_get_type(p_remote_physp->p_node);

			/* a switch is addressed by its port 0 LID, a CA/RTR
			   by the LID of the port the link ends on */
			if (remote_node_type == IB_NODE_TYPE_SWITCH)
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node, 0));
			else
				remote_lid_ho =
				    cl_ntoh16(osm_node_get_base_lid
					      (p_remote_physp->p_node,
					       osm_physp_get_port_num
					       (p_remote_physp)));

			if (!p_cache_sw ||
			    port_num >= p_cache_sw->num_ports ||
			    !p_cache_sw->ports[port_num].remote_lid_ho) {
				/*
				 * There is some uncached change on the port.
				 * In general, the reasons might be as follows:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - quick connection location change
				 *  - new link (or new switch)
				 *
				 * First two reasons allow cache usage, while
				 * the last two reasons should invalidate cache.
				 *
				 * In case of quick connection location change,
				 * cache would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * In case of new link between two known nodes,
				 * cache also would have been invalidated by
				 * osm_ucast_cache_check_new_link() function.
				 *
				 * Another reason is cached link between two
				 * known switches went back. In this case the
				 * osm_ucast_cache_check_new_link() function would
				 * clear both sides of the link from the cache
				 * during the discovery process, so effectively
				 * this would be equivalent to port reset.
				 *
				 * So three possible reasons remain:
				 *  - switch reset
				 *  - port reset (or port down/up)
				 *  - link of a new switch
				 *
				 * To validate cache, we need to check only the
				 * third reason - link of a new node/switch:
				 *  - If this is the local switch that is new,
				 *    then it should have (p_sw->need_update == 2).
				 *  - If the remote node is switch and it's new,
				 *    then it also should have
				 *    (p_sw->need_update == 2).
				 *  - If the remote node is CA/RTR and it's new,
				 *    then its port should have is_new flag on.
				 */
				if (p_sw->need_update == 2) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"New switch found (lid %u)\n",
						lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (remote_node_type == IB_NODE_TYPE_SWITCH) {

					p_remote_sw =
					    p_remote_physp->p_node->sw;
					if (p_remote_sw->need_update == 2) {
						/* this could also be case of
						   switch coming back with an
						   additional link that it
						   didn't have before */
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New switch/link found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				} else {
					/*
					 * Remote node is CA/RTR.
					 * Get p_port of the remote node and
					 * check its p_port->is_new flag.
					 */
					p_remote_port =
					    osm_get_port_by_guid(p_mgr->p_subn,
								 osm_physp_get_port_guid
								 (p_remote_physp));
					if (!p_remote_port) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_ERROR,
							"ERR AD04: No port was found for "
							"port GUID 0x%" PRIx64 "\n",
							cl_ntoh64(osm_physp_get_port_guid
								      (p_remote_physp)));
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
					if (p_remote_port->is_new) {
						OSM_LOG(p_mgr->p_log,
							OSM_LOG_DEBUG,
							"New CA/RTR found (lid %u)\n",
							remote_lid_ho);
						osm_ucast_cache_invalidate
						    (p_mgr);
						goto Exit;
					}
				}
			} else {
				/*
				 * The change on the port is cached.
				 * In general, the reasons might be as follows:
				 *  - link between two known nodes went back
				 *  - one or more nodes went back, causing all
				 *    the links to reappear
				 *
				 * If it was link that went back, then this case
				 * would have been taken care of during the
				 * discovery by osm_ucast_cache_check_new_link(),
				 * so it's some node that went back.
				 */
				if ((p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type == IB_NODE_TYPE_SWITCH) ||
				    (!p_cache_sw->ports[port_num].is_leaf &&
				     remote_node_type != IB_NODE_TYPE_SWITCH)) {
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote node type change on switch lid %u, port %u\n",
						lid_ho, port_num);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				if (p_cache_sw->ports[port_num].remote_lid_ho !=
				    remote_lid_ho) {
					/* same wording as the matching message
					   in cache_remove_port(), including the
					   space before the parenthesis */
					OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
						"Remote lid change on switch lid %u, port %u "
						"(was %u, now %u)\n",
						lid_ho, port_num,
						p_cache_sw->ports[port_num].
						remote_lid_ho, remote_lid_ho);
					osm_ucast_cache_invalidate(p_mgr);
					goto Exit;
				}

				/*
				 * We don't care who is the node that has
				 * reappeared in the subnet (local or remote).
				 * What's important that the cached link matches
				 * the real fabrics link.
				 * Just clean it from cache.
				 */

				p_cache_sw->ports[port_num].remote_lid_ho = 0;
				p_cache_sw->ports[port_num].is_leaf = FALSE;
				if (p_cache_sw->dropped) {
					/* the whole switch is back: return its
					   saved routing data to it */
					cache_restore_ucast_info(p_mgr,
								 p_cache_sw,
								 p_sw);
					p_cache_sw->dropped = FALSE;
				}

				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Restored link from cache: lid %u, port %u to lid %u\n",
					lid_ho, port_num, remote_lid_ho);
			}
		}
	}

	/* Remove all the cached switches that
	   have all their ports restored */
	cache_cleanup_switches(p_mgr);

	/*
	 * Done scanning all the physical switch ports in the subnet.
	 * Now we need to check the other side:
	 * Scan all the cached switches and their ports:
	 *  - If the cached switch is missing in the subnet
	 *    (dropped flag is on), check that it's a leaf switch.
	 *    If it's not a leaf, the cache is invalid, because
	 *    cache can tolerate only leaf switch removal.
	 *  - If the cached switch exists in fabric, check all
	 *    its cached ports. These cached ports represent
	 *    missing link in the fabric.
	 *    The missing links that can be tolerated are:
	 *      + link to missing CA/RTR
	 *      + link to missing leaf switch
	 */
	for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl);
	     p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl);
	     p_cache_sw =
	     (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) {

		if (p_cache_sw->dropped) {
			if (!cache_sw_is_leaf(p_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Missing non-leaf switch (lid %u)\n",
					cache_sw_get_base_lid_ho(p_cache_sw));
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Missing leaf switch (lid %u) - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw));
			continue;
		}

		for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) {
			if (!p_cache_sw->ports[port_num].remote_lid_ho)
				continue;

			if (p_cache_sw->ports[port_num].is_leaf) {
				CL_ASSERT(cache_sw_is_leaf(p_cache_sw));
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to CA/RTR - "
					"continuing validation\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				continue;
			}

			p_remote_cache_sw = cache_get_sw(p_mgr,
							 p_cache_sw->
							 ports[port_num].
							 remote_lid_ho);

			if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to existing switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			if (!cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing link to non-leaf switch\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			/*
			 * At this point we know that the missing link is to
			 * a leaf switch. However, one case deserves a special
			 * treatment. If there was a link between two leaf
			 * switches, then missing leaf switch might break
			 * routing. It is possible that there are routes
			 * that use leaf switches to get from switch to switch
			 * and not just to get to the CAs behind the leaf switch.
			 */
			if (cache_sw_is_leaf(p_cache_sw) &&
			    cache_sw_is_leaf(p_remote_cache_sw)) {
				OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
					"Switch lid %u, port %u: missing leaf-2-leaf link\n",
					cache_sw_get_base_lid_ho(p_cache_sw),
					port_num);
				osm_ucast_cache_invalidate(p_mgr);
				goto Exit;
			}

			OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
				"Switch lid %u, port %u: missing remote leaf switch - "
				"continuing validation\n",
				cache_sw_get_base_lid_ho(p_cache_sw),
				port_num);
		}
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n");
	ucast_cache_dump(p_mgr);
Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* ucast_cache_validate() */
758 
/*
 * Called when a link between (p_node_1, port_num_1) and
 * (p_node_2, port_num_2) is reported.  If the link relocates an
 * existing connection or joins two non-switch nodes, the cache is
 * invalidated.  Otherwise, once both ends have LIDs, the link is
 * matched against the cache and cleared from it on the switch side(s);
 * a mismatch invalidates the cache.
 */
void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr,
				    osm_node_t * p_node_1, uint8_t port_num_1,
				    osm_node_t * p_node_2, uint8_t port_num_2)
{
	uint16_t lid_ho_1;
	uint16_t lid_ho_2;
	boolean_t node_2_is_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	cache_check_link_change(p_mgr,
				osm_node_get_physp_ptr(p_node_1, port_num_1),
				osm_node_get_physp_ptr(p_node_2, port_num_2));

	/* the check above may have invalidated the cache */
	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH &&
	    osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Found CA-2-CA link\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* for code simplicity, we want the first node to be switch */
	if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) {
		osm_node_t *p_swap_node = p_node_2;
		uint8_t swap_port_num = port_num_2;

		p_node_2 = p_node_1;
		port_num_2 = port_num_1;
		p_node_1 = p_swap_node;
		port_num_1 = swap_port_num;
	}

	node_2_is_sw = (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH);

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	/* a switch is addressed through port 0, a CA/RTR through the
	   port the link ends on */
	lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2,
						   node_2_is_sw ?
						   0 : port_num_2));

	if (!lid_ho_1 || !lid_ho_2) {
		/*
		 * No lid assigned, which means that one of the nodes is new.
		 * Need to wait for lid manager to process this node.
		 * The switches and their links will be checked later when
		 * the whole cache validity will be verified.
		 */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Link port %u <-> %u reveals new node - cache will "
			"be validated later\n", port_num_1, port_num_2);
		goto Exit;
	}

	cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2,
			  !node_2_is_sw);

	/* if node_2 is a switch, the link should be cleaned from its cache */
	if (node_2_is_sw)
		cache_remove_port(p_mgr, lid_ho_2,
				  port_num_2, lid_ho_1, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_check_new_link() */
829 
/*
 * Record a lost link between two physical ports in the cache.
 *
 * A switch-2-switch link is cached on both switches; a switch-to-CA/RTR
 * link is cached on the switch only.  A link between two non-switch
 * nodes, or a switch node without switch info, invalidates the cache.
 * Nothing is cached when port 0 of an involved switch is already gone.
 */
void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr,
			      osm_physp_t * p_physp1, osm_physp_t * p_physp2)
{
	osm_node_t *p_node_1 = p_physp1->p_node, *p_node_2 = p_physp2->p_node;
	uint16_t lid_ho_1, lid_ho_2;
	boolean_t is_sw_1, is_sw_2;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	is_sw_1 = (osm_node_get_type(p_node_1) == IB_NODE_TYPE_SWITCH);
	is_sw_2 = (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH);

	if (!is_sw_1 && !is_sw_2) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Dropping CA-2-CA link\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	if ((is_sw_1 && !osm_node_get_physp_ptr(p_node_1, 0)) ||
	    (is_sw_2 && !osm_node_get_physp_ptr(p_node_2, 0))) {
		/* we're caching a link when one of the nodes
		   has already been dropped and cached */
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Port %u <-> port %u: port0 on one of the nodes "
			"has already been dropped and cached\n",
			p_physp1->port_num, p_physp2->port_num);
		goto Exit;
	}

	/* One of the nodes is switch. Just for code
	   simplicity, make sure that it's the first node. */
	if (!is_sw_1) {
		osm_physp_t *p_switch_side = p_physp2;

		p_physp2 = p_physp1;
		p_physp1 = p_switch_side;
		p_node_1 = p_physp1->p_node;
		p_node_2 = p_physp2->p_node;
		/* the node that moved to second place is not a switch */
		is_sw_2 = FALSE;
	}

	if (!p_node_1->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0));

	if (!is_sw_2) {
		lid_ho_2 = cl_ntoh16(osm_physp_get_base_lid(p_physp2));

		/* lost link to CA/RTR - cache only switch side */
		cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, TRUE);
		goto Exit;
	}

	if (!p_node_2->sw) {
		/* something is wrong - we'd better not use cache */
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0));

	/* lost switch-2-switch link - cache both sides */
	cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, FALSE);
	cache_add_sw_link(p_mgr, p_physp2, lid_ho_1, FALSE);

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_link() */
903 
/*
 * Record a dropped node in the cache.
 *
 * For a CA/RTR every link of the node is cached on the switch side.
 * For a switch every link is cached, the switch is marked as dropped,
 * and its LID matrices and forwarding table are moved into the cache
 * entry (the switch object no longer owns them afterwards).  Only a
 * leaf switch with a valid LID, switch info and LID matrices can be
 * cached; anything else invalidates the cache.
 */
void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node)
{
	uint16_t lid_ho;
	uint8_t max_ports;
	uint8_t port_num;
	osm_physp_t *p_physp;
	osm_switch_t *p_sw;
	cache_switch_t *p_cache_sw;

	OSM_LOG_ENTER(p_mgr->p_log);

	if (!p_mgr->cache_valid)
		goto Exit;

	if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) {
		/* dropping CA/RTR: add to cache all the ports of this node */
		max_ports = osm_node_get_num_physp(p_node);
		for (port_num = 1; port_num < max_ports; port_num++) {

			p_physp = osm_node_get_physp_ptr(p_node, port_num);
			if (!p_physp || !p_physp->p_remote_physp)
				continue;

			CL_ASSERT(osm_node_get_type
				  (p_physp->p_remote_physp->p_node) ==
				  IB_NODE_TYPE_SWITCH);

			/* the switch side goes first */
			osm_ucast_cache_add_link(p_mgr,
						 p_physp->p_remote_physp,
						 p_physp);
		}
		goto Exit;
	}

	/* from here on the dropped node is a switch */

	lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0));
	if (!lid_ho) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE,
			"Skip caching. Switch dropped before "
			"it gets a valid lid.\n");
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
		"Caching dropped switch lid %u\n", lid_ho);

	p_sw = p_node->sw;
	if (!p_sw) {
		/* something is wrong - forget about cache */
		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
			"ERR AD03: no switch info for node lid %u - "
			"clearing cache\n", lid_ho);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* unlink (add to cache) all the ports of this switch */
	max_ports = osm_node_get_num_physp(p_node);
	for (port_num = 1; port_num < max_ports; port_num++) {

		p_physp = osm_node_get_physp_ptr(p_node, port_num);
		if (!p_physp || !p_physp->p_remote_physp)
			continue;

		osm_ucast_cache_add_link(p_mgr, p_physp,
					 p_physp->p_remote_physp);
	}

	/*
	 * All the ports have been dropped (cached).
	 * If one of the ports was connected to CA/RTR,
	 * then the cached switch would be marked as leaf.
	 * If it isn't, then the dropped switch isn't a leaf,
	 * and cache can't handle it.
	 */
	p_cache_sw = cache_get_sw(p_mgr, lid_ho);

	/* p_cache_sw could be NULL if it has no remote phys ports */
	if (!p_cache_sw || !cache_sw_is_leaf(p_cache_sw)) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"Dropped non-leaf switch (lid %u)\n", lid_ho);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	p_cache_sw->dropped = TRUE;

	if (!p_sw->num_hops || !p_sw->hops) {
		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
			"No LID matrices for switch lid %u\n", lid_ho);
		osm_ucast_cache_invalidate(p_mgr);
		goto Exit;
	}

	/* lid matrices: ownership moves to the cache entry */
	p_cache_sw->num_hops = p_sw->num_hops;
	p_cache_sw->hops = p_sw->hops;
	p_sw->num_hops = 0;
	p_sw->hops = NULL;

	/* linear forwarding table */
	if (p_sw->new_lft) {
		/* LFT buffer exists - we use it, because
		   it is more updated than the switch's LFT */
		p_cache_sw->lft = p_sw->new_lft;
		p_sw->new_lft = NULL;
	} else {
		/* no LFT buffer, so we use the switch's LFT */
		p_cache_sw->lft = p_sw->lft;
		p_sw->lft = NULL;
		p_sw->lft_size = 0;
	}
	p_cache_sw->max_lid_ho = p_sw->max_lid_ho;

Exit:
	OSM_LOG_EXIT(p_mgr->p_log);
}				/* osm_ucast_cache_add_node() */
1022 
/*
 * Try to configure the switch forwarding tables from the unicast cache.
 *
 * Returns 0 when the cached routing was applied, 1 when the cache is
 * disabled or turned out to be invalid (nothing was applied), and
 * IB_INSUFFICIENT_MEMORY when an LFT buffer could not be allocated.
 */
int osm_ucast_cache_process(osm_ucast_mgr_t * p_mgr)
{
	cl_qmap_t *tbl = &p_mgr->p_subn->sw_guid_tbl;
	cl_map_item_t *item;
	osm_switch_t *p_sw;
	uint16_t lft_size;

	if (!p_mgr->p_subn->opt.use_ucast_cache)
		return 1;

	ucast_cache_validate(p_mgr);
	if (!p_mgr->cache_valid)
		return 1;

	OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
		"Configuring switch tables using cached routing\n");

	for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl);
	     item = cl_qmap_next(item)) {
		p_sw = (osm_switch_t *) item;
		CL_ASSERT(p_sw->new_lft);

		/* only switches without an LFT buffer need one created */
		if (p_sw->lft)
			continue;

		/* round the table size up to whole IB_SMP_DATA_SIZE blocks */
		lft_size = (p_sw->max_lid_ho / IB_SMP_DATA_SIZE + 1)
			   * IB_SMP_DATA_SIZE;

		p_sw->lft = malloc(lft_size);
		if (!p_sw->lft)
			return IB_INSUFFICIENT_MEMORY;

		p_sw->lft_size = lft_size;
		memset(p_sw->lft, OSM_NO_PATH, p_sw->lft_size);
	}

	osm_ucast_mgr_set_fwd_tables(p_mgr);

	return 0;
}
1060