xref: /freebsd/contrib/ofed/libibnetdisc/ibnetdisc.c (revision 25ecdc7d52770caf1c9b44b5ec11f468f6b636f3)
1 /*
2  * Copyright (c) 2004-2009 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2007 Xsigo Systems Inc.  All rights reserved.
4  * Copyright (c) 2008 Lawrence Livermore National Laboratory
5  * Copyright (c) 2010-2011 Mellanox Technologies LTD.  All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  *
35  */
36 
37 #if HAVE_CONFIG_H
38 #include <config.h>
39 #endif				/* HAVE_CONFIG_H */
40 
41 #define _GNU_SOURCE
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <unistd.h>
45 #include <string.h>
46 #include <errno.h>
47 #include <inttypes.h>
48 
49 #include <infiniband/umad.h>
50 #include <infiniband/mad.h>
51 
52 #include <infiniband/ibnetdisc.h>
53 #include <complib/cl_nodenamemap.h>
54 
55 #include "internal.h"
56 #include "chassis.h"
57 
58 /* forward declarations */
59 struct ni_cbdata
60 {
61 	ibnd_node_t *node;
62 	int port_num;
63 };
64 static int query_node_info(smp_engine_t * engine, ib_portid_t * portid,
65 			   struct ni_cbdata * cbdata);
66 static int query_port_info(smp_engine_t * engine, ib_portid_t * portid,
67 			   ibnd_node_t * node, int portnum);
68 ibnd_port_t *ibnd_find_port_dr(ibnd_fabric_t * fabric, char *dr_str);
69 
70 static int recv_switch_info(smp_engine_t * engine, ibnd_smp_t * smp,
71 			    uint8_t * mad, void *cb_data)
72 {
73 	uint8_t *switch_info = mad + IB_SMP_DATA_OFFS;
74 	ibnd_node_t *node = cb_data;
75 	memcpy(node->switchinfo, switch_info, sizeof(node->switchinfo));
76 	mad_decode_field(node->switchinfo, IB_SW_ENHANCED_PORT0_F,
77 			 &node->smaenhsp0);
78 	return 0;
79 }
80 
81 static int query_switch_info(smp_engine_t * engine, ib_portid_t * portid,
82 			     ibnd_node_t * node)
83 {
84 	node->smaenhsp0 = 0;	/* assume base SP0 */
85 	return issue_smp(engine, portid, IB_ATTR_SWITCH_INFO, 0,
86 			 recv_switch_info, node);
87 }
88 
89 static int add_port_to_dpath(ib_dr_path_t * path, int nextport)
90 {
91 	if (path->cnt > sizeof(path->p) - 2)
92 		return -1;
93 	++path->cnt;
94 	path->p[path->cnt] = (uint8_t) nextport;
95 	return path->cnt;
96 }
97 
98 static int retract_dpath(smp_engine_t * engine, ib_portid_t * portid)
99 {
100 	ibnd_scan_t *scan = engine->user_data;
101 	f_internal_t *f_int = scan->f_int;
102 
103 	if (scan->cfg->max_hops &&
104 	    f_int->fabric.maxhops_discovered > scan->cfg->max_hops)
105 		return 0;
106 
107 	/* this may seem wrong but the only time we would retract the path is
108 	 * if the user specified a CA for the DR path and we are retracting
109 	 * from that to find the node it is connected to.  This counts as a
110 	 * positive hop discovered
111 	 */
112 	f_int->fabric.maxhops_discovered++;
113 	portid->drpath.p[portid->drpath.cnt] = 0;
114 	portid->drpath.cnt--;
115 	return 1;
116 }
117 
118 static int extend_dpath(smp_engine_t * engine, ib_portid_t * portid,
119 			int nextport)
120 {
121 	ibnd_scan_t *scan = engine->user_data;
122 	f_internal_t *f_int = scan->f_int;
123 
124 	if (scan->cfg->max_hops &&
125 	    f_int->fabric.maxhops_discovered > scan->cfg->max_hops)
126 		return 0;
127 
128 	if (portid->lid) {
129 		/* If we were LID routed we need to set up the drslid */
130 		portid->drpath.drslid = (uint16_t) scan->selfportid.lid;
131 		portid->drpath.drdlid = 0xFFFF;
132 	}
133 
134 	if (add_port_to_dpath(&portid->drpath, nextport) < 0) {
135 		IBND_ERROR("add port %d to DR path failed; %s\n", nextport,
136 			   portid2str(portid));
137 		return -1;
138 	}
139 
140 	if (((unsigned) portid->drpath.cnt - scan->initial_hops) >
141 	    f_int->fabric.maxhops_discovered)
142 		f_int->fabric.maxhops_discovered++;
143 
144 	return 1;
145 }
146 
147 static int recv_node_desc(smp_engine_t * engine, ibnd_smp_t * smp,
148 			  uint8_t * mad, void *cb_data)
149 {
150 	uint8_t *node_desc = mad + IB_SMP_DATA_OFFS;
151 	ibnd_node_t *node = cb_data;
152 	memcpy(node->nodedesc, node_desc, sizeof(node->nodedesc));
153 	return 0;
154 }
155 
156 static int query_node_desc(smp_engine_t * engine, ib_portid_t * portid,
157 			   ibnd_node_t * node)
158 {
159 	return issue_smp(engine, portid, IB_ATTR_NODE_DESC, 0,
160 			 recv_node_desc, node);
161 }
162 
163 static void debug_port(ib_portid_t * portid, ibnd_port_t * port)
164 {
165 	char width[64], speed[64];
166 	int iwidth;
167 	int ispeed, fdr10, espeed;
168 	uint8_t *info;
169 	uint32_t cap_mask;
170 
171 	iwidth = mad_get_field(port->info, 0, IB_PORT_LINK_WIDTH_ACTIVE_F);
172 	ispeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F);
173 	fdr10 = mad_get_field(port->ext_info, 0,
174 			      IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F);
175 
176 	if (port->node->type == IB_NODE_SWITCH)
177 		info = (uint8_t *)&port->node->ports[0]->info;
178 	else
179 		info = (uint8_t *)&port->info;
180 	cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F);
181 	if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS))
182 		espeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_EXT_ACTIVE_F);
183 	else
184 		espeed = 0;
185 	IBND_DEBUG
186 	    ("portid %s portnum %d: base lid %d state %d physstate %d %s %s %s %s\n",
187 	     portid2str(portid), port->portnum, port->base_lid,
188 	     mad_get_field(port->info, 0, IB_PORT_STATE_F),
189 	     mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F),
190 	     mad_dump_val(IB_PORT_LINK_WIDTH_ACTIVE_F, width, 64, &iwidth),
191 	     mad_dump_val(IB_PORT_LINK_SPEED_ACTIVE_F, speed, 64, &ispeed),
192 	     (fdr10 & FDR10) ? "FDR10"  : "",
193 	     mad_dump_val(IB_PORT_LINK_SPEED_EXT_ACTIVE_F, speed, 64, &espeed));
194 }
195 
196 static int is_mlnx_ext_port_info_supported(ibnd_port_t * port)
197 {
198 	uint16_t devid = (uint16_t) mad_get_field(port->node->info, 0, IB_NODE_DEVID_F);
199 	uint32_t vendorid = (uint32_t) mad_get_field(port->node->info, 0, IB_NODE_VENDORID_F);
200 
201 	if ((devid >= 0xc738 && devid <= 0xc73b) || devid == 0xcb20 || devid == 0xcf08 ||
202 	    ((vendorid == 0x119f) &&
203 	     /* Bull SwitchX */
204 	     (devid == 0x1b02 || devid == 0x1b50 ||
205 	      /* Bull SwitchIB and SwitchIB2 */
206 	      devid == 0x1ba0 ||
207 	      (devid >= 0x1bd0 && devid <= 0x1bd5))))
208 		return 1;
209 	if ((devid >= 0x1003 && devid <= 0x1017) ||
210 	    ((vendorid == 0x119f) &&
211 	     /* Bull ConnectX3 */
212 	     (devid == 0x1b33 || devid == 0x1b73 ||
213 	      devid == 0x1b40 || devid == 0x1b41 ||
214 	      devid == 0x1b60 || devid == 0x1b61 ||
215 	     /* Bull ConnectIB */
216 	      devid == 0x1b83 ||
217 	      devid == 0x1b93 || devid == 0x1b94 ||
218 	      /* Bull ConnectX4 */
219 	      devid == 0x1bb4 || devid == 0x1bb5 ||
220 	      devid == 0x1bc4)))
221 		return 1;
222 	return 0;
223 }
224 
225 int mlnx_ext_port_info_err(smp_engine_t * engine, ibnd_smp_t * smp,
226 			   uint8_t * mad, void *cb_data)
227 {
228 	f_internal_t *f_int = ((ibnd_scan_t *) engine->user_data)->f_int;
229 	ibnd_node_t *node = cb_data;
230 	ibnd_port_t *port;
231 	uint8_t port_num, local_port;
232 
233 	port_num = (uint8_t) mad_get_field(mad, 0, IB_MAD_ATTRMOD_F);
234 	port = node->ports[port_num];
235 	if (!port) {
236 		IBND_ERROR("Failed to find 0x%" PRIx64 " port %u\n",
237 			   node->guid, port_num);
238 		return -1;
239 	}
240 
241 	local_port = (uint8_t) mad_get_field(port->info, 0, IB_PORT_LOCAL_PORT_F);
242 	debug_port(&smp->path, port);
243 
244 	if (port_num && mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F)
245 	    == IB_PORT_PHYS_STATE_LINKUP
246 	    && ((node->type == IB_NODE_SWITCH && port_num != local_port) ||
247 		(node == f_int->fabric.from_node && port_num == f_int->fabric.from_portnum))) {
248 		int rc = 0;
249 		ib_portid_t path = smp->path;
250 
251 		if (node->type != IB_NODE_SWITCH &&
252 		    node == f_int->fabric.from_node &&
253 		    path.drpath.cnt > 1)
254 			rc = retract_dpath(engine, &path);
255 		else {
256 			/* we can't proceed through an HCA with DR */
257 			if (path.lid == 0 || node->type == IB_NODE_SWITCH)
258 				rc = extend_dpath(engine, &path, port_num);
259 		}
260 
261 		if (rc > 0) {
262 			struct ni_cbdata * cbdata = malloc(sizeof(*cbdata));
263 			cbdata->node = node;
264 			cbdata->port_num = port_num;
265 			query_node_info(engine, &path, cbdata);
266 		}
267 	}
268 
269 	return 0;
270 }
271 
272 static int recv_mlnx_ext_port_info(smp_engine_t * engine, ibnd_smp_t * smp,
273 				   uint8_t * mad, void *cb_data)
274 {
275 	f_internal_t *f_int = ((ibnd_scan_t *) engine->user_data)->f_int;
276 	ibnd_node_t *node = cb_data;
277 	ibnd_port_t *port;
278 	uint8_t *ext_port_info = mad + IB_SMP_DATA_OFFS;
279 	uint8_t port_num, local_port;
280 
281 	port_num = (uint8_t) mad_get_field(mad, 0, IB_MAD_ATTRMOD_F);
282 	port = node->ports[port_num];
283 	if (!port) {
284 		IBND_ERROR("Failed to find 0x%" PRIx64 " port %u\n",
285 			   node->guid, port_num);
286 		return -1;
287 	}
288 
289 	memcpy(port->ext_info, ext_port_info, sizeof(port->ext_info));
290 	local_port = (uint8_t) mad_get_field(port->info, 0, IB_PORT_LOCAL_PORT_F);
291 	debug_port(&smp->path, port);
292 
293 	if (port_num && mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F)
294 	    == IB_PORT_PHYS_STATE_LINKUP
295 	    && ((node->type == IB_NODE_SWITCH && port_num != local_port) ||
296 		(node == f_int->fabric.from_node && port_num == f_int->fabric.from_portnum))) {
297 		int rc = 0;
298 		ib_portid_t path = smp->path;
299 
300 		if (node->type != IB_NODE_SWITCH &&
301 		    node == f_int->fabric.from_node &&
302 		    path.drpath.cnt > 1)
303 			rc = retract_dpath(engine, &path);
304 		else {
305 			/* we can't proceed through an HCA with DR */
306 			if (path.lid == 0 || node->type == IB_NODE_SWITCH)
307 				rc = extend_dpath(engine, &path, port_num);
308 		}
309 
310 		if (rc > 0) {
311 			struct ni_cbdata * cbdata = malloc(sizeof(*cbdata));
312 			cbdata->node = node;
313 			cbdata->port_num = port_num;
314 			query_node_info(engine, &path, cbdata);
315 		}
316 	}
317 
318 	return 0;
319 }
320 
321 static int query_mlnx_ext_port_info(smp_engine_t * engine, ib_portid_t * portid,
322 				    ibnd_node_t * node, int portnum)
323 {
324 	IBND_DEBUG("Query MLNX Extended Port Info; %s (0x%" PRIx64 "):%d\n",
325 		   portid2str(portid), node->guid, portnum);
326 	return issue_smp(engine, portid, IB_ATTR_MLNX_EXT_PORT_INFO, portnum,
327 			 recv_mlnx_ext_port_info, node);
328 }
329 
330 static int recv_port_info(smp_engine_t * engine, ibnd_smp_t * smp,
331 			  uint8_t * mad, void *cb_data)
332 {
333 	ibnd_scan_t *scan = (ibnd_scan_t *)engine->user_data;
334 	f_internal_t *f_int = scan->f_int;
335 	ibnd_node_t *node = cb_data;
336 	ibnd_port_t *port;
337 	uint8_t *port_info = mad + IB_SMP_DATA_OFFS;
338 	uint8_t port_num, local_port;
339 	int phystate, ispeed, espeed;
340 	uint8_t *info;
341 	uint32_t cap_mask;
342 
343 	port_num = (uint8_t) mad_get_field(mad, 0, IB_MAD_ATTRMOD_F);
344 	local_port = (uint8_t) mad_get_field(port_info, 0, IB_PORT_LOCAL_PORT_F);
345 
346 	/* this may have been created before */
347 	port = node->ports[port_num];
348 	if (!port) {
349 		port = node->ports[port_num] = calloc(1, sizeof(*port));
350 		if (!port) {
351 			IBND_ERROR("Failed to allocate 0x%" PRIx64 " port %u\n",
352 				    node->guid, port_num);
353 			return -1;
354 		}
355 		port->guid =
356 		    mad_get_field64(node->info, 0, IB_NODE_PORT_GUID_F);
357 	}
358 
359 	memcpy(port->info, port_info, sizeof(port->info));
360 	port->node = node;
361 	port->portnum = port_num;
362 	port->ext_portnum = 0;
363 	port->base_lid = (uint16_t) mad_get_field(port->info, 0, IB_PORT_LID_F);
364 	port->lmc = (uint8_t) mad_get_field(port->info, 0, IB_PORT_LMC_F);
365 
366 	if (port_num == 0) {
367 		node->smalid = port->base_lid;
368 		node->smalmc = port->lmc;
369 	} else if (node->type == IB_NODE_SWITCH) {
370 		port->base_lid = node->smalid;
371 		port->lmc = node->smalmc;
372 	}
373 
374 	int rc1 = add_to_portguid_hash(port, f_int->fabric.portstbl);
375 	if (rc1)
376 		IBND_ERROR("Error Occurred when trying"
377 			   " to insert new port guid 0x%016" PRIx64 " to DB\n",
378 			   port->guid);
379 
380 	add_to_portlid_hash(port, f_int->lid2guid);
381 
382 	if ((scan->cfg->flags & IBND_CONFIG_MLX_EPI)
383 	    && is_mlnx_ext_port_info_supported(port)) {
384 		phystate = mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F);
385 		ispeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F);
386 		if (port->node->type == IB_NODE_SWITCH)
387 			info = (uint8_t *)&port->node->ports[0]->info;
388 		else
389 			info = (uint8_t *)&port->info;
390 		cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F);
391 		if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS))
392 			espeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_EXT_ACTIVE_F);
393 		else
394 			espeed = 0;
395 
396 		if (phystate == IB_PORT_PHYS_STATE_LINKUP &&
397 		    ispeed == IB_LINK_SPEED_ACTIVE_10 &&
398 		    espeed == IB_LINK_SPEED_EXT_ACTIVE_NONE) {	/* LinkUp/QDR */
399 			query_mlnx_ext_port_info(engine, &smp->path,
400 						 node, port_num);
401 			return 0;
402 		}
403 	}
404 
405 	debug_port(&smp->path, port);
406 
407 	if (port_num && mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F)
408 	    == IB_PORT_PHYS_STATE_LINKUP
409 	    && ((node->type == IB_NODE_SWITCH && port_num != local_port) ||
410 		(node == f_int->fabric.from_node && port_num == f_int->fabric.from_portnum))) {
411 
412 		int rc = 0;
413 		ib_portid_t path = smp->path;
414 
415 		if (node->type != IB_NODE_SWITCH &&
416 		    node == f_int->fabric.from_node &&
417 		    path.drpath.cnt > 1)
418 			rc = retract_dpath(engine, &path);
419 		else {
420 			/* we can't proceed through an HCA with DR */
421 			if (path.lid == 0 || node->type == IB_NODE_SWITCH)
422 				rc = extend_dpath(engine, &path, port_num);
423 		}
424 
425 		if (rc > 0) {
426 			struct ni_cbdata * cbdata = malloc(sizeof(*cbdata));
427 			cbdata->node = node;
428 			cbdata->port_num = port_num;
429 			query_node_info(engine, &path, cbdata);
430 		}
431 	}
432 
433 	return 0;
434 }
435 
436 static int recv_port0_info(smp_engine_t * engine, ibnd_smp_t * smp,
437 			   uint8_t * mad, void *cb_data)
438 {
439 	ibnd_node_t *node = cb_data;
440 	int i, status;
441 
442 	status = recv_port_info(engine, smp, mad, cb_data);
443 	/* Query PortInfo on switch external/physical ports */
444 	for (i = 1; i <= node->numports; i++)
445 		query_port_info(engine, &smp->path, node, i);
446 
447 	return status;
448 }
449 
450 static int query_port_info(smp_engine_t * engine, ib_portid_t * portid,
451 			   ibnd_node_t * node, int portnum)
452 {
453 	IBND_DEBUG("Query Port Info; %s (0x%" PRIx64 "):%d\n",
454 		   portid2str(portid), node->guid, portnum);
455 	return issue_smp(engine, portid, IB_ATTR_PORT_INFO, portnum,
456 			 portnum ? recv_port_info : recv_port0_info, node);
457 }
458 
459 static ibnd_node_t *create_node(smp_engine_t * engine, ib_portid_t * path,
460 				uint8_t * node_info)
461 {
462 	f_internal_t *f_int = ((ibnd_scan_t *) engine->user_data)->f_int;
463 	ibnd_node_t *rc = calloc(1, sizeof(*rc));
464 	if (!rc) {
465 		IBND_ERROR("OOM: node creation failed\n");
466 		return NULL;
467 	}
468 
469 	/* decode just a couple of fields for quicker reference. */
470 	mad_decode_field(node_info, IB_NODE_GUID_F, &rc->guid);
471 	mad_decode_field(node_info, IB_NODE_TYPE_F, &rc->type);
472 	mad_decode_field(node_info, IB_NODE_NPORTS_F, &rc->numports);
473 
474 	rc->ports = calloc(rc->numports + 1, sizeof(*rc->ports));
475 	if (!rc->ports) {
476 		free(rc);
477 		IBND_ERROR("OOM: Failed to allocate the ports array\n");
478 		return NULL;
479 	}
480 
481 	rc->path_portid = *path;
482 	memcpy(rc->info, node_info, sizeof(rc->info));
483 
484 	int rc1 = add_to_nodeguid_hash(rc, f_int->fabric.nodestbl);
485 	if (rc1)
486 		IBND_ERROR("Error Occurred when trying"
487 			   " to insert new node guid 0x%016" PRIx64 " to DB\n",
488 			   rc->guid);
489 
490 	/* add this to the all nodes list */
491 	rc->next = f_int->fabric.nodes;
492 	f_int->fabric.nodes = rc;
493 
494 	add_to_type_list(rc, f_int);
495 
496 	return rc;
497 }
498 
499 static void link_ports(ibnd_node_t * node, ibnd_port_t * port,
500 		       ibnd_node_t * remotenode, ibnd_port_t * remoteport)
501 {
502 	IBND_DEBUG("linking: 0x%" PRIx64 " %p->%p:%u and 0x%" PRIx64
503 		   " %p->%p:%u\n", node->guid, node, port, port->portnum,
504 		   remotenode->guid, remotenode, remoteport,
505 		   remoteport->portnum);
506 	if (port->remoteport)
507 		port->remoteport->remoteport = NULL;
508 	if (remoteport->remoteport)
509 		remoteport->remoteport->remoteport = NULL;
510 	port->remoteport = remoteport;
511 	remoteport->remoteport = port;
512 }
513 
514 static void dump_endnode(ib_portid_t * path, char *prompt,
515 			 ibnd_node_t * node, ibnd_port_t * port)
516 {
517 	char type[64];
518 	mad_dump_node_type(type, sizeof(type), &node->type, sizeof(int));
519 	printf("%s -> %s %s {%016" PRIx64 "} portnum %d lid %d-%d \"%s\"\n",
520 	       portid2str(path), prompt, type, node->guid,
521 	       node->type == IB_NODE_SWITCH ? 0 : port->portnum,
522 	       port->base_lid, port->base_lid + (1 << port->lmc) - 1,
523 	       node->nodedesc);
524 }
525 
526 static int recv_node_info(smp_engine_t * engine, ibnd_smp_t * smp,
527 			  uint8_t * mad, void *cb_data)
528 {
529 	ibnd_scan_t *scan = engine->user_data;
530 	f_internal_t *f_int = scan->f_int;
531 	uint8_t *node_info = mad + IB_SMP_DATA_OFFS;
532 	struct ni_cbdata *ni_cbdata = (struct ni_cbdata *)cb_data;
533 	ibnd_node_t *rem_node = NULL;
534 	int rem_port_num = 0;
535 	ibnd_node_t *node;
536 	int node_is_new = 0;
537 	uint64_t node_guid = mad_get_field64(node_info, 0, IB_NODE_GUID_F);
538 	uint64_t port_guid = mad_get_field64(node_info, 0, IB_NODE_PORT_GUID_F);
539 	int port_num = mad_get_field(node_info, 0, IB_NODE_LOCAL_PORT_F);
540 	ibnd_port_t *port = NULL;
541 
542 	if (ni_cbdata) {
543 		rem_node = ni_cbdata->node;
544 		rem_port_num = ni_cbdata->port_num;
545 		free(ni_cbdata);
546 	}
547 
548 	node = ibnd_find_node_guid(&f_int->fabric, node_guid);
549 	if (!node) {
550 		node = create_node(engine, &smp->path, node_info);
551 		if (!node)
552 			return -1;
553 		node_is_new = 1;
554 	}
555 	IBND_DEBUG("Found %s node GUID 0x%" PRIx64 " (%s)\n",
556 		   node_is_new ? "new" : "old", node->guid,
557 		   portid2str(&smp->path));
558 
559 	port = node->ports[port_num];
560 	if (!port) {
561 		/* If we have not see this port before create a shell for it */
562 		port = node->ports[port_num] = calloc(1, sizeof(*port));
563 		if (!port)
564 			return -1;
565 		port->node = node;
566 		port->portnum = port_num;
567 	}
568 	port->guid = port_guid;
569 
570 	if (scan->cfg->show_progress)
571 		dump_endnode(&smp->path, node_is_new ? "new" : "known",
572 			     node, port);
573 
574 	if (rem_node == NULL) {	/* this is the start node */
575 		f_int->fabric.from_node = node;
576 		f_int->fabric.from_portnum = port_num;
577 	} else {
578 		/* link ports... */
579 		if (!rem_node->ports[rem_port_num]) {
580 			IBND_ERROR("Internal Error; "
581 				   "Node(%p) 0x%" PRIx64
582 				   " Port %d no port created!?!?!?\n\n",
583 				   rem_node, rem_node->guid, rem_port_num);
584 			return -1;
585 		}
586 
587 		link_ports(node, port, rem_node, rem_node->ports[rem_port_num]);
588 	}
589 
590 	if (node_is_new) {
591 		query_node_desc(engine, &smp->path, node);
592 
593 		if (node->type == IB_NODE_SWITCH) {
594 			query_switch_info(engine, &smp->path, node);
595 			/* Query PortInfo on Switch Port 0 first */
596 			query_port_info(engine, &smp->path, node, 0);
597 		}
598 	}
599 
600 	if (node->type != IB_NODE_SWITCH)
601 		query_port_info(engine, &smp->path, node, port_num);
602 
603 	return 0;
604 }
605 
606 static int query_node_info(smp_engine_t * engine, ib_portid_t * portid,
607 			   struct ni_cbdata * cbdata)
608 {
609 	IBND_DEBUG("Query Node Info; %s\n", portid2str(portid));
610 	return issue_smp(engine, portid, IB_ATTR_NODE_INFO, 0,
611 			 recv_node_info, (void *)cbdata);
612 }
613 
614 ibnd_node_t *ibnd_find_node_guid(ibnd_fabric_t * fabric, uint64_t guid)
615 {
616 	int hash = HASHGUID(guid) % HTSZ;
617 	ibnd_node_t *node;
618 
619 	if (!fabric) {
620 		IBND_DEBUG("fabric parameter NULL\n");
621 		return NULL;
622 	}
623 
624 	for (node = fabric->nodestbl[hash]; node; node = node->htnext)
625 		if (node->guid == guid)
626 			return node;
627 
628 	return NULL;
629 }
630 
631 ibnd_node_t *ibnd_find_node_dr(ibnd_fabric_t * fabric, char *dr_str)
632 {
633 	ibnd_port_t *rc = ibnd_find_port_dr(fabric, dr_str);
634 	return rc->node;
635 }
636 
637 int add_to_nodeguid_hash(ibnd_node_t * node, ibnd_node_t * hash[])
638 {
639 	int rc = 0;
640 	ibnd_node_t *tblnode;
641 	int hash_idx = HASHGUID(node->guid) % HTSZ;
642 
643 	for (tblnode = hash[hash_idx]; tblnode; tblnode = tblnode->htnext) {
644 		if (tblnode == node) {
645 			IBND_ERROR("Duplicate Node: Node with guid 0x%016"
646 				   PRIx64 " already exists in nodes DB\n",
647 				   node->guid);
648 			return 1;
649 		}
650 	}
651 	node->htnext = hash[hash_idx];
652 	hash[hash_idx] = node;
653 	return rc;
654 }
655 
656 int add_to_portguid_hash(ibnd_port_t * port, ibnd_port_t * hash[])
657 {
658 	int rc = 0;
659 	ibnd_port_t *tblport;
660 	int hash_idx = HASHGUID(port->guid) % HTSZ;
661 
662 	for (tblport = hash[hash_idx]; tblport; tblport = tblport->htnext) {
663 		if (tblport == port) {
664 			IBND_ERROR("Duplicate Port: Port with guid 0x%016"
665 				   PRIx64 " already exists in ports DB\n",
666 				   port->guid);
667 			return 1;
668 		}
669 	}
670 	port->htnext = hash[hash_idx];
671 	hash[hash_idx] = port;
672 	return rc;
673 }
674 
675 void create_lid2guid(f_internal_t *f_int)
676 {
677 	f_int->lid2guid = g_hash_table_new_full(g_direct_hash, g_direct_equal,
678 				NULL, NULL);
679 }
680 
681 void destroy_lid2guid(f_internal_t *f_int)
682 {
683 	if (f_int->lid2guid) {
684 		g_hash_table_destroy(f_int->lid2guid);
685 	}
686 }
687 
688 void add_to_portlid_hash(ibnd_port_t * port, GHashTable *htable)
689 {
690 	uint16_t base_lid = port->base_lid;
691 	uint16_t lid_mask = ((1 << port->lmc) -1);
692 	uint16_t lid = 0;
693 	/* 0 < valid lid <= 0xbfff */
694 	if (base_lid > 0 && base_lid <= 0xbfff) {
695 		/* We add the port for all lids
696 		 * so it is easier to find any "random" lid specified */
697 		for (lid = base_lid; lid <= (base_lid + lid_mask); lid++) {
698 			g_hash_table_insert(htable, GINT_TO_POINTER(lid), port);
699 		}
700 	}
701 }
702 
703 void add_to_type_list(ibnd_node_t * node, f_internal_t * f_int)
704 {
705 	ibnd_fabric_t *fabric = &f_int->fabric;
706 	switch (node->type) {
707 	case IB_NODE_CA:
708 		node->type_next = fabric->ch_adapters;
709 		fabric->ch_adapters = node;
710 		break;
711 	case IB_NODE_SWITCH:
712 		node->type_next = fabric->switches;
713 		fabric->switches = node;
714 		break;
715 	case IB_NODE_ROUTER:
716 		node->type_next = fabric->routers;
717 		fabric->routers = node;
718 		break;
719 	}
720 }
721 
722 static int set_config(struct ibnd_config *config, struct ibnd_config *cfg)
723 {
724 	if (!config)
725 		return (-EINVAL);
726 
727 	if (cfg)
728 		memcpy(config, cfg, sizeof(*config));
729 
730 	if (!config->max_smps)
731 		config->max_smps = DEFAULT_MAX_SMP_ON_WIRE;
732 	if (!config->timeout_ms)
733 		config->timeout_ms = DEFAULT_TIMEOUT;
734 	if (!config->retries)
735 		config->retries = DEFAULT_RETRIES;
736 
737 	return (0);
738 }
739 
740 f_internal_t *allocate_fabric_internal(void)
741 {
742 	f_internal_t *f = calloc(1, sizeof(*f));
743 	if (f)
744 		create_lid2guid(f);
745 
746 	return (f);
747 }
748 
749 ibnd_fabric_t *ibnd_discover_fabric(char * ca_name, int ca_port,
750 				    ib_portid_t * from,
751 				    struct ibnd_config *cfg)
752 {
753 	struct ibnd_config config = { 0 };
754 	f_internal_t *f_int = NULL;
755 	ib_portid_t my_portid = { 0 };
756 	smp_engine_t engine;
757 	ibnd_scan_t scan;
758 	struct ibmad_port *ibmad_port;
759 	int nc = 2;
760 	int mc[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS };
761 
762 	/* If not specified start from "my" port */
763 	if (!from)
764 		from = &my_portid;
765 
766 	if (set_config(&config, cfg)) {
767 		IBND_ERROR("Invalid ibnd_config\n");
768 		return NULL;
769 	}
770 
771 	f_int = allocate_fabric_internal();
772 	if (!f_int) {
773 		IBND_ERROR("OOM: failed to calloc ibnd_fabric_t\n");
774 		return NULL;
775 	}
776 
777 	memset(&scan.selfportid, 0, sizeof(scan.selfportid));
778 	scan.f_int = f_int;
779 	scan.cfg = &config;
780 	scan.initial_hops = from->drpath.cnt;
781 
782 	ibmad_port = mad_rpc_open_port(ca_name, ca_port, mc, nc);
783 	if (!ibmad_port) {
784 		IBND_ERROR("can't open MAD port (%s:%d)\n", ca_name, ca_port);
785 		return (NULL);
786 	}
787 	mad_rpc_set_timeout(ibmad_port, cfg->timeout_ms);
788 	mad_rpc_set_retries(ibmad_port, cfg->retries);
789 	smp_mkey_set(ibmad_port, cfg->mkey);
790 
791 	if (ib_resolve_self_via(&scan.selfportid,
792 				NULL, NULL, ibmad_port) < 0) {
793 		IBND_ERROR("Failed to resolve self\n");
794 		mad_rpc_close_port(ibmad_port);
795 		return NULL;
796 	}
797 	mad_rpc_close_port(ibmad_port);
798 
799 	if (smp_engine_init(&engine, ca_name, ca_port, &scan, &config)) {
800 		free(f_int);
801 		return (NULL);
802 	}
803 
804 	IBND_DEBUG("from %s\n", portid2str(from));
805 
806 	if (!query_node_info(&engine, from, NULL))
807 		if (process_mads(&engine) != 0)
808 			goto error;
809 
810 	f_int->fabric.total_mads_used = engine.total_smps;
811 	f_int->fabric.maxhops_discovered += scan.initial_hops;
812 
813 	if (group_nodes(&f_int->fabric))
814 		goto error;
815 
816 	smp_engine_destroy(&engine);
817 	return (ibnd_fabric_t *)f_int;
818 error:
819 	smp_engine_destroy(&engine);
820 	ibnd_destroy_fabric(&f_int->fabric);
821 	return NULL;
822 }
823 
824 void destroy_node(ibnd_node_t * node)
825 {
826 	int p = 0;
827 
828 	if (node->ports) {
829 		for (p = 0; p <= node->numports; p++)
830 			free(node->ports[p]);
831 		free(node->ports);
832 	}
833 	free(node);
834 }
835 
836 void ibnd_destroy_fabric(ibnd_fabric_t * fabric)
837 {
838 	ibnd_node_t *node = NULL;
839 	ibnd_node_t *next = NULL;
840 	ibnd_chassis_t *ch, *ch_next;
841 
842 	if (!fabric)
843 		return;
844 
845 	ch = fabric->chassis;
846 	while (ch) {
847 		ch_next = ch->next;
848 		free(ch);
849 		ch = ch_next;
850 	}
851 	node = fabric->nodes;
852 	while (node) {
853 		next = node->next;
854 		destroy_node(node);
855 		node = next;
856 	}
857 	destroy_lid2guid((f_internal_t *)fabric);
858 	free(fabric);
859 }
860 
861 void ibnd_iter_nodes(ibnd_fabric_t * fabric, ibnd_iter_node_func_t func,
862 		     void *user_data)
863 {
864 	ibnd_node_t *cur = NULL;
865 
866 	if (!fabric) {
867 		IBND_DEBUG("fabric parameter NULL\n");
868 		return;
869 	}
870 
871 	if (!func) {
872 		IBND_DEBUG("func parameter NULL\n");
873 		return;
874 	}
875 
876 	for (cur = fabric->nodes; cur; cur = cur->next)
877 		func(cur, user_data);
878 }
879 
880 void ibnd_iter_nodes_type(ibnd_fabric_t * fabric, ibnd_iter_node_func_t func,
881 			  int node_type, void *user_data)
882 {
883 	ibnd_node_t *list = NULL;
884 	ibnd_node_t *cur = NULL;
885 
886 	if (!fabric) {
887 		IBND_DEBUG("fabric parameter NULL\n");
888 		return;
889 	}
890 
891 	if (!func) {
892 		IBND_DEBUG("func parameter NULL\n");
893 		return;
894 	}
895 
896 	switch (node_type) {
897 	case IB_NODE_SWITCH:
898 		list = fabric->switches;
899 		break;
900 	case IB_NODE_CA:
901 		list = fabric->ch_adapters;
902 		break;
903 	case IB_NODE_ROUTER:
904 		list = fabric->routers;
905 		break;
906 	default:
907 		IBND_DEBUG("Invalid node_type specified %d\n", node_type);
908 		break;
909 	}
910 
911 	for (cur = list; cur; cur = cur->type_next)
912 		func(cur, user_data);
913 }
914 
915 ibnd_port_t *ibnd_find_port_lid(ibnd_fabric_t * fabric,
916 				uint16_t lid)
917 {
918 	ibnd_port_t *port;
919 	f_internal_t *f = (f_internal_t *)fabric;
920 
921 	port = (ibnd_port_t *)g_hash_table_lookup(f->lid2guid,
922 					GINT_TO_POINTER(lid));
923 
924 	return port;
925 }
926 
927 ibnd_port_t *ibnd_find_port_guid(ibnd_fabric_t * fabric, uint64_t guid)
928 {
929 	int hash = HASHGUID(guid) % HTSZ;
930 	ibnd_port_t *port;
931 
932 	if (!fabric) {
933 		IBND_DEBUG("fabric parameter NULL\n");
934 		return NULL;
935 	}
936 
937 	for (port = fabric->portstbl[hash]; port; port = port->htnext)
938 		if (port->guid == guid)
939 			return port;
940 
941 	return NULL;
942 }
943 
944 ibnd_port_t *ibnd_find_port_dr(ibnd_fabric_t * fabric, char *dr_str)
945 {
946 	int i = 0;
947 	ibnd_node_t *cur_node;
948 	ibnd_port_t *rc = NULL;
949 	ib_dr_path_t path;
950 
951 	if (!fabric) {
952 		IBND_DEBUG("fabric parameter NULL\n");
953 		return NULL;
954 	}
955 
956 	if (!dr_str) {
957 		IBND_DEBUG("dr_str parameter NULL\n");
958 		return NULL;
959 	}
960 
961 	cur_node = fabric->from_node;
962 
963 	if (str2drpath(&path, dr_str, 0, 0) == -1)
964 		return NULL;
965 
966 	for (i = 0; i <= path.cnt; i++) {
967 		ibnd_port_t *remote_port = NULL;
968 		if (path.p[i] == 0)
969 			continue;
970 		if (!cur_node->ports)
971 			return NULL;
972 
973 		remote_port = cur_node->ports[path.p[i]]->remoteport;
974 		if (!remote_port)
975 			return NULL;
976 
977 		rc = remote_port;
978 		cur_node = remote_port->node;
979 	}
980 
981 	return rc;
982 }
983 
984 void ibnd_iter_ports(ibnd_fabric_t * fabric, ibnd_iter_port_func_t func,
985 			void *user_data)
986 {
987 	int i = 0;
988 	ibnd_port_t *cur = NULL;
989 
990 	if (!fabric) {
991 		IBND_DEBUG("fabric parameter NULL\n");
992 		return;
993 	}
994 
995 	if (!func) {
996 		IBND_DEBUG("func parameter NULL\n");
997 		return;
998 	}
999 
1000 	for (i = 0; i<HTSZ; i++)
1001 		for (cur = fabric->portstbl[i]; cur; cur = cur->htnext)
1002 			func(cur, user_data);
1003 }
1004