xref: /freebsd/contrib/ofed/infiniband-diags/src/ibqueryerrors.c (revision 4f52dfbb8d6c4d446500c5b097e3806ec219fbd4)
1 /*
2  * Copyright (c) 2004-2009 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2007 Xsigo Systems Inc.  All rights reserved.
4  * Copyright (c) 2008 Lawrence Livermore National Lab.  All rights reserved.
5  * Copyright (c) 2009 HNR Consulting.  All rights reserved.
6  * Copyright (c) 2010,2011 Mellanox Technologies LTD.  All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  *
36  */
37 
38 #if HAVE_CONFIG_H
39 #  include <config.h>
40 #endif				/* HAVE_CONFIG_H */
41 
42 #define _GNU_SOURCE
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <unistd.h>
46 #include <stdarg.h>
47 #include <time.h>
48 #include <string.h>
49 #include <getopt.h>
50 #include <errno.h>
51 #include <inttypes.h>
52 
53 #include <complib/cl_nodenamemap.h>
54 #include <infiniband/ibnetdisc.h>
55 #include <infiniband/mad.h>
56 
57 #include "ibdiag_common.h"
58 #include "ibdiag_sa.h"
59 
/* MAD port used for every PMA/SMP query in this file; opened in main(). */
struct ibmad_port *ibmad_port;
/* File name given with --node-name-map (NULL when the option is absent). */
static char *node_name_map_file = NULL;
/* Name map opened from node_name_map_file; handed to remap_node_name(). */
static nn_map_t *node_name_map = NULL;
/* File name given with --load-cache (NULL when the option is absent). */
static char *load_cache_file = NULL;
/* SL to use for each destination, indexed by LID (host byte order). */
static uint16_t lid2sl_table[sizeof(uint8_t) * 1024 * 48] = { 0 };
/* Non-zero unless --skip-sl was given. */
static int obtain_sl = 1;

/* Incremented by --data: append data counters to ports that show errors. */
int data_counters = 0;
/* Set by --counters: print data counters only, no error counters. */
int data_counters_only = 0;
/* Incremented by -r/--report-port: print link information per port. */
int port_config = 0;
/* Numeric and textual forms of the GUID given with -G / -S. */
uint64_t port_guid = 0;
char *port_guid_str = NULL;
/* Capacity of suppressed_fields[]. */
#define SUP_MAX 64
/* Number of valid entries in suppressed_fields[]. */
int sup_total = 0;
/* Counter fields that must not be reported (see -s and -c). */
enum MAD_FIELDS suppressed_fields[SUP_MAX];
/* Direct route path given with -D (NULL when the option is absent). */
char *dr_path = NULL;
/* Bitmask of PRINT_* flags selecting which node types are reported. */
uint8_t node_type_to_print = 0;
/* Flags set by -k, -K and --details respectively. */
unsigned clear_errors = 0, clear_counts = 0, details = 0;

/* Bits for node_type_to_print. */
#define PRINT_SWITCH 0x1
#define PRINT_CA     0x2
#define PRINT_ROUTER 0x4
#define PRINT_ALL 0xFF		/* all nodes default flag */

/* Value stored into ibd_timeout by path_record_query(). */
#define DEFAULT_HALF_WORLD_PR_TIMEOUT (3000)

/* Running totals printed by print_summary(). */
struct {
	int nodes_checked;
	int bad_nodes;
	int ports_checked;
	int bad_ports;
	int pma_query_failures;
} summary = { 0 };

#define DEF_THRES_FILE IBDIAG_CONFIG_PATH"/error_thresholds"
/* Threshold file to read; replaced by --threshold-file. */
static char *threshold_file = DEF_THRES_FILE;

/* define a "packet" with threshold values in it */
/* Written with mad_encode_field() and read back with mad_decode_field(). */
uint8_t thresholds[1204] = { 0 };
/*
 * Human readable list of the thresholds in effect, printed in the summary.
 * Starts as a string literal and is replaced by a heap buffer in
 * set_thresholds().
 */
char * threshold_str = "";
100 
101 static unsigned valid_gid(ib_gid_t * gid)
102 {
103 	ib_gid_t zero_gid;
104 	memset(&zero_gid, 0, sizeof zero_gid);
105 	return memcmp(&zero_gid, gid, sizeof(*gid));
106 }
107 
108 static void set_thres(char *name, uint32_t val)
109 {
110 	int f;
111 	int n;
112 	char tmp[256];
113 	for (f = IB_PC_FIRST_F; f <= IB_PC_LAST_F; f++) {
114 		if (strcmp(name, mad_field_name(f)) == 0) {
115 			mad_encode_field(thresholds, f, &val);
116 			snprintf(tmp, 255, "[%s = %u]", name, val);
117 			threshold_str = realloc(threshold_str,
118 					strlen(threshold_str)+strlen(tmp)+1);
119 			if (!threshold_str) {
120 				fprintf(stderr, "Failed to allocate memory: "
121 					"%s\n", strerror(errno));
122 				exit(1);
123 			}
124 			n = strlen(threshold_str);
125 			strcpy(threshold_str+n, tmp);
126 		}
127 	}
128 }
129 
130 static void set_thresholds(char *threshold_file)
131 {
132 	char buf[1024];
133 	int val = 0;
134 	FILE *thresf = fopen(threshold_file, "r");
135 	char *p_prefix, *p_last;
136 	char *name;
137 	char *val_str;
138 	char str[64];
139 
140 	if (!thresf)
141 		return;
142 
143 	snprintf(str, 63, "Thresholds: ");
144 	threshold_str = malloc(strlen(str)+1);
145 	if (!threshold_str) {
146 		fprintf(stderr, "Failed to allocate memory: %s\n",
147 			strerror(errno));
148 		exit(1);
149 	}
150 	strcpy(threshold_str, str);
151 	while (fgets(buf, sizeof buf, thresf) != NULL) {
152 		p_prefix = strtok_r(buf, "\n", &p_last);
153 		if (!p_prefix)
154 			continue; /* ignore blank lines */
155 
156 		if (*p_prefix == '#')
157 			continue; /* ignore comment lines */
158 
159 		name = strtok_r(p_prefix, "=", &p_last);
160 		val_str = strtok_r(NULL, "\n", &p_last);
161 
162 		val = strtoul(val_str, NULL, 0);
163 		set_thres(name, val);
164 	}
165 
166 	fclose(thresf);
167 }
168 
169 static int exceeds_threshold(int field, unsigned val)
170 {
171 	uint32_t thres = 0;
172 	mad_decode_field(thresholds, field, &thres);
173 	return (val > thres);
174 }
175 
176 static void print_port_config(ibnd_node_t * node, int portnum)
177 {
178 	char width[64], speed[64], state[64], physstate[64];
179 	char remote_str[256];
180 	char link_str[256];
181 	char width_msg[256];
182 	char speed_msg[256];
183 	char ext_port_str[256];
184 	int iwidth, ispeed, fdr10, espeed, istate, iphystate, cap_mask;
185 	uint8_t *info;
186 
187 	ibnd_port_t *port = node->ports[portnum];
188 
189 	if (!port)
190 		return;
191 
192 	iwidth = mad_get_field(port->info, 0, IB_PORT_LINK_WIDTH_ACTIVE_F);
193 	ispeed = mad_get_field(port->info, 0, IB_PORT_LINK_SPEED_ACTIVE_F);
194 	fdr10 = mad_get_field(port->ext_info, 0,
195 			      IB_MLNX_EXT_PORT_LINK_SPEED_ACTIVE_F) & FDR10;
196 
197 	if (port->node->type == IB_NODE_SWITCH)
198 		info = (uint8_t *)&port->node->ports[0]->info;
199 	else
200 		info = (uint8_t *)&port->info;
201 	cap_mask = mad_get_field(info, 0, IB_PORT_CAPMASK_F);
202 	if (cap_mask & CL_NTOH32(IB_PORT_CAP_HAS_EXT_SPEEDS))
203 		espeed = mad_get_field(port->info, 0,
204 				       IB_PORT_LINK_SPEED_EXT_ACTIVE_F);
205 	else
206 		espeed = 0;
207 	istate = mad_get_field(port->info, 0, IB_PORT_STATE_F);
208 	iphystate = mad_get_field(port->info, 0, IB_PORT_PHYS_STATE_F);
209 
210 	remote_str[0] = '\0';
211 	link_str[0] = '\0';
212 	width_msg[0] = '\0';
213 	speed_msg[0] = '\0';
214 
215 	/* C14-24.2.1 states that a down port allows for invalid data to be
216 	 * returned for all PortInfo components except PortState and
217 	 * PortPhysicalState */
218 	if (istate != IB_LINK_DOWN) {
219 		if (!espeed) {
220 			if (fdr10)
221 				sprintf(speed, "10.0 Gbps (FDR10)");
222 			else
223 				mad_dump_val(IB_PORT_LINK_SPEED_ACTIVE_F, speed,
224 					     64, &ispeed);
225 		} else
226 			mad_dump_val(IB_PORT_LINK_SPEED_EXT_ACTIVE_F, speed,
227 			     64, &espeed);
228 
229 		snprintf(link_str, 256, "(%3s %18s %6s/%8s)",
230 			 mad_dump_val(IB_PORT_LINK_WIDTH_ACTIVE_F, width, 64, &iwidth),
231 			 speed,
232 			 mad_dump_val(IB_PORT_STATE_F, state, 64, &istate),
233 			 mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate));
234 	} else {
235 		snprintf(link_str, 256, "(              %6s/%8s)",
236 			 mad_dump_val(IB_PORT_STATE_F, state, 64, &istate),
237 			 mad_dump_val(IB_PORT_PHYS_STATE_F, physstate, 64, &iphystate));
238 	}
239 
240 	if (port->remoteport) {
241 		char *rem_node_name = NULL;
242 
243 		if (port->remoteport->ext_portnum)
244 			snprintf(ext_port_str, 256, "%d",
245 				 port->remoteport->ext_portnum);
246 		else
247 			ext_port_str[0] = '\0';
248 
249 		get_max_msg(width_msg, speed_msg, 256, port);
250 
251 		rem_node_name = remap_node_name(node_name_map,
252 						port->remoteport->node->guid,
253 						port->remoteport->node->
254 						nodedesc);
255 
256 		snprintf(remote_str, 256,
257 			 "0x%016" PRIx64 " %6d %4d[%2s] \"%s\" (%s %s)\n",
258 			 port->remoteport->guid,
259 			 port->remoteport->base_lid ? port->remoteport->
260 			 base_lid : port->remoteport->node->smalid,
261 			 port->remoteport->portnum, ext_port_str, rem_node_name,
262 			 width_msg, speed_msg);
263 
264 		free(rem_node_name);
265 	} else
266 		snprintf(remote_str, 256, "           [  ] \"\" ( )\n");
267 
268 	if (port->ext_portnum)
269 		snprintf(ext_port_str, 256, "%d", port->ext_portnum);
270 	else
271 		ext_port_str[0] = '\0';
272 
273 	if (node->type == IB_NODE_SWITCH)
274 		printf("       Link info: %6d", node->smalid);
275 	else
276 		printf("       Link info: %6d", port->base_lid);
277 
278 	printf("%4d[%2s] ==%s==>  %s",
279 	       port->portnum, ext_port_str, link_str, remote_str);
280 }
281 
282 static int suppress(enum MAD_FIELDS field)
283 {
284 	int i = 0;
285 	for (i = 0; i < sup_total; i++)
286 		if (field == suppressed_fields[i])
287 			return 1;
288 	return 0;
289 }
290 
291 static void report_suppressed(void)
292 {
293 	int i = 0;
294 	printf("## Suppressed:");
295 	for (i = 0; i < sup_total; i++)
296 		printf(" %s", mad_field_name(suppressed_fields[i]));
297 	printf("\n");
298 }
299 
300 static int print_summary(void)
301 {
302 	printf("\n## Summary: %d nodes checked, %d bad nodes found\n",
303 		summary.nodes_checked, summary.bad_nodes);
304 	printf("##          %d ports checked, %d ports have errors beyond threshold\n",
305 		summary.ports_checked, summary.bad_ports);
306 	printf("## %s\n", threshold_str);
307 	if (summary.pma_query_failures)
308 		printf("##          %d PMA query failures\n", summary.pma_query_failures);
309 	report_suppressed();
310 	return (summary.bad_ports);
311 }
312 
313 static void insert_lid2sl_table(struct sa_query_result *r)
314 {
315     unsigned int i;
316     for (i = 0; i < r->result_cnt; i++) {
317 	    ib_path_rec_t *p_pr = (ib_path_rec_t *)sa_get_query_rec(r->p_result_madw, i);
318 	    lid2sl_table[cl_ntoh16(p_pr->dlid)] = ib_path_rec_sl(p_pr);
319     }
320 }
321 
/*
 * Ask the SA for PathRecords starting at `sgid` and feed the returned SLs
 * into lid2sl_table.
 *
 * sgid:  source GID (passed by value; the local copy is modified below).
 * dguid: when non-zero, restrict the query to the destination GID formed by
 *        sgid with its GUID part replaced by dguid; when zero no DGID is set.
 *
 * Returns 0 on success, -1 when no SA handle could be obtained, the
 * sa_query() error code when the query itself fails, or EIO when the SA
 * answers with a non-success status.
 *
 * Side effect: the global ibd_timeout is overwritten with
 * DEFAULT_HALF_WORLD_PR_TIMEOUT and is not restored here.
 *
 * NOTE(review): CHECK_AND_SET_GID / CHECK_AND_SET_VAL are macros defined
 * outside this file; presumably they fill the target field and update the
 * local `comp_mask` - confirm against ibdiag_sa.h before renaming locals.
 */
static int path_record_query(ib_gid_t sgid,uint64_t dguid)
{
     ib_path_rec_t pr;
     ib_net64_t comp_mask = 0;
     uint8_t reversible = 0;
     struct sa_handle * h;

     if (!(h = sa_get_handle()))
	return -1;

     ibd_timeout = DEFAULT_HALF_WORLD_PR_TIMEOUT;
     memset(&pr, 0, sizeof(pr));

     CHECK_AND_SET_GID(sgid, pr.sgid, PR, SGID);
     if(dguid) {
	     /* reuse the local sgid copy: same prefix, GUID replaced by dguid */
	     mad_encode_field(sgid.raw, IB_GID_GUID_F, &dguid);
	     CHECK_AND_SET_GID(sgid, pr.dgid, PR, DGID);
     }

     CHECK_AND_SET_VAL(1, 8, -1, pr.num_path, PR, NUMBPATH);/*to get only one PathRecord for each source and destination pair*/
     CHECK_AND_SET_VAL(1, 8, -1, reversible, PR, REVERSIBLE);/*for a reversible path*/
     /* the reversible flag is stored in bit 7 of num_path */
     pr.num_path |= reversible << 7;
     struct sa_query_result result;
     int ret = sa_query(h, IB_MAD_METHOD_GET_TABLE,
                        (uint16_t)IB_SA_ATTR_PATHRECORD,0,cl_ntoh64(comp_mask),ibd_sakey,
                        &pr, sizeof(pr), &result);
     if (ret) {
             /* this path returns without calling sa_free_result_mad() */
             sa_free_handle(h);
             fprintf(stderr, "Query SA failed: %s; sa call path_query failed\n", strerror(ret));
             return ret;
     }
     if (result.status != IB_SA_MAD_STATUS_SUCCESS) {
             sa_report_err(result.status);
             ret = EIO;
             goto Exit;
     }

     insert_lid2sl_table(&result);
Exit:
     /* common cleanup for a completed query, successful or not */
     sa_free_handle(h);
     sa_free_result_mad(&result);
     return ret;
}
365 
366 static int query_and_dump(char *buf, size_t size, ib_portid_t * portid,
367 			  char *node_name, int portnum,
368 			  const char *attr_name, uint16_t attr_id,
369 			  int start_field, int end_field)
370 {
371 	uint8_t pc[1024];
372 	uint32_t val = 0;
373 	int i, n;
374 
375 	memset(pc, 0, sizeof(pc));
376 
377 	if (!pma_query_via(pc, portid, portnum, ibd_timeout, attr_id,
378 			   ibmad_port)) {
379 		IBWARN("%s query failed on %s, %s port %d", attr_name,
380 		       node_name, portid2str(portid), portnum);
381 		summary.pma_query_failures++;
382 		return 0;
383 	}
384 
385 	for (n = 0, i = start_field; i < end_field; i++) {
386 		mad_decode_field(pc, i, (void *)&val);
387 		if (val)
388 			n += snprintf(buf + n, size - n, " [%s == %u]",
389 				      mad_field_name(i), val);
390 	}
391 
392 	return n;
393 }
394 
395 
396 static int print_results(ib_portid_t * portid, char *node_name,
397 			 ibnd_node_t * node, uint8_t * pc, int portnum,
398 			 int *header_printed, uint8_t *pce, uint16_t cap_mask)
399 {
400 	char buf[1024];
401 	char *str = buf;
402 	uint32_t val = 0;
403 	int i, n;
404 
405 	for (n = 0, i = IB_PC_ERR_SYM_F; i <= IB_PC_VL15_DROPPED_F; i++) {
406 		if (suppress(i))
407 			continue;
408 
409 		/* this is not a counter, skip it */
410 		if (i == IB_PC_COUNTER_SELECT2_F)
411 			continue;
412 
413 		mad_decode_field(pc, i, (void *)&val);
414 		if (exceeds_threshold(i, val)) {
415 			n += snprintf(str + n, 1024 - n, " [%s == %u]",
416 				      mad_field_name(i), val);
417 
418 			/* If there are PortXmitDiscards, get details (if supported) */
419 			if (i == IB_PC_XMT_DISCARDS_F && details) {
420 				n += query_and_dump(str + n, sizeof(buf) - n, portid,
421 						    node_name, portnum,
422 						    "PortXmitDiscardDetails",
423 						    IB_GSI_PORT_XMIT_DISCARD_DETAILS,
424 						    IB_PC_RCV_LOCAL_PHY_ERR_F,
425 						    IB_PC_RCV_ERR_LAST_F);
426 				/* If there are PortRcvErrors, get details (if supported) */
427 			} else if (i == IB_PC_ERR_RCV_F && details) {
428 				n += query_and_dump(str + n, sizeof(buf) - n, portid,
429 						    node_name, portnum,
430 						    "PortRcvErrorDetails",
431 						    IB_GSI_PORT_RCV_ERROR_DETAILS,
432 						    IB_PC_XMT_INACT_DISC_F,
433 						    IB_PC_XMT_DISC_LAST_F);
434 			}
435 		}
436 	}
437 
438 	if (!suppress(IB_PC_XMT_WAIT_F)) {
439 		mad_decode_field(pc, IB_PC_XMT_WAIT_F, (void *)&val);
440 		if (exceeds_threshold(IB_PC_XMT_WAIT_F, val))
441 			n += snprintf(str + n, 1024 - n, " [%s == %u]",
442 				      mad_field_name(IB_PC_XMT_WAIT_F), val);
443 	}
444 
445 	/* if we found errors. */
446 	if (n != 0) {
447 		if (data_counters) {
448 			uint8_t *pkt = pc;
449 			int start_field = IB_PC_XMT_BYTES_F;
450 			int end_field = IB_PC_RCV_PKTS_F;
451 
452 			if (pce) {
453 				pkt = pce;
454 				start_field = IB_PC_EXT_XMT_BYTES_F;
455 				if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
456 					end_field = IB_PC_EXT_RCV_MPKTS_F;
457 				else
458 					end_field = IB_PC_EXT_RCV_PKTS_F;
459 			}
460 
461 			for (i = start_field; i <= end_field; i++) {
462 				uint64_t val64 = 0;
463 				float val = 0;
464 				char *unit = "";
465 				mad_decode_field(pkt, i, (void *)&val64);
466 				if (val64) {
467 					int data = 0;
468 					if (i == IB_PC_EXT_XMT_BYTES_F ||
469 					    i == IB_PC_EXT_RCV_BYTES_F ||
470 					    i == IB_PC_XMT_BYTES_F ||
471 					    i == IB_PC_RCV_BYTES_F)
472 						data = 1;
473 					unit = conv_cnt_human_readable(val64,
474 								&val, data);
475 					n += snprintf(str + n, 1024 - n,
476 						" [%s == %" PRIu64
477 						" (%5.3f%s)]",
478 						mad_field_name(i), val64, val,
479 						unit);
480 				}
481 			}
482 		}
483 
484 		if (!*header_printed) {
485 			if (node->type == IB_NODE_SWITCH)
486 				printf("Errors for 0x%" PRIx64 " \"%s\"\n",
487 					node->ports[0]->guid, node_name);
488 			else
489 				printf("Errors for \"%s\"\n", node_name);
490 			*header_printed = 1;
491 			summary.bad_nodes++;
492 		}
493 
494 		if (portnum == 0xFF) {
495 			if (node->type == IB_NODE_SWITCH)
496 				printf("   GUID 0x%" PRIx64 " port ALL:%s\n",
497 				       node->ports[0]->guid, str);
498 		} else {
499 			printf("   GUID 0x%" PRIx64 " port %d:%s\n",
500 			       node->ports[portnum]->guid, portnum, str);
501 			if (port_config)
502 				print_port_config(node, portnum);
503 			summary.bad_ports++;
504 		}
505 	}
506 	return (n);
507 }
508 
509 static int query_cap_mask(ib_portid_t * portid, char *node_name, int portnum,
510 			  uint16_t * cap_mask)
511 {
512 	uint8_t pc[1024] = { 0 };
513 	uint16_t rc_cap_mask;
514 
515 	portid->sl = lid2sl_table[portid->lid];
516 
517 	/* PerfMgt ClassPortInfo is a required attribute */
518 	if (!pma_query_via(pc, portid, portnum, ibd_timeout, CLASS_PORT_INFO,
519 			   ibmad_port)) {
520 		IBWARN("classportinfo query failed on %s, %s port %d",
521 		       node_name, portid2str(portid), portnum);
522 		summary.pma_query_failures++;
523 		return -1;
524 	}
525 
526 	/* ClassPortInfo should be supported as part of libibmad */
527 	memcpy(&rc_cap_mask, pc + 2, sizeof(rc_cap_mask));	/* CapabilityMask */
528 
529 	*cap_mask = rc_cap_mask;
530 	return 0;
531 }
532 
533 static int print_data_cnts(ib_portid_t * portid, uint16_t cap_mask,
534 			   char *node_name, ibnd_node_t * node, int portnum,
535 			   int *header_printed)
536 {
537 	uint8_t pc[1024];
538 	int i;
539 	int start_field = IB_PC_XMT_BYTES_F;
540 	int end_field = IB_PC_RCV_PKTS_F;
541 
542 	memset(pc, 0, 1024);
543 
544 	portid->sl = lid2sl_table[portid->lid];
545 
546 	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
547 		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
548 				   IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
549 			IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d",
550 			       node_name, portid2str(portid), portnum);
551 			summary.pma_query_failures++;
552 			return (1);
553 		}
554 		start_field = IB_PC_EXT_XMT_BYTES_F;
555 		if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
556 			end_field = IB_PC_EXT_RCV_MPKTS_F;
557 		else
558 			end_field = IB_PC_EXT_RCV_PKTS_F;
559 	} else {
560 		if (!pma_query_via(pc, portid, portnum, ibd_timeout,
561 				   IB_GSI_PORT_COUNTERS, ibmad_port)) {
562 			IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
563 			       node_name, portid2str(portid), portnum);
564 			summary.pma_query_failures++;
565 			return (1);
566 		}
567 		start_field = IB_PC_XMT_BYTES_F;
568 		end_field = IB_PC_RCV_PKTS_F;
569 	}
570 
571 	if (!*header_printed) {
572 		printf("Data Counters for 0x%" PRIx64 " \"%s\"\n", node->guid,
573 		       node_name);
574 		*header_printed = 1;
575 	}
576 
577 	if (portnum == 0xFF)
578 		printf("   GUID 0x%" PRIx64 " port ALL:", node->guid);
579 	else
580 		printf("   GUID 0x%" PRIx64 " port %d:",
581 		       node->guid, portnum);
582 
583 	for (i = start_field; i <= end_field; i++) {
584 		uint64_t val64 = 0;
585 		float val = 0;
586 		char *unit = "";
587 		int data = 0;
588 		mad_decode_field(pc, i, (void *)&val64);
589 		if (i == IB_PC_EXT_XMT_BYTES_F || i == IB_PC_EXT_RCV_BYTES_F ||
590 		    i == IB_PC_XMT_BYTES_F || i == IB_PC_RCV_BYTES_F)
591 			data = 1;
592 		unit = conv_cnt_human_readable(val64, &val, data);
593 		printf(" [%s == %" PRIu64 " (%5.3f%s)]", mad_field_name(i),
594 			val64, val, unit);
595 	}
596 	printf("\n");
597 
598 	if (portnum != 0xFF && port_config)
599 		print_port_config(node, portnum);
600 
601 	return (0);
602 }
603 
604 static int print_errors(ib_portid_t * portid, uint16_t cap_mask,
605 			char *node_name, ibnd_node_t * node, int portnum,
606 			int *header_printed)
607 {
608 	uint8_t pc[1024];
609 	uint8_t pce[1024];
610 	uint8_t *pc_ext = NULL;
611 
612 	memset(pc, 0, 1024);
613 	memset(pce, 0, 1024);
614 
615 	portid->sl = lid2sl_table[portid->lid];
616 
617 	if (!pma_query_via(pc, portid, portnum, ibd_timeout,
618 			   IB_GSI_PORT_COUNTERS, ibmad_port)) {
619 		IBWARN("IB_GSI_PORT_COUNTERS query failed on %s, %s port %d",
620 		       node_name, portid2str(portid), portnum);
621 		summary.pma_query_failures++;
622 		return (0);
623 	}
624 
625 	if (cap_mask & (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP)) {
626 		if (!pma_query_via(pce, portid, portnum, ibd_timeout,
627 		    IB_GSI_PORT_COUNTERS_EXT, ibmad_port)) {
628 			IBWARN("IB_GSI_PORT_COUNTERS_EXT query failed on %s, %s port %d",
629 			       node_name, portid2str(portid), portnum);
630 			summary.pma_query_failures++;
631 			return (0);
632 		}
633 		pc_ext = pce;
634 	}
635 
636 	if (!(cap_mask & IB_PM_PC_XMIT_WAIT_SUP)) {
637 		/* if PortCounters:PortXmitWait not supported clear this counter */
638 		uint32_t foo = 0;
639 		mad_encode_field(pc, IB_PC_XMT_WAIT_F, &foo);
640 	}
641 	return (print_results(portid, node_name, node, pc, portnum,
642 			      header_printed, pc_ext, cap_mask));
643 }
644 
645 uint8_t *reset_pc_ext(void *rcvbuf, ib_portid_t * dest,
646 		      int port, unsigned mask, unsigned timeout,
647 		      const struct ibmad_port * srcport)
648 {
649 	ib_rpc_t rpc = { 0 };
650 	int lid = dest->lid;
651 
652 	DEBUG("lid %u port %d mask 0x%x", lid, port, mask);
653 
654 	if (lid == -1) {
655 		IBWARN("only lid routed is supported");
656 		return NULL;
657 	}
658 
659 	if (!mask)
660 		mask = ~0;
661 
662 	rpc.mgtclass = IB_PERFORMANCE_CLASS;
663 	rpc.method = IB_MAD_METHOD_SET;
664 	rpc.attr.id = IB_GSI_PORT_COUNTERS_EXT;
665 
666 	memset(rcvbuf, 0, IB_MAD_SIZE);
667 
668 	/* Same for attribute IDs */
669 	mad_set_field(rcvbuf, 0, IB_PC_EXT_PORT_SELECT_F, port);
670 	mad_set_field(rcvbuf, 0, IB_PC_EXT_COUNTER_SELECT_F, mask);
671 	rpc.attr.mod = 0;
672 	rpc.timeout = timeout;
673 	rpc.datasz = IB_PC_DATA_SZ;
674 	rpc.dataoffs = IB_PC_DATA_OFFS;
675 	if (!dest->qp)
676 		dest->qp = 1;
677 	if (!dest->qkey)
678 		dest->qkey = IB_DEFAULT_QP1_QKEY;
679 
680 	return mad_rpc(srcport, &rpc, dest, rcvbuf, rcvbuf);
681 }
682 
683 static void clear_port(ib_portid_t * portid, uint16_t cap_mask,
684 		       char *node_name, int port)
685 {
686 	uint8_t pc[1024] = { 0 };
687 	/* bits defined in Table 228 PortCounters CounterSelect and
688 	 * CounterSelect2
689 	 */
690 	uint32_t mask = 0;
691 
692 	if (clear_errors) {
693 		mask |= 0xFFF;
694 		if (cap_mask & IB_PM_PC_XMIT_WAIT_SUP)
695 			mask |= 0x10000;
696 	}
697 	if (clear_counts)
698 		mask |= 0xF000;
699 
700 	if (mask)
701 		if (!performance_reset_via(pc, portid, port, mask, ibd_timeout,
702 					   IB_GSI_PORT_COUNTERS, ibmad_port))
703 			fprintf(stderr, "Failed to reset errors %s port %d\n", node_name,
704 				port);
705 
706 	if (clear_errors && details) {
707 		memset(pc, 0, 1024);
708 		performance_reset_via(pc, portid, port, 0xf, ibd_timeout,
709 				      IB_GSI_PORT_XMIT_DISCARD_DETAILS,
710 				      ibmad_port);
711 		memset(pc, 0, 1024);
712 		performance_reset_via(pc, portid, port, 0x3f, ibd_timeout,
713 				      IB_GSI_PORT_RCV_ERROR_DETAILS,
714 				      ibmad_port);
715 	}
716 
717 	if (clear_counts &&
718 	    (cap_mask &
719 	     (IB_PM_EXT_WIDTH_SUPPORTED | IB_PM_EXT_WIDTH_NOIETF_SUP))) {
720 		if (cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)
721 			mask = 0xFF;
722 		else
723 			mask = 0x0F;
724 
725 		if (!reset_pc_ext(pc, portid, port, mask, ibd_timeout,
726 		    ibmad_port))
727 			fprintf(stderr, "Failed to reset extended data counters %s, "
728 				"%s port %d\n", node_name, portid2str(portid),
729 				port);
730 	}
731 }
732 
/*
 * Check one node of the fabric: query its counters, print what is found and
 * optionally clear the counters afterwards.
 *
 * node:      node to examine; skipped when its type is not selected in
 *            node_type_to_print.
 * user_data: unused.
 *
 * Updates summary.nodes_checked and summary.ports_checked.
 */
void print_node(ibnd_node_t * node, void *user_data)
{
	int header_printed = 0;
	int p = 0;
	int startport = 1;
	int type = 0;
	int all_port_sup = 0;
	ib_portid_t portid = { 0 };
	uint16_t cap_mask = 0;
	char *node_name = NULL;

	/* translate the node type into the matching PRINT_* filter bit */
	switch (node->type) {
	case IB_NODE_SWITCH:
		type = PRINT_SWITCH;
		break;
	case IB_NODE_CA:
		type = PRINT_CA;
		break;
	case IB_NODE_ROUTER:
		type = PRINT_ROUTER;
		break;
	}

	if ((type & node_type_to_print) == 0)
		return;

	/* port 0 is included only for switches with node->smaenhsp0 set */
	if (node->type == IB_NODE_SWITCH && node->smaenhsp0)
		startport = 0;

	node_name = remap_node_name(node_name_map, node->guid, node->nodedesc);

	/*
	 * Pick the address for the capability-mask query: a switch is reached
	 * through its SMA LID (port 0), any other node through the LID of its
	 * first existing port.  If no port exists, portid stays zeroed and p
	 * ends up as numports + 1.
	 */
	if (node->type == IB_NODE_SWITCH) {
		ib_portid_set(&portid, node->smalid, 0, 0);
		p = 0;
	} else {
		for (p = 1; p <= node->numports; p++) {
			if (node->ports[p]) {
				ib_portid_set(&portid,
					      node->ports[p]->base_lid,
					      0, 0);
				break;
			}
		}
	}

	/* a failed query leaves cap_mask at 0 and all_port_sup at 0 */
	if ((query_cap_mask(&portid, node_name, p, &cap_mask) == 0) &&
	    (cap_mask & IB_PM_ALL_PORT_SELECT))
		all_port_sup = 1;

	if (data_counters_only) {
		for (p = startport; p <= node->numports; p++) {
			if (node->ports[p]) {
				if (node->type == IB_NODE_SWITCH)
					ib_portid_set(&portid, node->smalid, 0, 0);
				else
					ib_portid_set(&portid, node->ports[p]->base_lid,
						      0, 0);

				print_data_cnts(&portid, cap_mask, node_name, node, p,
						&header_printed);
				summary.ports_checked++;
				/* per-port clear only when "all ports" is unsupported */
				if (!all_port_sup)
					clear_port(&portid, cap_mask, node_name, p);
			}
		}
	} else {
		/*
		 * First try the "all ports" selector (0xFF).  A zero return
		 * means nothing was reported for the node as a whole (this
		 * includes a failed query), so the per-port pass is skipped.
		 */
		if (all_port_sup)
			if (!print_errors(&portid, cap_mask, node_name, node,
					  0xFF, &header_printed)) {
				summary.ports_checked += node->numports;
				goto clear;
			}

		for (p = startport; p <= node->numports; p++) {
			if (node->ports[p]) {
				if (node->type == IB_NODE_SWITCH)
					ib_portid_set(&portid, node->smalid, 0, 0);
				else
					ib_portid_set(&portid, node->ports[p]->base_lid,
						      0, 0);

				print_errors(&portid, cap_mask, node_name, node, p,
					     &header_printed);
				summary.ports_checked++;
				if (!all_port_sup)
					clear_port(&portid, cap_mask, node_name, p);
			}
		}
	}

clear:
	summary.nodes_checked++;
	/* one reset with the 0xFF selector, using whatever portid was set last */
	if (all_port_sup)
		clear_port(&portid, cap_mask, node_name, 0xFF);

	free(node_name);
}
830 
831 static void add_suppressed(enum MAD_FIELDS field)
832 {
833 	if (sup_total >= SUP_MAX) {
834 		IBWARN("Maximum (%d) fields have been suppressed; skipping %s",
835 		       sup_total, mad_field_name(field));
836 		return;
837 	}
838 	suppressed_fields[sup_total++] = field;
839 }
840 
841 static void calculate_suppressed_fields(char *str)
842 {
843 	enum MAD_FIELDS f;
844 	char *val, *lasts = NULL;
845 	char *tmp = strdup(str);
846 
847 	val = strtok_r(tmp, ",", &lasts);
848 	while (val) {
849 		for (f = IB_PC_FIRST_F; f <= IB_PC_LAST_F; f++)
850 			if (strcmp(val, mad_field_name(f)) == 0)
851 				add_suppressed(f);
852 		val = strtok_r(NULL, ",", &lasts);
853 	}
854 
855 	free(tmp);
856 }
857 
858 static int process_opt(void *context, int ch, char *optarg)
859 {
860 	struct ibnd_config *cfg = context;
861 	switch (ch) {
862 	case 's':
863 		calculate_suppressed_fields(optarg);
864 		break;
865 	case 'c':
866 		/* Right now this is the only "common" error */
867 		add_suppressed(IB_PC_ERR_SWITCH_REL_F);
868 		break;
869 	case 1:
870 		node_name_map_file = strdup(optarg);
871 		break;
872 	case 2:
873 		data_counters++;
874 		break;
875 	case 3:
876 		node_type_to_print |= PRINT_SWITCH;
877 		break;
878 	case 4:
879 		node_type_to_print |= PRINT_CA;
880 		break;
881 	case 5:
882 		node_type_to_print |= PRINT_ROUTER;
883 		break;
884 	case 6:
885 		details = 1;
886 		break;
887 	case 7:
888 		load_cache_file = strdup(optarg);
889 		break;
890 	case 8:
891 		threshold_file = strdup(optarg);
892 		break;
893 	case 9:
894 		data_counters_only = 1;
895 		break;
896 	case 10:
897 		obtain_sl = 0;
898 		break;
899 	case 'G':
900 	case 'S':
901 		port_guid_str = optarg;
902 		port_guid = strtoull(optarg, 0, 0);
903 		break;
904 	case 'D':
905 		dr_path = strdup(optarg);
906 		break;
907 	case 'r':
908 		port_config++;
909 		break;
910 	case 'R':		/* nop */
911 		break;
912 	case 'k':
913 		clear_errors = 1;
914 		break;
915 	case 'K':
916 		clear_counts = 1;
917 		break;
918 	case 'o':
919 		cfg->max_smps = strtoul(optarg, NULL, 0);
920 		break;
921 	default:
922 		return -1;
923 	}
924 
925 	return 0;
926 }
927 
/*
 * Entry point: parse options, discover (or load) the fabric, query the
 * performance counters of the selected nodes and print a summary.
 *
 * The process always ends through exit().  The status is 1 when
 * print_summary() reports ports with errors beyond threshold, -1 when the
 * fabric could not be loaded or discovered or when incompatible options
 * were given, and 0 otherwise.
 *
 * NOTE(review): the two `goto destroy_fabric` paths taken after a failed
 * Node Info query or a failed path_record_query() leave rc at 0, so these
 * failures exit with status 0 - confirm whether that is intended.
 */
int main(int argc, char **argv)
{
	struct ibnd_config config = { 0 };
	int resolved = -1;
	ib_portid_t portid = { 0 };
	ib_portid_t self_portid = { 0 };
	int rc = 0;
	ibnd_fabric_t *fabric = NULL;
	ib_gid_t self_gid;
	int port = 0;

	int mgmt_classes[4] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS, IB_SA_CLASS,
		IB_PERFORMANCE_CLASS
	};

	/* tool-specific options; each entry is handled in process_opt() */
	const struct ibdiag_opt opts[] = {
		{"suppress", 's', 1, "<err1,err2,...>",
		 "suppress errors listed"},
		{"suppress-common", 'c', 0, NULL,
		 "suppress some of the common counters"},
		{"node-name-map", 1, 1, "<file>", "node name map file"},
		{"port-guid", 'G', 1, "<port_guid>",
		 "report the node containing the port specified by <port_guid>"},
		{"", 'S', 1, "<port_guid>",
		 "Same as \"-G\" for backward compatibility"},
		{"Direct", 'D', 1, "<dr_path>",
		 "report the node containing the port specified by <dr_path>"},
		{"skip-sl", 10, 0, NULL,"don't obtain SL to all destinations"},
		{"report-port", 'r', 0, NULL,
		 "report port link information"},
		{"threshold-file", 8, 1, NULL,
		 "specify an alternate threshold file, default: " DEF_THRES_FILE},
		{"GNDN", 'R', 0, NULL,
		 "(This option is obsolete and does nothing)"},
		{"data", 2, 0, NULL, "include data counters for ports with errors"},
		{"switch", 3, 0, NULL, "print data for switches only"},
		{"ca", 4, 0, NULL, "print data for CA's only"},
		{"router", 5, 0, NULL, "print data for routers only"},
		{"details", 6, 0, NULL, "include transmit discard details"},
		{"counters", 9, 0, NULL, "print data counters only"},
		{"clear-errors", 'k', 0, NULL,
		 "Clear error counters after read"},
		{"clear-counts", 'K', 0, NULL,
		 "Clear data counters after read"},
		{"load-cache", 7, 1, "<file>",
		 "filename of ibnetdiscover cache to load"},
		{"outstanding_smps", 'o', 1, NULL,
		 "specify the number of outstanding SMP's which should be "
		 "issued during the scan"},
		{0}
	};
	char usage_args[] = "";

	memset(suppressed_fields, 0, sizeof suppressed_fields);
	ibdiag_process_opts(argc, argv, &config, "cDGKLnRrSs", opts, process_opt,
			    usage_args, NULL);

	argc -= optind;
	argv += optind;

	/* no --switch/--ca/--router given: report every node type */
	if (!node_type_to_print)
		node_type_to_print = PRINT_ALL;

	ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 4);
	if (!ibmad_port)
		IBEXIT("Failed to open port; %s:%d\n", ibd_ca, ibd_ca_port);

	smp_mkey_set(ibmad_port, ibd_mkey);

	if (ibd_timeout) {
		mad_rpc_set_timeout(ibmad_port, ibd_timeout);
		config.timeout_ms = ibd_timeout;
	}

	config.flags = ibd_ibnetdisc_flags;
	config.mkey = ibd_mkey;

	if (dr_path && load_cache_file) {
		mad_rpc_close_port(ibmad_port);
		fprintf(stderr, "Cannot specify cache and direct route path\n");
		exit(-1);
	}

	/* self_gid is needed later as the source GID of the PathRecord query */
	if (resolve_self(ibd_ca, ibd_ca_port, &self_portid, &port, &self_gid.raw) < 0) {
		mad_rpc_close_port(ibmad_port);
		IBEXIT("can't resolve self port %s", argv[0]);
	}

	node_name_map = open_node_name_map(node_name_map_file);

	/* limit the scan the fabric around the target */
	if (dr_path) {
		if ((resolved =
		     resolve_portid_str(ibd_ca, ibd_ca_port, &portid, dr_path,
					IB_DEST_DRPATH, NULL, ibmad_port)) < 0)
			IBWARN("Failed to resolve %s; attempting full scan",
			       dr_path);
	} else if (port_guid_str) {
		if ((resolved =
		     resolve_portid_str(ibd_ca, ibd_ca_port, &portid,
					port_guid_str, IB_DEST_GUID, ibd_sm_id,
					       ibmad_port)) < 0)
			IBWARN("Failed to resolve %s; attempting full scan",
			       port_guid_str);
		/* remember the SL of the resolved port (done even if resolving failed) */
		if(obtain_sl)
			lid2sl_table[portid.lid] = portid.sl;
	}

	/* the port is closed for the duration of discovery and reopened below */
	mad_rpc_close_port(ibmad_port);

	if (load_cache_file) {
		if ((fabric = ibnd_load_fabric(load_cache_file, 0)) == NULL) {
			fprintf(stderr, "loading cached fabric failed\n");
			rc = -1;
			goto close_port;
		}
	} else {
		/* a resolved target: discover only its neighbourhood first */
		if (resolved >= 0) {
			if (!config.max_hops)
				config.max_hops = 1;
			if (!(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port,
						    &portid, &config)))
				IBWARN("Single node discover failed;"
				       " attempting full scan");
		}

		/* fall back to (or start with) a full fabric scan */
		if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca,
							       ibd_ca_port,
							       NULL,
							       &config))) {
			fprintf(stderr, "discover failed\n");
			rc = -1;
			goto close_port;
		}
	}

	set_thresholds(threshold_file);

	/* reopen the global ibmad_port */
	ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port,
				       mgmt_classes, 4);
	if (!ibmad_port) {
		ibnd_destroy_fabric(fabric);
		close_node_name_map(node_name_map);
		IBEXIT("Failed to reopen port: %s:%d\n",
			ibd_ca, ibd_ca_port);
	}

	smp_mkey_set(ibmad_port, ibd_mkey);

	if (ibd_timeout)
		mad_rpc_set_timeout(ibmad_port, ibd_timeout);

	if (port_guid_str) {
		/* single node selected by port GUID */
		ibnd_port_t *port = ibnd_find_port_guid(fabric, port_guid);
		if (port)
			print_node(port->node, NULL);
		else
			fprintf(stderr, "Failed to find node: %s\n",
				port_guid_str);
	} else if (dr_path) {
		/* single node selected by direct route: learn its port GUID first */
		ibnd_port_t *port;
		uint8_t ni[IB_SMP_DATA_SIZE] = { 0 };
		if (!smp_query_via(ni, &portid, IB_ATTR_NODE_INFO, 0,
			   ibd_timeout, ibmad_port)) {
				fprintf(stderr, "Failed to query local Node Info\n");
				goto destroy_fabric;
		}

		mad_decode_field(ni, IB_NODE_PORT_GUID_F, &(port_guid));

		port = ibnd_find_port_guid(fabric, port_guid);
		if (port) {
			if(obtain_sl)
				if(path_record_query(self_gid,port->guid))
					goto destroy_fabric;
			print_node(port->node, NULL);
		} else
			fprintf(stderr, "Failed to find node: %s\n", dr_path);
	} else {
		/* whole fabric: fetch SLs for all destinations, then visit every node */
		if(obtain_sl)
			if(path_record_query(self_gid,0))
				goto destroy_fabric;

		ibnd_iter_nodes(fabric, print_node, NULL);
	}

	/* collapse "number of bad ports" into an exit status of 0 or 1 */
	rc = print_summary();
	if (rc)
		rc = 1;

destroy_fabric:
	mad_rpc_close_port(ibmad_port);
	ibnd_destroy_fabric(fabric);

close_port:
	close_node_name_map(node_name_map);
	exit(rc);
}
1127