xref: /titanic_44/usr/src/cmd/mdb/common/modules/ip/ip.c (revision 4b22b9337f359bfd063322244f5336cc7c6ffcfa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/stropts.h>
30 #include <sys/stream.h>
31 #include <sys/socket.h>
32 #include <sys/avl_impl.h>
33 #include <net/if.h>
34 #include <net/route.h>
35 #include <netinet/in.h>
36 #include <netinet/ip6.h>
37 #include <netinet/udp.h>
38 #include <netinet/sctp.h>
39 #include <inet/mib2.h>
40 #include <inet/common.h>
41 #include <inet/ip.h>
42 #include <inet/ip_ire.h>
43 #include <inet/ip6.h>
44 #include <inet/ipclassifier.h>
45 #include <inet/mi.h>
46 #include <sys/squeue_impl.h>
47 
48 #include <mdb/mdb_modapi.h>
49 #include <mdb/mdb_ks.h>
50 
51 #define	ADDR_WIDTH 11
52 
/*
 * Pairs the symbolic name of a flag bit with a short, human-readable
 * description of what that bit means.
 */
typedef struct {
	const char *bit_name;	/* name of bit */
	const char *bit_descr;	/* description of bit's purpose */
} bitname_t;

/*
 * Names and descriptions of the squeue state bits.  Entry i describes
 * bit (1 << i) of sq_state, which is how the verbose ::squeue output
 * indexes this table.  The list is terminated by a NULL bit_name.
 */
static const bitname_t squeue_states[] = {
	{ "SQS_PROC",		"being processed" },
	{ "SQS_WORKER",		"... by a worker thread" },
	{ "SQS_ENTER",		"... by an squeue_enter() thread" },
	{ "SQS_FAST",		"... in fast-path mode" },
	{ "SQS_USER", 		"A non interrupt user" },
	{ "SQS_BOUND",		"worker thread bound to CPU" },
	{ "SQS_PROFILE",	"profiling enabled" },
	{ "SQS_REENTER",	"re-entered thread" },
	{ NULL }
};
69 
/*
 * Private state of the "illif_stack" walker.  It is allocated in the
 * walker's init routine, handed to the walk callback as the per-element
 * data, and released in the fini routine.
 */
typedef struct illif_walk_data {
	ill_g_head_t ill_g_heads[MAX_G_HEADS];	/* local copy of list heads */
	int ill_list;		/* index of the list currently being walked */
	ill_if_t ill_if;	/* local copy of the element just read */
} illif_walk_data_t;
75 
/* Forward declarations: the two IP header dcmds call each other. */
static int iphdr(uintptr_t, uint_t, int, const mdb_arg_t *);
static int ip6hdr(uintptr_t, uint_t, int, const mdb_arg_t *);

/* Forward declaration: used as a walk callback before it is defined. */
static int ire_format(uintptr_t addr, const ire_t *irep, uint_t *verbose);
80 
81 /*
82  * Given the kernel address of an ip_stack_t, return the stackid
83  */
84 static int
85 ips_to_stackid(uintptr_t kaddr)
86 {
87 	ip_stack_t ipss;
88 	netstack_t nss;
89 
90 	if (mdb_vread(&ipss, sizeof (ipss), kaddr) == -1) {
91 		mdb_warn("failed to read ip_stack_t %p", kaddr);
92 		return (0);
93 	}
94 	kaddr = (uintptr_t)ipss.ips_netstack;
95 	if (mdb_vread(&nss, sizeof (nss), kaddr) == -1) {
96 		mdb_warn("failed to read netstack_t %p", kaddr);
97 		return (0);
98 	}
99 	return (nss.netstack_stackid);
100 }
101 
102 int
103 ip_stacks_walk_init(mdb_walk_state_t *wsp)
104 {
105 	if (mdb_layered_walk("netstack", wsp) == -1) {
106 		mdb_warn("can't walk 'netstack'");
107 		return (WALK_ERR);
108 	}
109 	return (WALK_NEXT);
110 }
111 
112 int
113 ip_stacks_walk_step(mdb_walk_state_t *wsp)
114 {
115 	uintptr_t kaddr;
116 	netstack_t nss;
117 
118 #ifdef DEBUG
119 	mdb_printf("DEBUG: ip_stacks_walk_step: addr %p\n", wsp->walk_addr);
120 #endif
121 	if (mdb_vread(&nss, sizeof (nss), wsp->walk_addr) == -1) {
122 		mdb_warn("can't read netstack at %p", wsp->walk_addr);
123 		return (WALK_ERR);
124 	}
125 	kaddr = (uintptr_t)nss.netstack_modules[NS_IP];
126 
127 #ifdef DEBUG
128 	mdb_printf("DEBUG: ip_stacks_walk_step: ip_stack_t at %p\n", kaddr);
129 #endif
130 	return (wsp->walk_callback(kaddr, wsp->walk_layer, wsp->walk_cbdata));
131 }
132 
133 /*
134  * Called with walk_addr being the address of ips_ill_g_heads
135  */
136 int
137 illif_stack_walk_init(mdb_walk_state_t *wsp)
138 {
139 	illif_walk_data_t *iw;
140 
141 	if (wsp->walk_addr == NULL) {
142 		mdb_warn("illif_stack supports only local walks\n");
143 		return (WALK_ERR);
144 	}
145 
146 	iw = mdb_alloc(sizeof (illif_walk_data_t), UM_SLEEP);
147 
148 	if (mdb_vread(iw->ill_g_heads, MAX_G_HEADS * sizeof (ill_g_head_t),
149 	    wsp->walk_addr) == -1) {
150 		mdb_warn("failed to read 'ips_ill_g_heads' at %p",
151 		    wsp->walk_addr);
152 		mdb_free(iw, sizeof (illif_walk_data_t));
153 		return (WALK_ERR);
154 	}
155 
156 	iw->ill_list = 0;
157 	wsp->walk_addr = (uintptr_t)iw->ill_g_heads[0].ill_g_list_head;
158 	wsp->walk_data = iw;
159 
160 	return (WALK_NEXT);
161 }
162 
/*
 * Step routine of the "illif_stack" walker.  Reads the ill_if_t at the
 * current address into the walker state, advances to its illif_next, and
 * invokes the callback for the element just read.
 *
 * The end of a list is detected when the next pointer equals the saved
 * head pointer of the current list; in that case the element just read is
 * not reported (presumably it is the list's head/sentinel entry -- verify
 * against the kernel's list layout) and the walk moves on to the next
 * list, finishing once all MAX_G_HEADS lists have been visited.
 */
int
illif_stack_walk_step(mdb_walk_state_t *wsp)
{
	uintptr_t addr = wsp->walk_addr;
	illif_walk_data_t *iw = wsp->walk_data;
	int list = iw->ill_list;

	if (mdb_vread(&iw->ill_if, sizeof (ill_if_t), addr) == -1) {
		mdb_warn("failed to read ill_if_t at %p", addr);
		return (WALK_ERR);
	}

	/* Advance before the callback so the next step resumes correctly. */
	wsp->walk_addr = (uintptr_t)iw->ill_if.illif_next;

	if (wsp->walk_addr ==
	    (uintptr_t)iw->ill_g_heads[list].ill_g_list_head) {

		/* Wrapped around to the head: this list is exhausted. */
		if (++list >= MAX_G_HEADS)
			return (WALK_DONE);

		iw->ill_list = list;
		wsp->walk_addr =
		    (uintptr_t)iw->ill_g_heads[list].ill_g_list_head;
		return (WALK_NEXT);
	}

	return (wsp->walk_callback(addr, iw, wsp->walk_cbdata));
}
191 
192 void
193 illif_stack_walk_fini(mdb_walk_state_t *wsp)
194 {
195 	mdb_free(wsp->walk_data, sizeof (illif_walk_data_t));
196 }
197 
/*
 * Arguments and results shared between the ::illif dcmd and its walk
 * callback.
 */
typedef struct illif_cbdata {
	uint_t ill_flags;	/* dcmd flags the dcmd was invoked with */
	uintptr_t ill_addr;	/* address given to the dcmd, if any */
	int ill_printlist;	/* list to be printed (MAX_G_HEADS for all) */
	boolean_t ill_printed;	/* set once the callback printed an entry */
} illif_cbdata_t;
204 
205 static int
206 illif_cb(uintptr_t addr, const illif_walk_data_t *iw, illif_cbdata_t *id)
207 {
208 	const char *version;
209 
210 	if (id->ill_printlist < MAX_G_HEADS &&
211 	    id->ill_printlist != iw->ill_list)
212 		return (WALK_NEXT);
213 
214 	if (id->ill_flags & DCMD_ADDRSPEC && id->ill_addr != addr)
215 		return (WALK_NEXT);
216 
217 	if (id->ill_flags & DCMD_PIPE_OUT) {
218 		mdb_printf("%p\n", addr);
219 		return (WALK_NEXT);
220 	}
221 
222 	switch (iw->ill_list) {
223 		case IP_V4_G_HEAD:	version = "v4";	break;
224 		case IP_V6_G_HEAD:	version = "v6";	break;
225 		default:		version = "??"; break;
226 	}
227 
228 	mdb_printf("%?p %2s %?p %10d %?p %s\n",
229 	    addr, version, addr + offsetof(ill_if_t, illif_avl_by_ppa),
230 	    iw->ill_if.illif_avl_by_ppa.avl_numnodes,
231 	    iw->ill_if.illif_ppa_arena, iw->ill_if.illif_name);
232 
233 	id->ill_printed = TRUE;
234 
235 	return (WALK_NEXT);
236 }
237 
238 int
239 illif_walk_init(mdb_walk_state_t *wsp)
240 {
241 	if (mdb_layered_walk("ip_stacks", wsp) == -1) {
242 		mdb_warn("can't walk 'ip_stacks'");
243 		return (WALK_ERR);
244 	}
245 
246 	return (WALK_NEXT);
247 }
248 
249 int
250 illif_walk_step(mdb_walk_state_t *wsp)
251 {
252 	uintptr_t kaddr;
253 
254 #ifdef DEBUG
255 	mdb_printf("DEBUG: illif_walk_step: addr %p\n", wsp->walk_addr);
256 #endif
257 
258 	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ill_g_heads);
259 
260 	if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) {
261 		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
262 		return (WALK_ERR);
263 	}
264 #ifdef DEBUG
265 	mdb_printf("DEBUG: illif_walk_step: ips_ill_g_heads %p\n", kaddr);
266 #endif
267 
268 	if (mdb_pwalk("illif_stack", wsp->walk_callback,
269 		wsp->walk_cbdata, kaddr) == -1) {
270 		mdb_warn("couldn't walk 'illif_stack' for ips_ill_g_heads %p",
271 		    kaddr);
272 		return (WALK_ERR);
273 	}
274 	return (WALK_NEXT);
275 }
276 
277 int
278 illif(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
279 {
280 	illif_cbdata_t id;
281 	ill_if_t ill_if;
282 	const char *opt_P = NULL;
283 	int printlist = MAX_G_HEADS;
284 
285 	if (mdb_getopts(argc, argv,
286 	    'P', MDB_OPT_STR, &opt_P, NULL) != argc)
287 		return (DCMD_USAGE);
288 
289 	if (opt_P != NULL) {
290 		if (strcmp("v4", opt_P) == 0) {
291 			printlist = IP_V4_G_HEAD;
292 		} else if (strcmp("v6", opt_P) == 0) {
293 			printlist = IP_V6_G_HEAD;
294 		} else {
295 			mdb_warn("invalid protocol '%s'\n", opt_P);
296 			return (DCMD_USAGE);
297 		}
298 	}
299 
300 	if (DCMD_HDRSPEC(flags) && (flags & DCMD_PIPE_OUT) == 0) {
301 		mdb_printf("%<u>%?s %2s %?s %10s %?s %-10s%</u>\n",
302 		    "ADDR", "IP", "AVLADDR", "NUMNODES", "ARENA", "NAME");
303 	}
304 
305 	id.ill_flags = flags;
306 	id.ill_addr = addr;
307 	id.ill_printlist = printlist;
308 	id.ill_printed = FALSE;
309 
310 	if (mdb_walk("illif", (mdb_walk_cb_t)illif_cb, &id) == -1) {
311 		mdb_warn("can't walk ill_if_t structures");
312 		return (DCMD_ERR);
313 	}
314 
315 	if (!(flags & DCMD_ADDRSPEC) || opt_P != NULL || id.ill_printed)
316 		return (DCMD_OK);
317 
318 	/*
319 	 * If an address is specified and the walk doesn't find it,
320 	 * print it anyway.
321 	 */
322 	if (mdb_vread(&ill_if, sizeof (ill_if_t), addr) == -1) {
323 		mdb_warn("failed to read ill_if_t at %p", addr);
324 		return (DCMD_ERR);
325 	}
326 
327 	mdb_printf("%?p %2s %?p %10d %?p %s\n",
328 	    addr, "??", addr + offsetof(ill_if_t, illif_avl_by_ppa),
329 	    ill_if.illif_avl_by_ppa.avl_numnodes,
330 	    ill_if.illif_ppa_arena, ill_if.illif_name);
331 
332 	return (DCMD_OK);
333 }
334 
/*
 * Help text for the ::illif dcmd.
 */
static void
illif_help(void)
{
	mdb_printf("Options:\n"
	    "\t-P v4 | v6"
	    "\tfilter interface structures for the specified protocol\n");
}
342 
343 int
344 ire_walk_init(mdb_walk_state_t *wsp)
345 {
346 	if (mdb_layered_walk("ire_cache", wsp) == -1) {
347 		mdb_warn("can't walk 'ire_cache'");
348 		return (WALK_ERR);
349 	}
350 
351 	return (WALK_NEXT);
352 }
353 
354 int
355 ire_walk_step(mdb_walk_state_t *wsp)
356 {
357 	ire_t ire;
358 
359 	if (mdb_vread(&ire, sizeof (ire), wsp->walk_addr) == -1) {
360 		mdb_warn("can't read ire at %p", wsp->walk_addr);
361 		return (WALK_ERR);
362 	}
363 
364 	return (wsp->walk_callback(wsp->walk_addr, &ire, wsp->walk_cbdata));
365 }
366 
367 int
368 ire_ctable_walk_init(mdb_walk_state_t *wsp)
369 {
370 	if (mdb_layered_walk("ip_stacks", wsp) == -1) {
371 		mdb_warn("can't walk 'ip_stacks'");
372 		return (WALK_ERR);
373 	}
374 
375 	return (WALK_NEXT);
376 }
377 
378 int
379 ire_ctable_walk_step(mdb_walk_state_t *wsp)
380 {
381 	uintptr_t kaddr;
382 	irb_t *irb;
383 	int verbose = 0;
384 	uint32_t cache_table_size;
385 	int i;
386 
387 #ifdef DEBUG
388 	mdb_printf("DEBUG: ire_ctable_walk_step: addr %p\n", wsp->walk_addr);
389 #endif
390 
391 	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table_size);
392 
393 	if (mdb_vread(&cache_table_size, sizeof (uint32_t), kaddr) == -1) {
394 		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
395 		return (WALK_ERR);
396 	}
397 #ifdef DEBUG
398 	mdb_printf("DEBUG: ire_ctable_walk_step: ips_ip_cache_table_size %u\n",
399 		cache_table_size);
400 #endif
401 
402 	kaddr = wsp->walk_addr + OFFSETOF(ip_stack_t, ips_ip_cache_table);
403 	if (mdb_vread(&kaddr, sizeof (kaddr), kaddr) == -1) {
404 		mdb_warn("can't read ips_ip_cache_table at %p", kaddr);
405 		return (WALK_ERR);
406 	}
407 #ifdef DEBUG
408 	mdb_printf("DEBUG: ire_ctable_walk_step: ips_ip_cache_table %p\n",
409 	    kaddr);
410 #endif
411 
412 	irb = mdb_alloc(sizeof (irb_t) * cache_table_size, UM_SLEEP|UM_GC);
413 	if (mdb_vread(irb, sizeof (irb_t) * cache_table_size, kaddr) == -1) {
414 		mdb_warn("can't read irb at %p", kaddr);
415 		return (WALK_ERR);
416 	}
417 	for (i = 0; i < cache_table_size; i++) {
418 		kaddr = (uintptr_t)irb[i].irb_ire;
419 #ifdef DEBUG
420 		mdb_printf("DEBUG: ire_ctable_walk_step: %d ire %p\n",
421 		    i, kaddr);
422 #endif
423 
424 		if (mdb_pwalk("ire_next", (mdb_walk_cb_t)ire_format, &verbose,
425 			kaddr) == -1) {
426 			mdb_warn("can't walk 'ire_next' for ire %p", kaddr);
427 			return (WALK_ERR);
428 		}
429 	}
430 	return (WALK_NEXT);
431 }
432 
/*
 * Initialise the "ire_next" walker.  No state is needed: the walk simply
 * starts from whatever address the caller supplied in walk_addr.
 */
/* ARGSUSED */
int
ire_next_walk_init(mdb_walk_state_t *wsp)
{
#ifdef DEBUG
	mdb_printf("DEBUG: ire_next_walk_init: addr %p\n", wsp->walk_addr);
#endif
	return (WALK_NEXT);
}
442 
443 int
444 ire_next_walk_step(mdb_walk_state_t *wsp)
445 {
446 	ire_t ire;
447 	int status;
448 
449 #ifdef DEBUG
450 	mdb_printf("DEBUG: ire_next_walk_step: addr %p\n", wsp->walk_addr);
451 #endif
452 
453 	if (wsp->walk_addr == NULL)
454 		return (WALK_DONE);
455 
456 	if (mdb_vread(&ire, sizeof (ire), wsp->walk_addr) == -1) {
457 		mdb_warn("can't read ire at %p", wsp->walk_addr);
458 		return (WALK_ERR);
459 	}
460 	status = wsp->walk_callback(wsp->walk_addr, &ire,
461 	    wsp->walk_cbdata);
462 
463 	if (status != WALK_NEXT)
464 		return (status);
465 
466 	wsp->walk_addr = (uintptr_t)ire.ire_next;
467 #ifdef DEBUG
468 	mdb_printf("DEBUG: ire_ctable_walk_step: next %p\n", wsp->walk_addr);
469 #endif
470 	return (status);
471 }
472 
473 static int
474 ire_format(uintptr_t addr, const ire_t *irep, uint_t *verbose)
475 {
476 	static const mdb_bitmask_t tmasks[] = {
477 		{ "BROADCAST",	IRE_BROADCAST,		IRE_BROADCAST	},
478 		{ "DEFAULT",	IRE_DEFAULT,		IRE_DEFAULT	},
479 		{ "LOCAL",	IRE_LOCAL,		IRE_LOCAL	},
480 		{ "LOOPBACK",	IRE_LOOPBACK,		IRE_LOOPBACK	},
481 		{ "PREFIX",	IRE_PREFIX,		IRE_PREFIX	},
482 		{ "CACHE",	IRE_CACHE,		IRE_CACHE	},
483 		{ "IF_NORESOLVER", IRE_IF_NORESOLVER,	IRE_IF_NORESOLVER },
484 		{ "IF_RESOLVER", IRE_IF_RESOLVER,	IRE_IF_RESOLVER	},
485 		{ "HOST",	IRE_HOST,		IRE_HOST	},
486 		{ "HOST_REDIRECT", IRE_HOST_REDIRECT,	IRE_HOST_REDIRECT },
487 		{ NULL,		0,			0		}
488 	};
489 
490 	static const mdb_bitmask_t mmasks[] = {
491 		{ "CONDEMNED",	IRE_MARK_CONDEMNED,	IRE_MARK_CONDEMNED },
492 		{ "NORECV",	IRE_MARK_NORECV,	IRE_MARK_NORECV	},
493 		{ "HIDDEN",	IRE_MARK_HIDDEN,	IRE_MARK_HIDDEN	},
494 		{ "NOADD",	IRE_MARK_NOADD,		IRE_MARK_NOADD	},
495 		{ "TEMPORARY",	IRE_MARK_TEMPORARY,	IRE_MARK_TEMPORARY },
496 		{ NULL,		0,			0		}
497 	};
498 
499 	static const mdb_bitmask_t fmasks[] = {
500 		{ "UP",		RTF_UP,			RTF_UP		},
501 		{ "GATEWAY",	RTF_GATEWAY,		RTF_GATEWAY	},
502 		{ "HOST",	RTF_HOST,		RTF_HOST	},
503 		{ "REJECT",	RTF_REJECT,		RTF_REJECT	},
504 		{ "DYNAMIC",	RTF_DYNAMIC,		RTF_DYNAMIC	},
505 		{ "MODIFIED",	RTF_MODIFIED,		RTF_MODIFIED	},
506 		{ "DONE",	RTF_DONE,		RTF_DONE	},
507 		{ "MASK",	RTF_MASK,		RTF_MASK	},
508 		{ "CLONING",	RTF_CLONING,		RTF_CLONING	},
509 		{ "XRESOLVE",	RTF_XRESOLVE,		RTF_XRESOLVE	},
510 		{ "LLINFO",	RTF_LLINFO,		RTF_LLINFO	},
511 		{ "STATIC",	RTF_STATIC,		RTF_STATIC	},
512 		{ "BLACKHOLE",	RTF_BLACKHOLE,		RTF_BLACKHOLE	},
513 		{ "PRIVATE",	RTF_PRIVATE,		RTF_PRIVATE	},
514 		{ "PROTO2",	RTF_PROTO2,		RTF_PROTO2	},
515 		{ "PROTO1",	RTF_PROTO1,		RTF_PROTO1	},
516 		{ "MULTIRT",	RTF_MULTIRT,		RTF_MULTIRT	},
517 		{ "SETSRC",	RTF_SETSRC,		RTF_SETSRC	},
518 		{ NULL,		0,			0		}
519 	};
520 
521 	if (irep->ire_ipversion == 6 && *verbose) {
522 
523 		mdb_printf("%<b>%?p%</b> %40N <%hb>\n"
524 		    "%?s %40N <%hb>\n"
525 		    "%?s %40d %4d <%hb>\n",
526 		    addr, &irep->ire_src_addr_v6, irep->ire_type, tmasks,
527 		    "", &irep->ire_addr_v6, (ushort_t)irep->ire_marks, mmasks,
528 		    "", ips_to_stackid((uintptr_t)irep->ire_ipst),
529 		    irep->ire_zoneid,
530 		    irep->ire_flags, fmasks);
531 
532 	} else if (irep->ire_ipversion == 6) {
533 
534 		mdb_printf("%?p %30N %30N %5d %4d\n",
535 		    addr, &irep->ire_src_addr_v6,
536 		    &irep->ire_addr_v6,
537 		    ips_to_stackid((uintptr_t)irep->ire_ipst),
538 		    irep->ire_zoneid);
539 
540 	} else if (*verbose) {
541 
542 		mdb_printf("%<b>%?p%</b> %40I <%hb>\n"
543 		    "%?s %40I <%hb>\n"
544 		    "%?s %40d <%hb>\n",
545 		    addr, irep->ire_src_addr, irep->ire_type, tmasks,
546 		    "", irep->ire_addr, (ushort_t)irep->ire_marks, mmasks,
547 		    "", ips_to_stackid((uintptr_t)irep->ire_ipst),
548 		    irep->ire_zoneid, irep->ire_flags, fmasks);
549 
550 	} else {
551 
552 		mdb_printf("%?p %30I %30I %5d %4d\n", addr, irep->ire_src_addr,
553 		    irep->ire_addr, ips_to_stackid((uintptr_t)irep->ire_ipst),
554 		    irep->ire_zoneid);
555 	}
556 
557 	return (WALK_NEXT);
558 }
559 
/*
 * Compute the 16-bit one's-complement checksum over len bytes at p and
 * return its complement.
 *
 * There are faster ways to do this.  Given the interactive nature of this
 * use it is not worth much effort.
 *
 * p   - start of the data; must be suitably aligned for uint16_t access
 * len - number of bytes to sum; values <= 0 sum nothing
 *
 * The data is summed as host-order 16-bit words, so the result is in the
 * same byte order as the words stored in the buffer.  The accumulator is
 * unsigned so that folding the carries is well defined even once the top
 * bit has been set by a long input.
 */
static unsigned short
ipcksum(void *p, int len)
{
	uint32_t	sum = 0;

	while (len > 1) {
		/* alignment */
		sum += *(uint16_t *)p;
		p = (char *)p + sizeof (uint16_t);
		/* Fold early so the accumulator can never wrap. */
		if (sum & 0x80000000)
			sum = (sum & 0xFFFF) + (sum >> 16);
		len -= 2;
	}

	if (len == 1) {
		/*
		 * An odd trailing byte is treated as the first byte of a
		 * zero-padded 16-bit word, whatever the host byte order.
		 */
		uint16_t last = 0;

		*(unsigned char *)&last = *(unsigned char *)p;
		sum += last;
	}

	while (sum >> 16)
		sum = (sum & 0xFFFF) + (sum >> 16);

	return ((unsigned short)~sum);
}
586 
/*
 * Names for the bits of the TCP header flags byte, used with the %b
 * conversion when a TCP header is printed.  Terminated by a NULL name.
 */
static const mdb_bitmask_t tcp_flags[] = {
	{ "SYN",	TH_SYN,		TH_SYN	},
	{ "ACK",	TH_ACK,		TH_ACK	},
	{ "FIN",	TH_FIN,		TH_FIN	},
	{ "RST",	TH_RST,		TH_RST	},
	{ "PSH",	TH_PUSH,	TH_PUSH	},
	{ "ECE",	TH_ECE,		TH_ECE	},
	{ "CWR",	TH_CWR,		TH_CWR	},
	{ NULL,		0,		0	}
};
597 
598 static void
599 tcphdr_print(struct tcphdr *tcph)
600 {
601 	in_port_t	sport, dport;
602 	tcp_seq		seq, ack;
603 	uint16_t	win, urp;
604 
605 	mdb_printf("%<b>TCP header%</b>\n");
606 
607 	mdb_nhconvert(&sport, &tcph->th_sport, sizeof (sport));
608 	mdb_nhconvert(&dport, &tcph->th_dport, sizeof (dport));
609 	mdb_nhconvert(&seq, &tcph->th_seq, sizeof (seq));
610 	mdb_nhconvert(&ack, &tcph->th_ack, sizeof (ack));
611 	mdb_nhconvert(&win, &tcph->th_win, sizeof (win));
612 	mdb_nhconvert(&urp, &tcph->th_urp, sizeof (urp));
613 
614 	mdb_printf("%<u>%6s %6s %10s %10s %4s %5s %5s %5s %-15s%</u>\n",
615 	    "SPORT", "DPORT", "SEQ", "ACK", "HLEN", "WIN", "CSUM", "URP",
616 	    "FLAGS");
617 	mdb_printf("%6hu %6hu %10u %10u %4d %5hu %5hu %5hu <%b>\n",
618 	    sport, dport, seq, ack, tcph->th_off << 2, win,
619 	    tcph->th_sum, urp, tcph->th_flags, tcp_flags);
620 	mdb_printf("0x%04x 0x%04x 0x%08x 0x%08x\n\n",
621 	    sport, dport, seq, ack);
622 }
623 
624 /* ARGSUSED */
625 static int
626 tcphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
627 {
628 	struct tcphdr	tcph;
629 
630 	if (!(flags & DCMD_ADDRSPEC))
631 		return (DCMD_USAGE);
632 
633 	if (mdb_vread(&tcph, sizeof (tcph), addr) == -1) {
634 		mdb_warn("failed to read TCP header at %p", addr);
635 		return (DCMD_ERR);
636 	}
637 	tcphdr_print(&tcph);
638 	return (DCMD_OK);
639 }
640 
641 static void
642 udphdr_print(struct udphdr *udph)
643 {
644 	in_port_t	sport, dport;
645 	uint16_t	hlen;
646 
647 	mdb_printf("%<b>UDP header%</b>\n");
648 
649 	mdb_nhconvert(&sport, &udph->uh_sport, sizeof (sport));
650 	mdb_nhconvert(&dport, &udph->uh_dport, sizeof (dport));
651 	mdb_nhconvert(&hlen, &udph->uh_ulen, sizeof (hlen));
652 
653 	mdb_printf("%<u>%14s %14s %5s %6s%</u>\n",
654 	    "SPORT", "DPORT", "LEN", "CSUM");
655 	mdb_printf("%5hu (0x%04x) %5hu (0x%04x) %5hu 0x%04hx\n\n", sport, sport,
656 	    dport, dport, hlen, udph->uh_sum);
657 }
658 
659 /* ARGSUSED */
660 static int
661 udphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
662 {
663 	struct udphdr	udph;
664 
665 	if (!(flags & DCMD_ADDRSPEC))
666 		return (DCMD_USAGE);
667 
668 	if (mdb_vread(&udph, sizeof (udph), addr) == -1) {
669 		mdb_warn("failed to read UDP header at %p", addr);
670 		return (DCMD_ERR);
671 	}
672 	udphdr_print(&udph);
673 	return (DCMD_OK);
674 }
675 
676 static void
677 sctphdr_print(sctp_hdr_t *sctph)
678 {
679 	in_port_t sport, dport;
680 
681 	mdb_printf("%<b>SCTP header%</b>\n");
682 	mdb_nhconvert(&sport, &sctph->sh_sport, sizeof (sport));
683 	mdb_nhconvert(&dport, &sctph->sh_dport, sizeof (dport));
684 
685 	mdb_printf("%<u>%14s %14s %10s %10s%</u>\n",
686 	    "SPORT", "DPORT", "VTAG", "CHKSUM");
687 	mdb_printf("%5hu (0x%04x) %5hu (0x%04x) %10u 0x%08x\n\n", sport, sport,
688 	    dport, dport, sctph->sh_verf, sctph->sh_chksum);
689 }
690 
691 /* ARGSUSED */
692 static int
693 sctphdr(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *av)
694 {
695 	sctp_hdr_t sctph;
696 
697 	if (!(flags & DCMD_ADDRSPEC))
698 		return (DCMD_USAGE);
699 
700 	if (mdb_vread(&sctph, sizeof (sctph), addr) == -1) {
701 		mdb_warn("failed to read SCTP header at %p", addr);
702 		return (DCMD_ERR);
703 	}
704 
705 	sctphdr_print(&sctph);
706 	return (DCMD_OK);
707 }
708 
709 static int
710 transport_hdr(int proto, uintptr_t addr)
711 {
712 	mdb_printf("\n");
713 	switch (proto) {
714 	case IPPROTO_TCP: {
715 		struct tcphdr tcph;
716 
717 		if (mdb_vread(&tcph, sizeof (tcph), addr) == -1) {
718 			mdb_warn("failed to read TCP header at %p", addr);
719 			return (DCMD_ERR);
720 		}
721 		tcphdr_print(&tcph);
722 		break;
723 	}
724 	case IPPROTO_UDP:  {
725 		struct udphdr udph;
726 
727 		if (mdb_vread(&udph, sizeof (udph), addr) == -1) {
728 			mdb_warn("failed to read UDP header at %p", addr);
729 			return (DCMD_ERR);
730 		}
731 		udphdr_print(&udph);
732 		break;
733 	}
734 	case IPPROTO_SCTP: {
735 		sctp_hdr_t sctph;
736 
737 		if (mdb_vread(&sctph, sizeof (sctph), addr) == -1) {
738 			mdb_warn("failed to read SCTP header at %p", addr);
739 			return (DCMD_ERR);
740 		}
741 		sctphdr_print(&sctph);
742 		break;
743 	}
744 	default:
745 		break;
746 	}
747 
748 	return (DCMD_OK);
749 }
750 
/*
 * Names for the flag bits of the IPv4 fragment-offset-and-flags field,
 * used with the %b conversion by ::iphdr.  Terminated by a NULL name.
 */
static const mdb_bitmask_t ip_flags[] = {
	{ "DF",	IPH_DF, IPH_DF	},
	{ "MF", IPH_MF,	IPH_MF	},
	{ NULL, 0,	0	}
};
756 
/*
 * ::iphdr dcmd: display the IPv4 header at addr.
 *
 * Options:
 *	-v	also display the transport header that follows
 *	-f	display the header even if the version field is unknown
 *
 * If the version nibble says IPv6 the request is handed to ip6hdr().
 * Returns DCMD_USAGE on bad options, DCMD_ERR if the header cannot be
 * read or the version is unknown without -f, and otherwise DCMD_OK or
 * the result of printing the transport header.
 */
/* ARGSUSED */
static int
iphdr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	uint_t		verbose = FALSE, force = FALSE;
	ipha_t		iph[1];
	uint16_t	ver, totlen, hdrlen, ipid, off, csum;
	uintptr_t	nxt_proto;
	char		exp_csum[8];

	if (mdb_getopts(argc, argv,
	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
	    'f', MDB_OPT_SETBITS, TRUE, &force, NULL) != argc)
		return (DCMD_USAGE);

	if (mdb_vread(iph, sizeof (*iph), addr) == -1) {
		mdb_warn("failed to read IPv4 header at %p", addr);
		return (DCMD_ERR);
	}

	/* The version is the high nibble of the first byte. */
	ver = (iph->ipha_version_and_hdr_length & 0xf0) >> 4;
	if (ver != IPV4_VERSION) {
		if (ver == IPV6_VERSION) {
			return (ip6hdr(addr, flags, argc, argv));
		} else if (!force) {
			mdb_warn("unknown IP version: %d\n", ver);
			return (DCMD_ERR);
		}
	}

	mdb_printf("%<b>IPv4 header%</b>\n");
	mdb_printf("%-34s %-34s\n"
	    "%<u>%-4s %-4s %-5s %-5s %-6s %-5s %-5s %-6s %-8s %-6s%</u>\n",
	    "SRC", "DST",
	    "HLEN", "TOS", "LEN", "ID", "OFFSET", "TTL", "PROTO", "CHKSUM",
	    "EXP-CSUM", "FLGS");

	/* The low nibble is the header length in 32-bit words. */
	hdrlen = (iph->ipha_version_and_hdr_length & 0x0f) << 2;
	mdb_nhconvert(&totlen, &iph->ipha_length, sizeof (totlen));
	mdb_nhconvert(&ipid, &iph->ipha_ident, sizeof (ipid));
	mdb_nhconvert(&off, &iph->ipha_fragment_offset_and_flags, sizeof (off));
	/*
	 * An expected checksum is only worked out for a header of the
	 * simple length, since only sizeof (*iph) bytes were read.  A sum
	 * of zero over the header means the stored checksum is shown as
	 * the expected one; otherwise an expected value is derived from
	 * the sum and the stored checksum.
	 */
	if (hdrlen == IP_SIMPLE_HDR_LENGTH) {
		if ((csum = ipcksum(iph, sizeof (*iph))) != 0)
			csum = ~(~csum + ~iph->ipha_hdr_checksum);
		else
			csum = iph->ipha_hdr_checksum;
		mdb_snprintf(exp_csum, 8, "%u", csum);
	} else {
		mdb_snprintf(exp_csum, 8, "<n/a>");
	}

	/* off is passed twice: once as the offset, once for the flag bits. */
	mdb_printf("%-34I %-34I%\n"
	    "%-4d %-4d %-5hu %-5hu %-6hu %-5hu %-5hu %-6u %-8s <%5hb>\n",
	    iph->ipha_src, iph->ipha_dst,
	    hdrlen, iph->ipha_type_of_service, totlen, ipid,
	    (off << 3) & 0xffff, iph->ipha_ttl, iph->ipha_protocol,
	    iph->ipha_hdr_checksum, exp_csum, off, ip_flags);

	if (verbose) {
		/* The transport header starts right after the IP header. */
		nxt_proto = addr + hdrlen;
		return (transport_hdr(iph->ipha_protocol, nxt_proto));
	} else {
		return (DCMD_OK);
	}
}
822 
823 /* ARGSUSED */
824 static int
825 ip6hdr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
826 {
827 	uint_t		verbose = FALSE, force = FALSE;
828 	ip6_t		iph[1];
829 	int		ver, class, flow;
830 	uint16_t	plen;
831 	uintptr_t	nxt_proto;
832 
833 	if (mdb_getopts(argc, argv,
834 	    'v', MDB_OPT_SETBITS, TRUE, &verbose,
835 	    'f', MDB_OPT_SETBITS, TRUE, &force, NULL) != argc)
836 		return (DCMD_USAGE);
837 
838 	if (mdb_vread(iph, sizeof (*iph), addr) == -1) {
839 		mdb_warn("failed to read IPv6 header at %p", addr);
840 		return (DCMD_ERR);
841 	}
842 
843 	ver = (iph->ip6_vfc & 0xf0) >> 4;
844 	if (ver != IPV6_VERSION) {
845 		if (ver == IPV4_VERSION) {
846 			return (iphdr(addr, flags, argc, argv));
847 		} else if (!force) {
848 			mdb_warn("unknown IP version: %d\n", ver);
849 			return (DCMD_ERR);
850 		}
851 	}
852 
853 	mdb_printf("%<b>IPv6 header%</b>\n");
854 	mdb_printf("%<u>%-26s %-26s %4s %7s %5s %3s %3s%</u>\n",
855 	    "SRC", "DST", "TCLS", "FLOW-ID", "PLEN", "NXT", "HOP");
856 
857 	class = (iph->ip6_vcf & IPV6_FLOWINFO_TCLASS) >> 20;
858 	mdb_nhconvert(&class, &class, sizeof (class));
859 	flow = iph->ip6_vcf & IPV6_FLOWINFO_FLOWLABEL;
860 	mdb_nhconvert(&flow, &flow, sizeof (flow));
861 	mdb_nhconvert(&plen, &iph->ip6_plen, sizeof (plen));
862 
863 	mdb_printf("%-26N %-26N %4d %7d %5hu %3d %3d\n",
864 	    &iph->ip6_src, &iph->ip6_dst,
865 	    class, flow, plen, iph->ip6_nxt, iph->ip6_hlim);
866 
867 	if (verbose) {
868 		nxt_proto = addr + sizeof (ip6_t);
869 		return (transport_hdr(iph->ip6_nxt, nxt_proto));
870 	} else {
871 		return (DCMD_OK);
872 	}
873 }
874 
875 int
876 ire(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
877 {
878 	uint_t verbose = FALSE;
879 	ire_t ire;
880 
881 	if (mdb_getopts(argc, argv,
882 	    'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL) != argc)
883 		return (DCMD_USAGE);
884 
885 	if ((flags & DCMD_LOOPFIRST) || !(flags & DCMD_LOOP)) {
886 
887 		if (verbose) {
888 			mdb_printf("%?s %40s %-20s%\n"
889 			    "%?s %40s %-20s%\n"
890 			    "%<u>%?s %40s %4s %-20s%</u>\n",
891 			    "ADDR", "SRC", "TYPE",
892 			    "", "DST", "MARKS",
893 			    "", "STACK", "ZONE", "FLAGS");
894 		} else {
895 			mdb_printf("%<u>%?s %30s %30s %5s %4s%</u>\n",
896 			    "ADDR", "SRC", "DST", "STACK", "ZONE");
897 		}
898 	}
899 
900 	if (flags & DCMD_ADDRSPEC) {
901 		(void) mdb_vread(&ire, sizeof (ire_t), addr);
902 		(void) ire_format(addr, &ire, &verbose);
903 	} else if (mdb_walk("ire", (mdb_walk_cb_t)ire_format, &verbose) == -1) {
904 		mdb_warn("failed to walk ire table");
905 		return (DCMD_ERR);
906 	}
907 
908 	return (DCMD_OK);
909 }
910 
911 static size_t
912 mi_osize(const queue_t *q)
913 {
914 	/*
915 	 * The code in common/inet/mi.c allocates an extra word to store the
916 	 * size of the allocation.  An mi_o_s is thus a size_t plus an mi_o_s.
917 	 */
918 	struct mi_block {
919 		size_t mi_nbytes;
920 		struct mi_o_s mi_o;
921 	} m;
922 
923 	if (mdb_vread(&m, sizeof (m), (uintptr_t)q->q_ptr -
924 	    sizeof (m)) == sizeof (m))
925 		return (m.mi_nbytes - sizeof (m));
926 
927 	return (0);
928 }
929 
930 static void
931 ip_ill_qinfo(const queue_t *q, char *buf, size_t nbytes)
932 {
933 	char name[32];
934 	ill_t ill;
935 
936 	if (mdb_vread(&ill, sizeof (ill),
937 	    (uintptr_t)q->q_ptr) == sizeof (ill) &&
938 	    mdb_readstr(name, sizeof (name), (uintptr_t)ill.ill_name) > 0)
939 		(void) mdb_snprintf(buf, nbytes, "if: %s", name);
940 }
941 
942 void
943 ip_qinfo(const queue_t *q, char *buf, size_t nbytes)
944 {
945 	size_t size = mi_osize(q);
946 
947 	if (size == sizeof (ill_t))
948 		ip_ill_qinfo(q, buf, nbytes);
949 }
950 
951 uintptr_t
952 ip_rnext(const queue_t *q)
953 {
954 	size_t size = mi_osize(q);
955 	ill_t ill;
956 
957 	if (size == sizeof (ill_t) && mdb_vread(&ill, sizeof (ill),
958 	    (uintptr_t)q->q_ptr) == sizeof (ill))
959 		return ((uintptr_t)ill.ill_rq);
960 
961 	return (NULL);
962 }
963 
964 uintptr_t
965 ip_wnext(const queue_t *q)
966 {
967 	size_t size = mi_osize(q);
968 	ill_t ill;
969 
970 	if (size == sizeof (ill_t) && mdb_vread(&ill, sizeof (ill),
971 	    (uintptr_t)q->q_ptr) == sizeof (ill))
972 		return ((uintptr_t)ill.ill_wq);
973 
974 	return (NULL);
975 }
976 
/*
 * ::squeue dcmd: print the core fields in an squeue_t.  With the "-v"
 * argument, provide more verbose output by listing, below the summary
 * line, the name and description of every state bit that is set.
 *
 * Without an address the dcmd is applied to every object found by the
 * genunix`squeue_cache walker.
 */
static int
squeue(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	unsigned int	i;
	unsigned int	verbose = FALSE;
	/* indent used for the state-bit lines of the verbose output */
	const int	SQUEUE_STATEDELT = (int)(sizeof (uintptr_t) + 9);
	boolean_t	arm;
	squeue_t	squeue;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("genunix`squeue_cache", "ip`squeue",
		    argc, argv) == -1) {
			mdb_warn("failed to walk squeue cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, TRUE, &verbose, NULL)
	    != argc)
		return (DCMD_USAGE);

	/* In verbose mode separate successive entries with blank lines. */
	if (!DCMD_HDRSPEC(flags) && verbose)
		mdb_printf("\n\n");

	/* Verbose output repeats the header for every entry. */
	if (DCMD_HDRSPEC(flags) || verbose) {
		mdb_printf("%?s %-5s %-3s %?s %?s %?s\n",
		    "ADDR", "STATE", "CPU",
		    "FIRST", "LAST", "WORKER");
	}

	if (mdb_vread(&squeue, sizeof (squeue_t), addr) == -1) {
		mdb_warn("cannot read squeue_t at %p", addr);
		return (DCMD_ERR);
	}

	mdb_printf("%0?p %05x %3d %0?p %0?p %0?p\n",
	    addr, squeue.sq_state, squeue.sq_bind,
	    squeue.sq_first, squeue.sq_last, squeue.sq_worker);

	if (!verbose)
		return (DCMD_OK);

	/* arm is true until the first set bit has drawn the "+-->" arrow. */
	arm = B_TRUE;
	for (i = 0; squeue_states[i].bit_name != NULL; i++) {
		/* Table entry i corresponds to bit (1 << i) of sq_state. */
		if (((squeue.sq_state) & (1 << i)) == 0)
			continue;

		if (arm) {
			mdb_printf("%*s|\n", SQUEUE_STATEDELT, "");
			mdb_printf("%*s+-->  ", SQUEUE_STATEDELT, "");
			arm = B_FALSE;
		} else
			mdb_printf("%*s      ", SQUEUE_STATEDELT, "");

		mdb_printf("%-12s %s\n", squeue_states[i].bit_name,
		    squeue_states[i].bit_descr);
	}

	return (DCMD_OK);
}
1042 
/*
 * Help text for the ::squeue dcmd.
 */
static void
ip_squeue_help(void)
{
	mdb_printf("Print the core information for a given NCA squeue_t.\n\n"
	    "Options:\n"
	    "\t-v\tbe verbose (more descriptive)\n");
}
1050 
/*
 * Debugger commands exported by this module.  Each entry gives the dcmd
 * name, its usage string, a one-line description, the implementing
 * function and, where present, a help routine.  Terminated by a NULL
 * entry.
 */
static const mdb_dcmd_t dcmds[] = {
	{ "illif", "?[-P v4 | v6]",
	    "display or filter IP Lower Level InterFace structures", illif,
	    illif_help },
	{ "iphdr", ":[-vf]", "display an IPv4 header", iphdr },
	{ "ip6hdr", ":[-vf]", "display an IPv6 header", ip6hdr },
	{ "ire", "?[-v]", "display Internet Route Entry structures", ire },
	{ "squeue", ":[-v]", "print core squeue_t info", squeue,
	    ip_squeue_help },
	{ "tcphdr", ":", "display a TCP header", tcphdr },
	{ "udphdr", ":", "display an UDP header", udphdr },
	{ "sctphdr", ":", "display an SCTP header", sctphdr },
	{ NULL }
};
1065 
/*
 * Walkers exported by this module.  Each entry gives the walker name, a
 * one-line description and its init, step and fini routines (fini is NULL
 * for walkers that keep no state of their own).  Terminated by a NULL
 * entry.
 */
static const mdb_walker_t walkers[] = {
	{ "illif", "walk list of ill interface types for all stacks",
		illif_walk_init, illif_walk_step, NULL },
	{ "illif_stack", "walk list of ill interface types",
		illif_stack_walk_init, illif_stack_walk_step,
		illif_stack_walk_fini },
	{ "ire", "walk active ire_t structures",
		ire_walk_init, ire_walk_step, NULL },
	{ "ire_ctable", "walk ire_t structures in the ctable",
		ire_ctable_walk_init, ire_ctable_walk_step, NULL },
	{ "ire_next", "walk ire_t structures in the ctable",
		ire_next_walk_init, ire_next_walk_step, NULL },
	{ "ip_stacks", "walk all the ip_stack_t",
		ip_stacks_walk_init, ip_stacks_walk_step, NULL },
	{ NULL }
};
1082 
/* Queue operations installed for ip queues by _mdb_init(). */
static const mdb_qops_t ip_qops = { ip_qinfo, ip_rnext, ip_wnext };
/* Module linkage: API version plus the dcmd and walker tables above. */
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1085 
1086 const mdb_modinfo_t *
1087 _mdb_init(void)
1088 {
1089 	GElf_Sym sym;
1090 
1091 	if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0)
1092 		mdb_qops_install(&ip_qops, (uintptr_t)sym.st_value);
1093 
1094 	return (&modinfo);
1095 }
1096 
1097 void
1098 _mdb_fini(void)
1099 {
1100 	GElf_Sym sym;
1101 
1102 	if (mdb_lookup_by_obj("ip", "ipwinit", &sym) == 0)
1103 		mdb_qops_remove(&ip_qops, (uintptr_t)sym.st_value);
1104 }
1105