xref: /titanic_50/usr/src/cmd/mdb/common/modules/genunix/vfs.c (revision 585995d5d19489bf178112c08c8c61ffc049ff6e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <mdb/mdb_modapi.h>
27 #include <mdb/mdb_ks.h>
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/door.h>
32 #include <sys/file.h>
33 #include <sys/mount.h>
34 #include <sys/proc.h>
35 #include <sys/procfs.h>
36 #include <sys/proc/prdata.h>
37 #include <sys/stat.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/fs/snode.h>
41 #include <sys/fs/fifonode.h>
42 #include <sys/fs/namenode.h>
43 #include <sys/socket.h>
44 #include <sys/stropts.h>
45 #include <sys/socketvar.h>
46 #include <sys/strsubr.h>
47 #include <sys/un.h>
48 #include <fs/sockfs/socktpi_impl.h>
49 #include <inet/ipclassifier.h>
50 #include <inet/ip_if.h>
51 #include <inet/sctp/sctp_impl.h>
52 #include <inet/sctp/sctp_addr.h>
53 
54 int
55 vfs_walk_init(mdb_walk_state_t *wsp)
56 {
57 	if (wsp->walk_addr == NULL &&
58 	    mdb_readvar(&wsp->walk_addr, "rootvfs") == -1) {
59 		mdb_warn("failed to read 'rootvfs'");
60 		return (WALK_ERR);
61 	}
62 
63 	wsp->walk_data = (void *)wsp->walk_addr;
64 	return (WALK_NEXT);
65 }
66 
67 int
68 vfs_walk_step(mdb_walk_state_t *wsp)
69 {
70 	vfs_t vfs;
71 	int status;
72 
73 	if (mdb_vread(&vfs, sizeof (vfs), wsp->walk_addr) == -1) {
74 		mdb_warn("failed to read vfs_t at %p", wsp->walk_addr);
75 		return (WALK_DONE);
76 	}
77 
78 	status = wsp->walk_callback(wsp->walk_addr, &vfs, wsp->walk_cbdata);
79 
80 	if (vfs.vfs_next == wsp->walk_data)
81 		return (WALK_DONE);
82 
83 	wsp->walk_addr = (uintptr_t)vfs.vfs_next;
84 
85 	return (status);
86 }
87 
88 /*
89  * Utility routine to read in a filesystem name given a vfs pointer.  If
90  * no vfssw entry for the vfs is available (as is the case with some pseudo-
91  * filesystems), we check against some known problem fs's: doorfs and
92  * portfs.  If that fails, we try to guess the filesystem name using
93  * symbol names.  fsname should be a buffer of size _ST_FSTYPSZ.
94  */
95 static int
96 read_fsname(uintptr_t vfsp, char *fsname)
97 {
98 	vfs_t vfs;
99 	struct vfssw vfssw_entry;
100 	GElf_Sym vfssw_sym, test_sym;
101 	char testname[MDB_SYM_NAMLEN];
102 
103 	if (mdb_vread(&vfs, sizeof (vfs), vfsp) == -1) {
104 		mdb_warn("failed to read vfs %p", vfsp);
105 		return (-1);
106 	}
107 
108 	if (mdb_lookup_by_name("vfssw", &vfssw_sym) == -1) {
109 		mdb_warn("failed to find vfssw");
110 		return (-1);
111 	}
112 
113 	/*
114 	 * vfssw is an array; we need vfssw[vfs.vfs_fstype].
115 	 */
116 	if (mdb_vread(&vfssw_entry, sizeof (vfssw_entry),
117 	    vfssw_sym.st_value + (sizeof (struct vfssw) * vfs.vfs_fstype))
118 	    == -1) {
119 		mdb_warn("failed to read vfssw index %d", vfs.vfs_fstype);
120 		return (-1);
121 	}
122 
123 	if (vfs.vfs_fstype != 0) {
124 		if (mdb_readstr(fsname, _ST_FSTYPSZ,
125 		    (uintptr_t)vfssw_entry.vsw_name) == -1) {
126 			mdb_warn("failed to find fs name %p",
127 			    vfssw_entry.vsw_name);
128 			return (-1);
129 		}
130 		return (0);
131 	}
132 
133 	/*
134 	 * Do precise detection for certain filesystem types that we
135 	 * know do not appear in vfssw[], and that we depend upon in other
136 	 * parts of the code: doorfs and portfs.
137 	 */
138 	if (mdb_lookup_by_name("door_vfs", &test_sym) != -1) {
139 		if (test_sym.st_value == vfsp) {
140 			strcpy(fsname, "doorfs");
141 			return (0);
142 		}
143 	}
144 	if (mdb_lookup_by_name("port_vfs", &test_sym) != -1) {
145 		if (test_sym.st_value == vfsp) {
146 			strcpy(fsname, "portfs");
147 			return (0);
148 		}
149 	}
150 
151 	/*
152 	 * Heuristic detection for other filesystems that don't have a
153 	 * vfssw[] entry.  These tend to be named <fsname>_vfs, so we do a
154 	 * lookup_by_addr and see if we find a symbol of that name.
155 	 */
156 	if (mdb_lookup_by_addr(vfsp, MDB_SYM_EXACT, testname, sizeof (testname),
157 	    &test_sym) != -1) {
158 		if ((strlen(testname) > 4) &&
159 		    (strcmp(testname + strlen(testname) - 4, "_vfs") == 0)) {
160 			testname[strlen(testname) - 4] = '\0';
161 			strncpy(fsname, testname, _ST_FSTYPSZ);
162 			return (0);
163 		}
164 	}
165 
166 	mdb_warn("unknown filesystem type for vfs %p", vfsp);
167 	return (-1);
168 }
169 
/*
 * Column widths for mount point display in ::fsinfo output.  In
 * non-verbose mode fsinfo() shortens any mount point whose length is
 * FSINFO_MNTLEN or more, ending the shortened path with "...".
 * NOTE(review): the LP64 value is presumably smaller to make room for the
 * wider pointer column -- confirm.
 */
#ifdef _LP64
#define	FSINFO_MNTLEN	48
#else
#define	FSINFO_MNTLEN	56
#endif
178 
179 /* ARGSUSED */
180 int
181 fsinfo(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
182 {
183 	vfs_t vfs;
184 	int len;
185 	int opt_v = 0;
186 	char buf[MAXPATHLEN];
187 	char fsname[_ST_FSTYPSZ];
188 	mntopt_t *mntopts;
189 	size_t size;
190 	int i;
191 	int first = 1;
192 	char opt[MAX_MNTOPT_STR];
193 	uintptr_t global_zone;
194 
195 	if (!(flags & DCMD_ADDRSPEC)) {
196 		if (mdb_walk_dcmd("vfs", "fsinfo", argc, argv) == -1) {
197 			mdb_warn("failed to walk file system list");
198 			return (DCMD_ERR);
199 		}
200 		return (DCMD_OK);
201 	}
202 
203 	if (mdb_getopts(argc, argv,
204 	    'v', MDB_OPT_SETBITS, TRUE, &opt_v, NULL) != argc)
205 		return (DCMD_USAGE);
206 
207 	if (DCMD_HDRSPEC(flags))
208 		mdb_printf("%<u>%?s %-15s %s%</u>\n",
209 		    "VFSP", "FS", "MOUNT");
210 
211 	if (mdb_vread(&vfs, sizeof (vfs), addr) == -1) {
212 		mdb_warn("failed to read vfs_t %p", addr);
213 		return (DCMD_ERR);
214 	}
215 
216 	if ((len = mdb_read_refstr((uintptr_t)vfs.vfs_mntpt, buf,
217 	    sizeof (buf))) <= 0)
218 		strcpy(buf, "??");
219 
220 	else if (!opt_v && (len >= FSINFO_MNTLEN))
221 		/*
222 		 * In normal mode, we truncate the path to keep the output
223 		 * clean.  In -v mode, we just print the full path.
224 		 */
225 		strcpy(&buf[FSINFO_MNTLEN - 4], "...");
226 
227 	if (read_fsname(addr, fsname) == -1)
228 		return (DCMD_ERR);
229 
230 	mdb_printf("%0?p %-15s %s\n", addr, fsname, buf);
231 
232 	if (!opt_v)
233 		return (DCMD_OK);
234 
235 	/*
236 	 * Print 'resource' string; this shows what we're mounted upon.
237 	 */
238 	if (mdb_read_refstr((uintptr_t)vfs.vfs_resource, buf,
239 	    MAXPATHLEN) <= 0)
240 		strcpy(buf, "??");
241 
242 	mdb_printf("%?s %s\n", "R:", buf);
243 
244 	/*
245 	 * Print mount options array; it sucks to be a mimic, but we copy
246 	 * the same logic as in mntvnops.c for adding zone= tags, and we
247 	 * don't bother with the obsolete dev= option.
248 	 */
249 	size = vfs.vfs_mntopts.mo_count * sizeof (mntopt_t);
250 	mntopts = mdb_alloc(size, UM_SLEEP | UM_GC);
251 
252 	if (mdb_vread(mntopts, size,
253 	    (uintptr_t)vfs.vfs_mntopts.mo_list) == -1) {
254 		mdb_warn("failed to read mntopts %p", vfs.vfs_mntopts.mo_list);
255 		return (DCMD_ERR);
256 	}
257 
258 	for (i = 0; i < vfs.vfs_mntopts.mo_count; i++) {
259 		if (mntopts[i].mo_flags & MO_SET) {
260 			if (mdb_readstr(opt, sizeof (opt),
261 			    (uintptr_t)mntopts[i].mo_name) == -1) {
262 				mdb_warn("failed to read mntopt name %p",
263 				    mntopts[i].mo_name);
264 				return (DCMD_ERR);
265 			}
266 			if (first) {
267 				mdb_printf("%?s ", "O:");
268 				first = 0;
269 			} else {
270 				mdb_printf(",");
271 			}
272 			mdb_printf("%s", opt);
273 			if (mntopts[i].mo_flags & MO_HASVALUE) {
274 				if (mdb_readstr(opt, sizeof (opt),
275 				    (uintptr_t)mntopts[i].mo_arg) == -1) {
276 					mdb_warn("failed to read mntopt "
277 					    "value %p", mntopts[i].mo_arg);
278 					return (DCMD_ERR);
279 				}
280 				mdb_printf("=%s", opt);
281 			}
282 		}
283 	}
284 
285 	if (mdb_readvar(&global_zone, "global_zone") == -1) {
286 		mdb_warn("failed to locate global_zone");
287 		return (DCMD_ERR);
288 	}
289 
290 	if ((vfs.vfs_zone != NULL) &&
291 	    ((uintptr_t)vfs.vfs_zone != global_zone)) {
292 		zone_t z;
293 
294 		if (mdb_vread(&z, sizeof (z), (uintptr_t)vfs.vfs_zone) == -1) {
295 			mdb_warn("failed to read zone");
296 			return (DCMD_ERR);
297 		}
298 		/*
299 		 * zone names are much shorter than MAX_MNTOPT_STR
300 		 */
301 		if (mdb_readstr(opt, sizeof (opt),
302 		    (uintptr_t)z.zone_name) == -1) {
303 			mdb_warn("failed to read zone name");
304 			return (DCMD_ERR);
305 		}
306 		if (first) {
307 			mdb_printf("%?s ", "O:");
308 		} else {
309 			mdb_printf(",");
310 		}
311 		mdb_printf("zone=%s", opt);
312 	}
313 	return (DCMD_OK);
314 }
315 
316 
317 #define	REALVP_DONE	0
318 #define	REALVP_ERR	1
319 #define	REALVP_CONTINUE	2
320 
321 static int
322 next_realvp(uintptr_t invp, struct vnode *outvn, uintptr_t *outvp)
323 {
324 	char fsname[_ST_FSTYPSZ];
325 
326 	*outvp = invp;
327 	if (mdb_vread(outvn, sizeof (struct vnode), invp) == -1) {
328 		mdb_warn("failed to read vnode at %p", invp);
329 		return (REALVP_ERR);
330 	}
331 
332 	if (read_fsname((uintptr_t)outvn->v_vfsp, fsname) == -1)
333 		return (REALVP_ERR);
334 
335 	/*
336 	 * We know how to do 'realvp' for as many filesystems as possible;
337 	 * for all other filesystems, we assume that the vp we are given
338 	 * is the realvp.  In the kernel, a realvp operation will sometimes
339 	 * dig through multiple layers.  Here, we only fetch the pointer
340 	 * to the next layer down.  This allows dcmds to print out the
341 	 * various layers.
342 	 */
343 	if (strcmp(fsname, "fifofs") == 0) {
344 		fifonode_t fn;
345 		if (mdb_vread(&fn, sizeof (fn),
346 		    (uintptr_t)outvn->v_data) == -1) {
347 			mdb_warn("failed to read fifonode");
348 			return (REALVP_ERR);
349 		}
350 		*outvp = (uintptr_t)fn.fn_realvp;
351 
352 	} else if (strcmp(fsname, "namefs") == 0) {
353 		struct namenode nn;
354 		if (mdb_vread(&nn, sizeof (nn),
355 		    (uintptr_t)outvn->v_data) == -1) {
356 			mdb_warn("failed to read namenode");
357 			return (REALVP_ERR);
358 		}
359 		*outvp = (uintptr_t)nn.nm_filevp;
360 
361 	} else if (outvn->v_type == VSOCK && outvn->v_stream != NULL) {
362 		struct stdata stream;
363 
364 		/*
365 		 * Sockets have a strange and different layering scheme; we
366 		 * hop over into the sockfs vnode (accessible via the stream
367 		 * head) if possible.
368 		 */
369 		if (mdb_vread(&stream, sizeof (stream),
370 		    (uintptr_t)outvn->v_stream) == -1) {
371 			mdb_warn("failed to read stream data");
372 			return (REALVP_ERR);
373 		}
374 		*outvp = (uintptr_t)stream.sd_vnode;
375 	}
376 
377 	if (*outvp == invp || *outvp == NULL)
378 		return (REALVP_DONE);
379 
380 	return (REALVP_CONTINUE);
381 }
382 
383 static void
384 pfiles_print_addr(struct sockaddr *addr)
385 {
386 	struct sockaddr_in *s_in;
387 	struct sockaddr_un *s_un;
388 	struct sockaddr_in6 *s_in6;
389 	in_port_t port;
390 
391 	switch (addr->sa_family) {
392 	case AF_INET:
393 		/* LINTED: alignment */
394 		s_in = (struct sockaddr_in *)addr;
395 		mdb_nhconvert(&port, &s_in->sin_port, sizeof (port));
396 		mdb_printf("AF_INET %I %d ", s_in->sin_addr.s_addr, port);
397 		break;
398 
399 	case AF_INET6:
400 		/* LINTED: alignment */
401 		s_in6 = (struct sockaddr_in6 *)addr;
402 		mdb_nhconvert(&port, &s_in6->sin6_port, sizeof (port));
403 		mdb_printf("AF_INET6 %N %d ", &(s_in6->sin6_addr), port);
404 		break;
405 
406 	case AF_UNIX:
407 		s_un = (struct sockaddr_un *)addr;
408 		mdb_printf("AF_UNIX %s ", s_un->sun_path);
409 		break;
410 	default:
411 		mdb_printf("AF_?? (%d) ", addr->sa_family);
412 		break;
413 	}
414 }
415 
416 static int
417 pfiles_get_sonode(vnode_t *v_sock, struct sonode *sonode)
418 {
419 	if (mdb_vread(sonode, sizeof (struct sonode),
420 	    (uintptr_t)v_sock->v_data) == -1) {
421 		mdb_warn("failed to read sonode");
422 		return (-1);
423 	}
424 
425 	return (0);
426 }
427 
428 static int
429 pfiles_get_tpi_sonode(vnode_t *v_sock, sotpi_sonode_t *sotpi_sonode)
430 {
431 
432 	struct stdata stream;
433 
434 	if (mdb_vread(&stream, sizeof (stream),
435 	    (uintptr_t)v_sock->v_stream) == -1) {
436 		mdb_warn("failed to read stream data");
437 		return (-1);
438 	}
439 
440 	if (mdb_vread(v_sock, sizeof (vnode_t),
441 	    (uintptr_t)stream.sd_vnode) == -1) {
442 		mdb_warn("failed to read stream vnode");
443 		return (-1);
444 	}
445 
446 	if (mdb_vread(sotpi_sonode, sizeof (sotpi_sonode_t),
447 	    (uintptr_t)v_sock->v_data) == -1) {
448 		mdb_warn("failed to read sotpi_sonode");
449 		return (-1);
450 	}
451 
452 	return (0);
453 }
454 
455 /*
456  * Do some digging to get a reasonable pathname for this vnode. 'path'
457  * should point at a buffer of MAXPATHLEN in size.
458  */
459 static int
460 pfiles_dig_pathname(uintptr_t vp, char *path)
461 {
462 	vnode_t v;
463 
464 	bzero(path, MAXPATHLEN);
465 
466 	if (mdb_vread(&v, sizeof (v), vp) == -1) {
467 		mdb_warn("failed to read vnode");
468 		return (-1);
469 	}
470 
471 	if (v.v_path == NULL) {
472 		/*
473 		 * fifo's and doors are special.   Some have pathnames, and
474 		 * some do not.  And for these, it is pointless to go off to
475 		 * mdb_vnode2path, which is very slow.
476 		 *
477 		 * Event ports never have a pathname.
478 		 */
479 		if (v.v_type == VFIFO || v.v_type == VDOOR || v.v_type == VPORT)
480 			return (0);
481 
482 		/*
483 		 * For sockets, we won't find a path unless we print the path
484 		 * associated with transport's STREAM device.
485 		 */
486 		if (v.v_type == VSOCK) {
487 			struct sonode sonode;
488 
489 			if (pfiles_get_sonode(&v, &sonode) == -1) {
490 				return (-1);
491 			}
492 			if (!SOCK_IS_NONSTR(&sonode)) {
493 				struct sockparams *sp = sonode.so_sockparams;
494 				vp = (uintptr_t)sp->sp_sdev_info.sd_vnode;
495 			} else {
496 				vp = NULL;
497 			}
498 		}
499 	}
500 
501 
502 	/*
503 	 * mdb_vnode2path will print an error for us as needed, but not
504 	 * finding a pathname is not really an error, so we plow on.
505 	 */
506 	(void) mdb_vnode2path(vp, path, MAXPATHLEN);
507 
508 	/*
509 	 * A common problem is that device pathnames are prefixed with
510 	 * /dev/../devices/.  We just clean those up slightly:
511 	 * 	/dev/../devices/<mumble> --> /devices/<mumble>
512 	 * 	/dev/pts/../../devices/<mumble> --> /devices/<mumble>
513 	 */
514 	if (strncmp("/dev/../devices/", path, strlen("/dev/../devices/")) == 0)
515 		strcpy(path, path + 7);
516 
517 	if (strncmp("/dev/pts/../../devices/", path,
518 	    strlen("/dev/pts/../../devices/")) == 0)
519 		strcpy(path, path + 14);
520 
521 	return (0);
522 }
523 
524 const struct fs_type {
525 	int type;
526 	const char *name;
527 } fs_types[] = {
528 	{ VNON,   "NON" },
529 	{ VREG,   "REG" },
530 	{ VDIR,   "DIR" },
531 	{ VBLK,   "BLK" },
532 	{ VCHR,   "CHR" },
533 	{ VLNK,   "LNK" },
534 	{ VFIFO,  "FIFO" },
535 	{ VDOOR,  "DOOR" },
536 	{ VPROC,  "PROC" },
537 	{ VSOCK,  "SOCK" },
538 	{ VPORT,  "PORT" },
539 	{ VBAD,   "BAD" }
540 };
541 
542 #define	NUM_FS_TYPES (sizeof (fs_types) / sizeof (struct fs_type))
543 
/*
 * State shared between ::pfiles and its walker callbacks.
 */
struct pfiles_cbdata {
	int opt_p;	/* set when -p was given: print pathnames only */
	int fd;		/* running count of callback invocations; printed as the fd */
};
548 
/*
 * Convert between an object and the list_node_t embedded in it, using
 * the list's list_offset.  These do pure address arithmetic and never
 * dereference the object or node, so they are safe to apply to addresses
 * that live in the target rather than in the debugger.  Every macro
 * argument is parenthesized so that expression arguments are cast as a
 * whole.
 */
#define	list_d2l(a, obj)	\
	((list_node_t *)(((char *)(obj)) + (a)->list_offset))
#define	list_object(a, node)	\
	((void *)(((char *)(node)) - (a)->list_offset))
551 
552 /*
553  * SCTP interface for geting the first source address of a sctp_t.
554  */
555 int
556 sctp_getsockaddr(sctp_t *sctp, struct sockaddr *addr)
557 {
558 	int			err = -1;
559 	int			i;
560 	int			l;
561 	sctp_saddr_ipif_t	*pobj;
562 	sctp_saddr_ipif_t	obj;
563 	size_t			added = 0;
564 	sin6_t			*sin6;
565 	sin_t			*sin4;
566 	int			scanned = 0;
567 	boolean_t		skip_lback = B_FALSE;
568 
569 	addr->sa_family = sctp->sctp_family;
570 	if (sctp->sctp_nsaddrs == 0)
571 		goto done;
572 
573 	/*
574 	 * Skip loopback addresses for non-loopback assoc.
575 	 */
576 	if (sctp->sctp_state >= SCTPS_ESTABLISHED && !sctp->sctp_loopback) {
577 		skip_lback = B_TRUE;
578 	}
579 
580 	for (i = 0; i < SCTP_IPIF_HASH; i++) {
581 		if (sctp->sctp_saddrs[i].ipif_count == 0)
582 			continue;
583 
584 		pobj = list_object(&sctp->sctp_saddrs[i].sctp_ipif_list,
585 		    sctp->sctp_saddrs[i].sctp_ipif_list.list_head.list_next);
586 		if (mdb_vread(&obj, sizeof (sctp_saddr_ipif_t),
587 		    (uintptr_t)pobj) == -1) {
588 			mdb_warn("failed to read sctp_saddr_ipif_t");
589 			return (err);
590 		}
591 
592 		for (l = 0; l < sctp->sctp_saddrs[i].ipif_count; l++) {
593 			sctp_ipif_t	ipif;
594 			in6_addr_t	laddr;
595 			list_node_t 	*pnode;
596 			list_node_t	node;
597 
598 			if (mdb_vread(&ipif, sizeof (sctp_ipif_t),
599 			    (uintptr_t)obj.saddr_ipifp) == -1) {
600 				mdb_warn("failed to read sctp_ipif_t");
601 				return (err);
602 			}
603 			laddr = ipif.sctp_ipif_saddr;
604 
605 			scanned++;
606 			if ((ipif.sctp_ipif_state == SCTP_IPIFS_CONDEMNED) ||
607 			    SCTP_DONT_SRC(&obj) ||
608 			    (ipif.sctp_ipif_ill->sctp_ill_flags &
609 			    PHYI_LOOPBACK) && skip_lback) {
610 				if (scanned >= sctp->sctp_nsaddrs)
611 					goto done;
612 
613 				/* LINTED: alignment */
614 				pnode = list_d2l(&sctp->sctp_saddrs[i].
615 				    sctp_ipif_list, pobj);
616 				if (mdb_vread(&node, sizeof (list_node_t),
617 				    (uintptr_t)pnode) == -1) {
618 					mdb_warn("failed to read list_node_t");
619 					return (err);
620 				}
621 				pobj = list_object(&sctp->sctp_saddrs[i].
622 				    sctp_ipif_list, node.list_next);
623 				if (mdb_vread(&obj, sizeof (sctp_saddr_ipif_t),
624 				    (uintptr_t)pobj) == -1) {
625 					mdb_warn("failed to read "
626 					    "sctp_saddr_ipif_t");
627 					return (err);
628 				}
629 				continue;
630 			}
631 
632 			switch (sctp->sctp_family) {
633 			case AF_INET:
634 				/* LINTED: alignment */
635 				sin4 = (sin_t *)addr;
636 				if ((sctp->sctp_state <= SCTPS_LISTEN) &&
637 				    sctp->sctp_bound_to_all) {
638 					sin4->sin_addr.s_addr = INADDR_ANY;
639 					sin4->sin_port = sctp->sctp_lport;
640 				} else {
641 					sin4 += added;
642 					sin4->sin_family = AF_INET;
643 					sin4->sin_port = sctp->sctp_lport;
644 					IN6_V4MAPPED_TO_INADDR(&laddr,
645 					    &sin4->sin_addr);
646 				}
647 				break;
648 
649 			case AF_INET6:
650 				/* LINTED: alignment */
651 				sin6 = (sin6_t *)addr;
652 				if ((sctp->sctp_state <= SCTPS_LISTEN) &&
653 				    sctp->sctp_bound_to_all) {
654 					bzero(&sin6->sin6_addr,
655 					    sizeof (sin6->sin6_addr));
656 					sin6->sin6_port = sctp->sctp_lport;
657 				} else {
658 					sin6 += added;
659 					sin6->sin6_family = AF_INET6;
660 					sin6->sin6_port = sctp->sctp_lport;
661 					sin6->sin6_addr = laddr;
662 				}
663 				sin6->sin6_flowinfo = sctp->sctp_ip6h->ip6_vcf &
664 				    ~IPV6_VERS_AND_FLOW_MASK;
665 				sin6->sin6_scope_id = 0;
666 				sin6->__sin6_src_id = 0;
667 				break;
668 			}
669 			added++;
670 			if (added >= 1) {
671 				err = 0;
672 				goto done;
673 			}
674 			if (scanned >= sctp->sctp_nsaddrs)
675 				goto done;
676 
677 			/* LINTED: alignment */
678 			pnode = list_d2l(&sctp->sctp_saddrs[i].sctp_ipif_list,
679 			    pobj);
680 			if (mdb_vread(&node, sizeof (list_node_t),
681 			    (uintptr_t)pnode) == -1) {
682 				mdb_warn("failed to read list_node_t");
683 				return (err);
684 			}
685 			pobj = list_object(&sctp->sctp_saddrs[i].
686 			    sctp_ipif_list, node.list_next);
687 			if (mdb_vread(&obj, sizeof (sctp_saddr_ipif_t),
688 			    (uintptr_t)pobj) == -1) {
689 				mdb_warn("failed to read sctp_saddr_ipif_t");
690 				return (err);
691 			}
692 		}
693 	}
694 done:
695 	return (err);
696 }
697 
698 /*
699  * SCTP interface for geting the primary peer address of a sctp_t.
700  */
701 static int
702 sctp_getpeeraddr(sctp_t *sctp, struct sockaddr *addr)
703 {
704 	struct sockaddr_in	*sin4;
705 	struct sockaddr_in6	*sin6;
706 	sctp_faddr_t		sctp_primary;
707 	in6_addr_t		faddr;
708 
709 	if (sctp->sctp_faddrs == NULL)
710 		return (-1);
711 
712 	addr->sa_family = sctp->sctp_family;
713 	if (mdb_vread(&sctp_primary, sizeof (sctp_faddr_t),
714 	    (uintptr_t)sctp->sctp_primary) == -1) {
715 		mdb_warn("failed to read sctp primary faddr");
716 		return (-1);
717 	}
718 	faddr = sctp_primary.faddr;
719 
720 	switch (sctp->sctp_family) {
721 	case AF_INET:
722 		/* LINTED: alignment */
723 		sin4 = (struct sockaddr_in *)addr;
724 		IN6_V4MAPPED_TO_INADDR(&faddr, &sin4->sin_addr);
725 		sin4->sin_port = sctp->sctp_fport;
726 		sin4->sin_family = AF_INET;
727 		break;
728 
729 	case AF_INET6:
730 		/* LINTED: alignment */
731 		sin6 = (struct sockaddr_in6 *)addr;
732 		sin6->sin6_addr = faddr;
733 		sin6->sin6_port = sctp->sctp_fport;
734 		sin6->sin6_family = AF_INET6;
735 		sin6->sin6_flowinfo = 0;
736 		sin6->sin6_scope_id = 0;
737 		sin6->__sin6_src_id = 0;
738 		break;
739 	}
740 
741 	return (0);
742 }
743 
744 static int
745 tpi_sock_print(sotpi_sonode_t *sotpi_sonode)
746 {
747 	if (sotpi_sonode->st_info.sti_laddr_valid == 1) {
748 		struct sockaddr *laddr =
749 		    mdb_alloc(sotpi_sonode->st_info.sti_laddr_len, UM_SLEEP);
750 		if (mdb_vread(laddr, sotpi_sonode->st_info.sti_laddr_len,
751 		    (uintptr_t)sotpi_sonode->st_info.sti_laddr_sa) == -1) {
752 			mdb_warn("failed to read sotpi_sonode socket addr");
753 			return (-1);
754 		}
755 
756 		mdb_printf("socket: ");
757 		pfiles_print_addr(laddr);
758 	}
759 
760 	if (sotpi_sonode->st_info.sti_faddr_valid == 1) {
761 		struct sockaddr *faddr =
762 		    mdb_alloc(sotpi_sonode->st_info.sti_faddr_len, UM_SLEEP);
763 		if (mdb_vread(faddr, sotpi_sonode->st_info.sti_faddr_len,
764 		    (uintptr_t)sotpi_sonode->st_info.sti_faddr_sa) == -1) {
765 			mdb_warn("failed to read sotpi_sonode remote addr");
766 			return (-1);
767 		}
768 
769 		mdb_printf("remote: ");
770 		pfiles_print_addr(faddr);
771 	}
772 
773 	return (0);
774 }
775 
776 static int
777 tcpip_sock_print(struct sonode *socknode)
778 {
779 	switch (socknode->so_family) {
780 	case AF_INET:
781 	{
782 		conn_t conn_t;
783 		in_port_t port;
784 
785 		if (mdb_vread(&conn_t, sizeof (conn_t),
786 		    (uintptr_t)socknode->so_proto_handle) == -1) {
787 			mdb_warn("failed to read conn_t V4");
788 			return (-1);
789 		}
790 
791 		mdb_printf("socket: ");
792 		mdb_nhconvert(&port, &conn_t.conn_lport, sizeof (port));
793 		mdb_printf("AF_INET %I %d ", conn_t.conn_src, port);
794 
795 		/*
796 		 * If this is a listening socket, we don't print
797 		 * the remote address.
798 		 */
799 		if (IPCL_IS_TCP(&conn_t) && IPCL_IS_BOUND(&conn_t) == 0 ||
800 		    IPCL_IS_UDP(&conn_t) && IPCL_IS_CONNECTED(&conn_t)) {
801 			mdb_printf("remote: ");
802 			mdb_nhconvert(&port, &conn_t.conn_fport, sizeof (port));
803 			mdb_printf("AF_INET %I %d ", conn_t.conn_rem, port);
804 		}
805 
806 		break;
807 	}
808 
809 	case AF_INET6:
810 	{
811 		conn_t conn_t;
812 		in_port_t port;
813 
814 		if (mdb_vread(&conn_t, sizeof (conn_t),
815 		    (uintptr_t)socknode->so_proto_handle) == -1) {
816 			mdb_warn("failed to read conn_t V6");
817 			return (-1);
818 		}
819 
820 		mdb_printf("socket: ");
821 		mdb_nhconvert(&port, &conn_t.conn_lport, sizeof (port));
822 		mdb_printf("AF_INET6 %N %d ", &conn_t.conn_srcv6, port);
823 
824 		/*
825 		 * If this is a listening socket, we don't print
826 		 * the remote address.
827 		 */
828 		if (IPCL_IS_TCP(&conn_t) && IPCL_IS_BOUND(&conn_t) == 0 ||
829 		    IPCL_IS_UDP(&conn_t) && IPCL_IS_CONNECTED(&conn_t)) {
830 			mdb_printf("remote: ");
831 			mdb_nhconvert(&port, &conn_t.conn_fport, sizeof (port));
832 			mdb_printf("AF_INET6 %N %d ", &conn_t.conn_remv6, port);
833 		}
834 
835 		break;
836 	}
837 
838 	default:
839 		mdb_printf("AF_?? (%d)", socknode->so_family);
840 		break;
841 	}
842 
843 	return (0);
844 }
845 
846 static int
847 sctp_sock_print(struct sonode *socknode)
848 {
849 	sctp_t sctp_t;
850 
851 	struct sockaddr *laddr = mdb_alloc(sizeof (struct sockaddr), UM_SLEEP);
852 	struct sockaddr *faddr = mdb_alloc(sizeof (struct sockaddr), UM_SLEEP);
853 
854 	if (mdb_vread(&sctp_t, sizeof (sctp_t),
855 	    (uintptr_t)socknode->so_proto_handle) == -1) {
856 		mdb_warn("failed to read sctp_t");
857 		return (-1);
858 	}
859 
860 	if (sctp_getsockaddr(&sctp_t, laddr) == 0) {
861 		mdb_printf("socket:");
862 		pfiles_print_addr(laddr);
863 	}
864 	if (sctp_getpeeraddr(&sctp_t, faddr) == 0) {
865 		mdb_printf("remote:");
866 		pfiles_print_addr(faddr);
867 	}
868 
869 	return (0);
870 }
871 
/*
 * Socket printer used for the SDP entries of sock_prints[].  It prints
 * nothing and always reports success.
 */
/* ARGSUSED */
static int
sdp_sock_print(struct sonode *socknode)
{
	return (0);
}
878 
/*
 * Table used by pfile_callback() to pick the routine that prints a
 * non-STREAMS socket.  An entry is selected by comparing family, type and
 * pro against the sonode's so_family, so_type and so_protocol.  The
 * trailing comment on each row names the device the entry corresponds to.
 * NOTE(review): the numeric values are presumably the Solaris AF_*,
 * SOCK_* and protocol constants -- confirm before replacing them with
 * symbolic names.
 */
struct sock_print {
	int	family;		/* compared against so_family */
	int	type;		/* compared against so_type */
	int	pro;		/* compared against so_protocol */
	int	(*print)(struct sonode *socknode);	/* printer to call */
} sock_prints[] = {
	{ 2,	2,	0,	tcpip_sock_print },	/* /dev/tcp	*/
	{ 2,	2,	6,	tcpip_sock_print },	/* /dev/tcp	*/
	{ 26,	2,	0,	tcpip_sock_print },	/* /dev/tcp6	*/
	{ 26,	2,	6,	tcpip_sock_print },	/* /dev/tcp6	*/
	{ 2,	1,	0,	tcpip_sock_print },	/* /dev/udp	*/
	{ 2,	1,	17,	tcpip_sock_print },	/* /dev/udp	*/
	{ 26,	1,	0,	tcpip_sock_print },	/* /dev/udp6	*/
	{ 26,	1,	17,	tcpip_sock_print },	/* /dev/udp6	*/
	{ 2,	4,	0,	tcpip_sock_print },	/* /dev/rawip	*/
	{ 26,	4,	0,	tcpip_sock_print },	/* /dev/rawip6	*/
	{ 2,	2,	132,	sctp_sock_print },	/* /dev/sctp	*/
	{ 26,	2,	132,	sctp_sock_print },	/* /dev/sctp6	*/
	{ 2,	6,	132,	sctp_sock_print },	/* /dev/sctp	*/
	{ 26,	6,	132,	sctp_sock_print },	/* /dev/sctp6	*/
	{ 24,	4,	0,	tcpip_sock_print },	/* /dev/rts	*/
	{ 2,	2,	257,	sdp_sock_print },	/* /dev/sdp	*/
	{ 26,	2,	257,	sdp_sock_print },	/* /dev/sdp	*/
};

/* Number of entries in sock_prints[]. */
#define	NUM_SOCK_PRINTS                                         \
	(sizeof (sock_prints) / sizeof (struct sock_print))
906 
907 static int
908 pfile_callback(uintptr_t addr, const struct file *f, struct pfiles_cbdata *cb)
909 {
910 	vnode_t v, layer_vn;
911 	int myfd = cb->fd;
912 	const char *type;
913 	char path[MAXPATHLEN];
914 	uintptr_t top_vnodep, realvpp;
915 	char fsname[_ST_FSTYPSZ];
916 	int err, i;
917 
918 	cb->fd++;
919 
920 	if (addr == NULL) {
921 		return (WALK_NEXT);
922 	}
923 
924 	top_vnodep = realvpp = (uintptr_t)f->f_vnode;
925 
926 	if (mdb_vread(&v, sizeof (v), realvpp) == -1) {
927 		mdb_warn("failed to read vnode");
928 		return (DCMD_ERR);
929 	}
930 
931 	type = "?";
932 	for (i = 0; i <= NUM_FS_TYPES; i++) {
933 		if (fs_types[i].type == v.v_type)
934 			type = fs_types[i].name;
935 	}
936 
937 	do {
938 		uintptr_t next_realvpp;
939 
940 		err = next_realvp(realvpp, &layer_vn, &next_realvpp);
941 		if (next_realvpp != NULL)
942 			realvpp = next_realvpp;
943 
944 	} while (err == REALVP_CONTINUE);
945 
946 	if (err == REALVP_ERR) {
947 		mdb_warn("failed to do realvp() for %p", realvpp);
948 		return (DCMD_ERR);
949 	}
950 
951 	if (read_fsname((uintptr_t)layer_vn.v_vfsp, fsname) == -1)
952 		return (DCMD_ERR);
953 
954 	mdb_printf("%4d %4s %?0p ", myfd, type, top_vnodep);
955 
956 	if (cb->opt_p) {
957 		if (pfiles_dig_pathname(top_vnodep, path) == -1)
958 			return (DCMD_ERR);
959 
960 		mdb_printf("%s\n", path);
961 		return (DCMD_OK);
962 	}
963 
964 	/*
965 	 * Sockets generally don't have interesting pathnames; we only
966 	 * show those in the '-p' view.
967 	 */
968 	path[0] = '\0';
969 	if (v.v_type != VSOCK) {
970 		if (pfiles_dig_pathname(top_vnodep, path) == -1)
971 			return (DCMD_ERR);
972 	}
973 	mdb_printf("%s%s", path, path[0] == '\0' ? "" : " ");
974 
975 	switch (v.v_type) {
976 	case VDOOR:
977 	{
978 		door_node_t doornode;
979 		proc_t pr;
980 
981 		if (mdb_vread(&doornode, sizeof (doornode),
982 		    (uintptr_t)layer_vn.v_data) == -1) {
983 			mdb_warn("failed to read door_node");
984 			return (DCMD_ERR);
985 		}
986 
987 		if (mdb_vread(&pr, sizeof (pr),
988 		    (uintptr_t)doornode.door_target) == -1) {
989 			mdb_warn("failed to read door server process %p",
990 			    doornode.door_target);
991 			return (DCMD_ERR);
992 		}
993 		mdb_printf("[door to '%s' (proc=%p)]", pr.p_user.u_comm,
994 		    doornode.door_target);
995 		break;
996 	}
997 
998 	case VSOCK:
999 	{
1000 		vnode_t v_sock;
1001 		struct sonode so;
1002 
1003 		if (mdb_vread(&v_sock, sizeof (v_sock), realvpp) == -1) {
1004 			mdb_warn("failed to read socket vnode");
1005 			return (DCMD_ERR);
1006 		}
1007 
1008 		/*
1009 		 * Sockets can be non-stream or stream, they have to be dealed
1010 		 * with differently.
1011 		 */
1012 		if (v_sock.v_stream == NULL) {
1013 			if (pfiles_get_sonode(&v_sock, &so) == -1)
1014 				return (DCMD_ERR);
1015 
1016 			/* Pick the proper methods. */
1017 			for (i = 0; i <= NUM_SOCK_PRINTS; i++) {
1018 				if ((sock_prints[i].family == so.so_family &&
1019 				    sock_prints[i].type == so.so_type &&
1020 				    sock_prints[i].pro == so.so_protocol) ||
1021 				    (sock_prints[i].family == so.so_family &&
1022 				    sock_prints[i].type == so.so_type &&
1023 				    so.so_type == SOCK_RAW)) {
1024 					if ((*sock_prints[i].print)(&so) == -1)
1025 						return (DCMD_ERR);
1026 				}
1027 			}
1028 		} else {
1029 			sotpi_sonode_t sotpi_sonode;
1030 
1031 			if (pfiles_get_sonode(&v_sock, &so) == -1)
1032 				return (DCMD_ERR);
1033 
1034 			/*
1035 			 * If the socket is a fallback socket, read its related
1036 			 * information separately; otherwise, read it as a whole
1037 			 * tpi socket.
1038 			 */
1039 			if (so.so_state & SS_FALLBACK_COMP) {
1040 				sotpi_sonode.st_sonode = so;
1041 
1042 				if (mdb_vread(&(sotpi_sonode.st_info),
1043 				    sizeof (sotpi_info_t),
1044 				    (uintptr_t)so.so_priv) == -1)
1045 					return (DCMD_ERR);
1046 			} else {
1047 				if (pfiles_get_tpi_sonode(&v_sock,
1048 				    &sotpi_sonode) == -1)
1049 					return (DCMD_ERR);
1050 			}
1051 
1052 			if (tpi_sock_print(&sotpi_sonode) == -1)
1053 				return (DCMD_ERR);
1054 		}
1055 
1056 		break;
1057 	}
1058 
1059 	case VPORT:
1060 		mdb_printf("[event port (port=%p)]", v.v_data);
1061 		break;
1062 
1063 	case VPROC:
1064 	{
1065 		prnode_t prnode;
1066 		prcommon_t prcommon;
1067 
1068 		if (mdb_vread(&prnode, sizeof (prnode),
1069 		    (uintptr_t)layer_vn.v_data) == -1) {
1070 			mdb_warn("failed to read prnode");
1071 			return (DCMD_ERR);
1072 		}
1073 
1074 		if (mdb_vread(&prcommon, sizeof (prcommon),
1075 		    (uintptr_t)prnode.pr_common) == -1) {
1076 			mdb_warn("failed to read prcommon %p",
1077 			    prnode.pr_common);
1078 			return (DCMD_ERR);
1079 		}
1080 
1081 		mdb_printf("(proc=%p)", prcommon.prc_proc);
1082 		break;
1083 	}
1084 
1085 	default:
1086 		break;
1087 	}
1088 
1089 	mdb_printf("\n");
1090 
1091 	return (WALK_NEXT);
1092 }
1093 
1094 static int
1095 file_t_callback(uintptr_t addr, const struct file *f, struct pfiles_cbdata *cb)
1096 {
1097 	int myfd = cb->fd;
1098 
1099 	cb->fd++;
1100 
1101 	if (addr == NULL) {
1102 		return (WALK_NEXT);
1103 	}
1104 
1105 	/*
1106 	 * We really need 20 digits to print a 64-bit offset_t, but this
1107 	 * is exceedingly rare, so we cheat and assume a column width of 10
1108 	 * digits, in order to fit everything cleanly into 80 columns.
1109 	 */
1110 	mdb_printf("%?0p %4d %8x %?0p %10lld %?0p %4d\n",
1111 	    addr, myfd, f->f_flag, f->f_vnode, f->f_offset, f->f_cred,
1112 	    f->f_count);
1113 
1114 	return (WALK_NEXT);
1115 }
1116 
1117 int
1118 pfiles(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1119 {
1120 	int opt_f = 0;
1121 
1122 	struct pfiles_cbdata cb;
1123 
1124 	bzero(&cb, sizeof (cb));
1125 
1126 	if (!(flags & DCMD_ADDRSPEC))
1127 		return (DCMD_USAGE);
1128 
1129 	if (mdb_getopts(argc, argv,
1130 	    'p', MDB_OPT_SETBITS, TRUE, &cb.opt_p,
1131 	    'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL) != argc)
1132 		return (DCMD_USAGE);
1133 
1134 	if (opt_f) {
1135 		mdb_printf("%<u>%?s %4s %8s %?s %10s %?s %4s%</u>\n", "FILE",
1136 		    "FD", "FLAG", "VNODE", "OFFSET", "CRED", "CNT");
1137 		if (mdb_pwalk("allfile", (mdb_walk_cb_t)file_t_callback, &cb,
1138 		    addr) == -1) {
1139 			mdb_warn("failed to walk 'allfile'");
1140 			return (DCMD_ERR);
1141 		}
1142 	} else {
1143 		mdb_printf("%<u>%-4s %4s %?s ", "FD", "TYPE", "VNODE");
1144 		if (cb.opt_p)
1145 			mdb_printf("PATH");
1146 		else
1147 			mdb_printf("INFO");
1148 		mdb_printf("%</u>\n");
1149 
1150 		if (mdb_pwalk("allfile", (mdb_walk_cb_t)pfile_callback, &cb,
1151 		    addr) == -1) {
1152 			mdb_warn("failed to walk 'allfile'");
1153 			return (DCMD_ERR);
1154 		}
1155 	}
1156 
1157 
1158 	return (DCMD_OK);
1159 }
1160 
/*
 * Help text for ::pfiles: a short description followed by the options.
 */
void
pfiles_help(void)
{
	/* What the dcmd does. */
	mdb_printf(
	    "Given the address of a process, print information about files\n"
	    "which the process has open.  By default, this includes decoded\n"
	    "information about the file depending on file and filesystem type\n"
	    "\n");

	/* The options it accepts. */
	mdb_printf(
	    "\t-p\tPathnames; omit decoded information.  "
	    "Only display pathnames\n"
	    "\t-f\tfile_t view; show the file_t structure "
	    "corresponding to the fd\n");
}
1174