xref: /titanic_50/usr/src/uts/common/io/devinfo.c (revision 1dc8bc23152a02d4586ec1fd8612f7e8f57ceb42)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * driver for accessing kernel devinfo tree.
30  */
31 #include <sys/types.h>
32 #include <sys/pathname.h>
33 #include <sys/debug.h>
34 #include <sys/autoconf.h>
35 #include <sys/vmsystm.h>
36 #include <sys/conf.h>
37 #include <sys/file.h>
38 #include <sys/kmem.h>
39 #include <sys/modctl.h>
40 #include <sys/stat.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/sunldi_impl.h>
44 #include <sys/sunndi.h>
45 #include <sys/esunddi.h>
46 #include <sys/sunmdi.h>
47 #include <sys/ddi_impldefs.h>
48 #include <sys/ndi_impldefs.h>
49 #include <sys/mdi_impldefs.h>
50 #include <sys/devinfo_impl.h>
51 #include <sys/thread.h>
52 #include <sys/modhash.h>
53 #include <sys/bitmap.h>
54 #include <util/qsort.h>
55 #include <sys/disp.h>
56 #include <sys/kobj.h>
57 #include <sys/crc32.h>
58 
59 
#ifdef DEBUG
/*
 * Debug tracing.  di_debug selects the verbosity (0 is silent); each
 * dcmn_errN() hands its parenthesized argument list to cmn_err() only when
 * di_debug is at least N.  Invoke with doubled parentheses and a trailing
 * semicolon, e.g.:
 *
 *	dcmn_err((CE_CONT, "message %d\n", value));
 *
 * Each macro expands to a single void expression rather than a bare "if"
 * statement, so it cannot capture the "else" of an enclosing unbraced
 * if/else.
 */
static int di_debug;
#define	dcmn_err(args)	\
	((void)((di_debug >= 1) && (cmn_err args, 0)))
#define	dcmn_err2(args)	\
	((void)((di_debug >= 2) && (cmn_err args, 0)))
#define	dcmn_err3(args)	\
	((void)((di_debug >= 3) && (cmn_err args, 0)))
#else
/* Non-DEBUG builds compile the tracing calls away entirely. */
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif
70 
/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 *
 * di_max_opens is the number of slots in the di_states[] table and therefore
 * bounds the number of simultaneous opens (both kinds together).
 */
static int di_max_opens = 32;

static int di_prop_dyn = 1;		/* enable dynamic property support */

/*
 * DI_FULL_PARENT and DI_READONLY_PARENT are the minor numbers of the two
 * nodes created by di_attach().  DI_NODE_SPECIES is the number of node kinds:
 * di_open() steps through di_states[] with this stride and adds it to the
 * slot index to form the minor number handed back to the opener.
 */
#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
/* True for odd values, i.e. the read-only species. */
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

/* Values of di_state.di_iocstate. */
#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */
89 
/*
 * Keep max alignment so we can move snapshot to different platforms.
 *
 * DI_ALIGN rounds its argument up to the next multiple of 8.  The argument
 * is parenthesized so that expressions built from low-precedence operators
 * (e.g. DI_ALIGN(a | b)) are rounded as a whole.
 *
 * NOTE: Most callers should rely on the di_checkmem return value
 * being aligned, and reestablish *off_p with aligned value, instead
 * of trying to align size of their allocations: this approach will
 * minimize memory use.
 */
#define	DI_ALIGN(addr)	(((addr) + 7l) & ~7l)
99 
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 *
 * Chunks are appended by di_allocmem() and released by di_freemem().
 * Snapshot offsets address the chunks as one logical range, in list
 * order; di_mem_addr() translates an offset to an address.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};
110 
/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 *
 * The three arrays are parallel: entry i describes the node at depth i.
 * Entries are pushed and popped with PUSH_STACK()/POP_STACK().
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];	/* off_p given at push */
	struct dev_info *dip[MAX_TREE_DEPTH];		/* node at this level */
	int		circ[MAX_TREE_DEPTH];	/* ndi_devi_enter() cookie */
	int		depth;	/* depth of current node to be copied */
};
122 
/*
 * Accessors for struct di_stack.  TOP_* refer to the most recently pushed
 * entry, PARENT_OFFSET to the one below it; neither checks the depth, so
 * the caller must know the stack is deep enough.
 */
#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)

/*
 * POP_STACK: leave the top node (ndi_devi_exit() with the cookie saved at
 * push time) and drop it from the stack.
 *
 * NOTE: POP_STACK and PUSH_STACK expand to brace blocks, not expressions;
 * use them only as complete statements, and brace them when they form the
 * body of an if that has an else.
 */
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}

/*
 * PUSH_STACK: enter node (ndi_devi_enter(), saving the cookie in circ[])
 * and record it together with off_p as the new top of stack.  The node
 * argument is evaluated more than once, so it must be free of side effects.
 * No check is made against MAX_TREE_DEPTH.
 */
#define	PUSH_STACK(stack, node, off_p)	{ \
	ASSERT((node) != NULL); \
	ndi_devi_enter((dev_info_t *)(node), \
		&(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(off_p); \
	((stack)->depth)++; \
}

/* The struct di_all header, which lives at offset 0 of the snapshot. */
#define	DI_ALL_PTR(s)	DI_ALL(di_mem_addr((s), 0))
144 
145 /*
146  * With devfs, the device tree has no global locks. The device tree is
147  * dynamic and dips may come and go if they are not locked locally. Under
148  * these conditions, pointers are no longer reliable as unique IDs.
149  * Specifically, these pointers cannot be used as keys for hash tables
150  * as the same devinfo structure may be freed in one part of the tree only
151  * to be allocated as the structure for a different device in another
152  * part of the tree. This can happen if DR and the snapshot are
153  * happening concurrently.
154  * The following data structures act as keys for devinfo nodes and
155  * pathinfo nodes.
156  */
157 
/* Discriminator for the union in struct di_key. */
enum di_ktype {
	DI_DKEY = 1,	/* k_u.dkey is valid (devinfo node key) */
	DI_PKEY = 2	/* k_u.pkey is valid (pathinfo node key) */
};

/*
 * Key for a devinfo node.  The pointer alone is not a reliable identity
 * (see the comment above), so it is paired with further attributes.
 * NOTE(review): the extra fields are presumably the node's major number,
 * instance and nodeid as sampled at registration - confirm against the
 * code that fills in the key.
 */
struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

/*
 * Key for a pathinfo node: the pip plus, judging by the field names, its
 * path address and its client and pHCI devinfo nodes - confirm against the
 * code that fills in the key.
 */
struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

/* Tagged union of the two key kinds; k_type says which member is valid. */
struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};
184 
185 
struct i_lnode;

/*
 * In-kernel description of one link between two link endpoints (i_lnodes).
 * Every i_link sits on two singly linked lists: the list of links leaving
 * its source endpoint and the list of links arriving at its target.
 */
typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link w/ same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link w/ same i_lnode */
} i_link_t;

/*
 * In-kernel description of one link endpoint.
 */
typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;
234 
/*
 * Soft state associated with each instance of driver open.
 *
 * One di_state is allocated by di_open() and freed by di_close().
 * di_states is the table of per-open states: di_attach() allocates
 * di_max_opens slots, and an open's slot index is its minor number minus
 * DI_NODE_SPECIES.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state	*/
	/*
	 * NOTE(review): the hashes and counters below are not used in the
	 * part of the file reviewed here; judging by their names they track
	 * registered devinfo/pathinfo nodes and layered-driver link data
	 * while a snapshot is built - confirm against their users.
	 */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */
253 
/*
 * Message levels for the cache debug output (see CACHE_DEBUG and
 * di_cache_print()).  CACHE_DEBUG prints nothing while di_cache_debug
 * equals DI_QUIET.
 */
typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t	di_chunk = 32;		/* I/O chunk size in pages */
264 
/*
 * Lock helpers for a struct di_cache.  They take the cache object itself,
 * not a pointer to it (callers holding a pointer pass *cache).
 */
#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

/*
 * Check that whole device tree is being configured as a pre-condition for
 * cleaning up /etc/devices files.
 *
 * True when the command has both DINFOSUBTREE and DINFOFORCE set and the
 * snapshot's root_path is "/".  The snapshot header must already exist,
 * since the macro reads it through DI_ALL_PTR().
 */
#define	DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

/*
 * Print a cache debug message unless di_cache_debug is DI_QUIET.  Invoke
 * with doubled parentheses: CACHE_DEBUG((level, fmt, ...)).
 *
 * NOTE(review): this expands to a brace block, so "CACHE_DEBUG((...));"
 * as the unbraced body of an if that has an else will not compile; brace
 * such uses.
 */
#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }
279 
/*
 * Argument bundle: a snapshot offset together with the snapshot state.
 * NOTE(review): presumably handed to the pHCI walker callback
 * (build_phci_list) via mdi_vhci_walk_phcis() - confirm at the call site.
 */
typedef struct phci_walk_arg {
	di_off_t	off;
	struct di_state	*st;
} phci_walk_arg_t;
284 
/* Driver entry points, wired up in di_cb_ops and di_ops below. */
static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

/* Snapshot construction and snapshot memory management. */
static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct dev_info *, struct di_stack *,
    struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
    struct di_state *, struct dev_info *);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static void *di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

/* Snapshot cache support and vHCI/pHCI list construction. */
static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
static int build_phci_list(dev_info_t *ph_devinfo, void *arg);

/* Symbols defined outside this file. */
extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
	int (*)(dev_info_t *, void *), void *);
329 
330 
/*
 * Character/block entry points.  Only open, close and ioctl are
 * implemented by this driver; the snapshot is delivered through ioctl
 * (DINFOUSRLD copies it out), so read, write and the mapping entry points
 * are nodev.
 */
static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab  */
	D_NEW | D_MP		/* Driver compatibility flag */
};
348 
/*
 * Device operations: getinfo, attach and detach are implemented here;
 * identify and probe are nulldev and reset is nodev.  There are no bus
 * operations.
 */
static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};
361 
/*
 * Module linkage information for the kernel.
 *
 * modldrv describes this module as a device driver (mod_driverops) with
 * di_ops as its operations; modlinkage is the structure passed to
 * mod_install(), mod_info() and mod_remove() by _init(), _info() and
 * _fini().
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver %I%",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
376 
377 int
378 _init(void)
379 {
380 	int	error;
381 
382 	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);
383 
384 	error = mod_install(&modlinkage);
385 	if (error != 0) {
386 		mutex_destroy(&di_lock);
387 		return (error);
388 	}
389 
390 	return (0);
391 }
392 
393 int
394 _info(struct modinfo *modinfop)
395 {
396 	return (mod_info(&modlinkage, modinfop));
397 }
398 
399 int
400 _fini(void)
401 {
402 	int	error;
403 
404 	error = mod_remove(&modlinkage);
405 	if (error != 0) {
406 		return (error);
407 	}
408 
409 	mutex_destroy(&di_lock);
410 	return (0);
411 }
412 
413 static dev_info_t *di_dip;
414 
415 /*ARGSUSED*/
416 static int
417 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
418 {
419 	int	error = DDI_FAILURE;
420 
421 	switch (infocmd) {
422 	case DDI_INFO_DEVT2DEVINFO:
423 		*result = (void *)di_dip;
424 		error = DDI_SUCCESS;
425 		break;
426 	case DDI_INFO_DEVT2INSTANCE:
427 		/*
428 		 * All dev_t's map to the same, single instance.
429 		 */
430 		*result = (void *)0;
431 		error = DDI_SUCCESS;
432 		break;
433 	default:
434 		break;
435 	}
436 
437 	return (error);
438 }
439 
440 static int
441 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
442 {
443 	int	error = DDI_FAILURE;
444 
445 	switch (cmd) {
446 	case DDI_ATTACH:
447 		di_states = kmem_zalloc(
448 		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);
449 
450 		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
451 		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
452 		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
453 		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
454 			kmem_free(di_states,
455 			    di_max_opens * sizeof (struct di_state *));
456 			ddi_remove_minor_node(dip, NULL);
457 			error = DDI_FAILURE;
458 		} else {
459 			di_dip = dip;
460 			ddi_report_dev(dip);
461 
462 			error = DDI_SUCCESS;
463 		}
464 		break;
465 	default:
466 		error = DDI_FAILURE;
467 		break;
468 	}
469 
470 	return (error);
471 }
472 
473 static int
474 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
475 {
476 	int	error = DDI_FAILURE;
477 
478 	switch (cmd) {
479 	case DDI_DETACH:
480 		ddi_remove_minor_node(dip, NULL);
481 		di_dip = NULL;
482 		kmem_free(di_states, di_max_opens * sizeof (struct di_state *));
483 
484 		error = DDI_SUCCESS;
485 		break;
486 	default:
487 		error = DDI_FAILURE;
488 		break;
489 	}
490 
491 	return (error);
492 }
493 
494 /*
495  * Allow multiple opens by tweaking the dev_t such that it looks like each
496  * open is getting a different minor device.  Each minor gets a separate
497  * entry in the di_states[] table.  Based on the original minor number, we
498  * discriminate opens of the full and read-only nodes.  If all of the instances
499  * of the selected minor node are currently open, we return EAGAIN.
500  */
501 /*ARGSUSED*/
502 static int
503 di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
504 {
505 	int	m;
506 	minor_t	minor_parent = getminor(*devp);
507 
508 	if (minor_parent != DI_FULL_PARENT &&
509 	    minor_parent != DI_READONLY_PARENT)
510 		return (ENXIO);
511 
512 	mutex_enter(&di_lock);
513 
514 	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
515 		if (di_states[m] != NULL)
516 			continue;
517 
518 		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
519 		break;	/* It's ours. */
520 	}
521 
522 	if (m >= di_max_opens) {
523 		/*
524 		 * maximum open instance for device reached
525 		 */
526 		mutex_exit(&di_lock);
527 		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
528 		return (EAGAIN);
529 	}
530 	mutex_exit(&di_lock);
531 
532 	ASSERT(m < di_max_opens);
533 	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));
534 
535 	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
536 	    (void *)curthread, m + DI_NODE_SPECIES));
537 
538 	return (0);
539 }
540 
541 /*ARGSUSED*/
542 static int
543 di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
544 {
545 	struct di_state	*st;
546 	int		m = (int)getminor(dev) - DI_NODE_SPECIES;
547 
548 	if (m < 0) {
549 		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
550 		    m + DI_NODE_SPECIES);
551 		return (ENXIO);
552 	}
553 
554 	st = di_states[m];
555 	ASSERT(m < di_max_opens && st != NULL);
556 
557 	di_freemem(st);
558 	kmem_free(st, sizeof (struct di_state));
559 
560 	/*
561 	 * empty slot in state table
562 	 */
563 	mutex_enter(&di_lock);
564 	di_states[m] = NULL;
565 	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
566 	    (void *)curthread, m + DI_NODE_SPECIES));
567 	mutex_exit(&di_lock);
568 
569 	return (0);
570 }
571 
572 
573 /*ARGSUSED*/
574 static int
575 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
576 {
577 	int		rv, error;
578 	di_off_t	off;
579 	struct di_all	*all;
580 	struct di_state	*st;
581 	int		m = (int)getminor(dev) - DI_NODE_SPECIES;
582 	major_t		i;
583 	char		*drv_name;
584 	size_t		map_size, size;
585 	struct di_mem	*dcp;
586 	int		ndi_flags;
587 
588 	if (m < 0 || m >= di_max_opens) {
589 		return (ENXIO);
590 	}
591 
592 	st = di_states[m];
593 	ASSERT(st != NULL);
594 
595 	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));
596 
597 	switch (cmd) {
598 	case DINFOIDENT:
599 		/*
600 		 * This is called from di_init to verify that the driver
601 		 * opened is indeed devinfo. The purpose is to guard against
602 		 * sending ioctl to an unknown driver in case of an
603 		 * unresolved major number conflict during bfu.
604 		 */
605 		*rvalp = DI_MAGIC;
606 		return (0);
607 
608 	case DINFOLODRV:
609 		/*
610 		 * Hold an installed driver and return the result
611 		 */
612 		if (DI_UNPRIVILEGED_NODE(m)) {
613 			/*
614 			 * Only the fully enabled instances may issue
615 			 * DINFOLDDRV.
616 			 */
617 			return (EACCES);
618 		}
619 
620 		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
621 		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
622 			kmem_free(drv_name, MAXNAMELEN);
623 			return (EFAULT);
624 		}
625 
626 		/*
627 		 * Some 3rd party driver's _init() walks the device tree,
628 		 * so we load the driver module before configuring driver.
629 		 */
630 		i = ddi_name_to_major(drv_name);
631 		if (ddi_hold_driver(i) == NULL) {
632 			kmem_free(drv_name, MAXNAMELEN);
633 			return (ENXIO);
634 		}
635 
636 		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
637 
638 		/*
639 		 * i_ddi_load_drvconf() below will trigger a reprobe
640 		 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
641 		 * needed here.
642 		 */
643 		modunload_disable();
644 		(void) i_ddi_load_drvconf(i);
645 		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
646 		kmem_free(drv_name, MAXNAMELEN);
647 		ddi_rele_driver(i);
648 		rv = i_ddi_devs_attached(i);
649 		modunload_enable();
650 
651 		i_ddi_di_cache_invalidate(KM_SLEEP);
652 
653 		return ((rv == DDI_SUCCESS)? 0 : ENXIO);
654 
655 	case DINFOUSRLD:
656 		/*
657 		 * The case for copying snapshot to userland
658 		 */
659 		if (di_setstate(st, IOC_COPY) == -1)
660 			return (EBUSY);
661 
662 		map_size = DI_ALL_PTR(st)->map_size;
663 		if (map_size == 0) {
664 			(void) di_setstate(st, IOC_DONE);
665 			return (EFAULT);
666 		}
667 
668 		/*
669 		 * copyout the snapshot
670 		 */
671 		map_size = (map_size + PAGEOFFSET) & PAGEMASK;
672 
673 		/*
674 		 * Return the map size, so caller may do a sanity
675 		 * check against the return value of snapshot ioctl()
676 		 */
677 		*rvalp = (int)map_size;
678 
679 		/*
680 		 * Copy one chunk at a time
681 		 */
682 		off = 0;
683 		dcp = st->memlist;
684 		while (map_size) {
685 			size = dcp->buf_size;
686 			if (map_size <= size) {
687 				size = map_size;
688 			}
689 
690 			if (ddi_copyout(di_mem_addr(st, off),
691 			    (void *)(arg + off), size, mode) != 0) {
692 				(void) di_setstate(st, IOC_DONE);
693 				return (EFAULT);
694 			}
695 
696 			map_size -= size;
697 			off += size;
698 			dcp = dcp->next;
699 		}
700 
701 		di_freemem(st);
702 		(void) di_setstate(st, IOC_IDLE);
703 		return (0);
704 
705 	default:
706 		if ((cmd & ~DIIOC_MASK) != DIIOC) {
707 			/*
708 			 * Invalid ioctl command
709 			 */
710 			return (ENOTTY);
711 		}
712 		/*
713 		 * take a snapshot
714 		 */
715 		st->command = cmd & DIIOC_MASK;
716 		/*FALLTHROUGH*/
717 	}
718 
719 	/*
720 	 * Obtain enough memory to hold header + rootpath.  We prevent kernel
721 	 * memory exhaustion by freeing any previously allocated snapshot and
722 	 * refusing the operation; otherwise we would be allowing ioctl(),
723 	 * ioctl(), ioctl(), ..., panic.
724 	 */
725 	if (di_setstate(st, IOC_SNAP) == -1)
726 		return (EBUSY);
727 
728 	/*
729 	 * Initial memlist always holds di_all and the root_path - and
730 	 * is at least a page and size.
731 	 */
732 	size = sizeof (struct di_all) +
733 	    sizeof (((struct dinfo_io *)(NULL))->root_path);
734 	if (size < PAGESIZE)
735 		size = PAGESIZE;
736 	off = di_checkmem(st, 0, size);
737 	all = DI_ALL_PTR(st);
738 	off += sizeof (struct di_all);		/* real length of di_all */
739 
740 	all->devcnt = devcnt;
741 	all->command = st->command;
742 	all->version = DI_SNAPSHOT_VERSION;
743 	all->top_vhci_devinfo = 0;		/* filled by build_vhci_list. */
744 
745 	/*
746 	 * Note the endianness in case we need to transport snapshot
747 	 * over the network.
748 	 */
749 #if defined(_LITTLE_ENDIAN)
750 	all->endianness = DI_LITTLE_ENDIAN;
751 #else
752 	all->endianness = DI_BIG_ENDIAN;
753 #endif
754 
755 	/* Copyin ioctl args, store in the snapshot. */
756 	if (copyinstr((void *)arg, all->root_path,
757 	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
758 		di_freemem(st);
759 		(void) di_setstate(st, IOC_IDLE);
760 		return (EFAULT);
761 	}
762 	off += size;				/* real length of root_path */
763 
764 	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
765 		di_freemem(st);
766 		(void) di_setstate(st, IOC_IDLE);
767 		return (EINVAL);
768 	}
769 
770 	error = 0;
771 	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
772 		di_freemem(st);
773 		(void) di_setstate(st, IOC_IDLE);
774 		return (error);
775 	}
776 
777 	/*
778 	 * Only the fully enabled version may force load drivers or read
779 	 * the parent private data from a driver.
780 	 */
781 	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
782 	    DI_UNPRIVILEGED_NODE(m)) {
783 		di_freemem(st);
784 		(void) di_setstate(st, IOC_IDLE);
785 		return (EACCES);
786 	}
787 
788 	/* Do we need private data? */
789 	if (st->command & DINFOPRIVDATA) {
790 		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);
791 
792 #ifdef _MULTI_DATAMODEL
793 		switch (ddi_model_convert_from(mode & FMODELS)) {
794 		case DDI_MODEL_ILP32: {
795 			/*
796 			 * Cannot copy private data from 64-bit kernel
797 			 * to 32-bit app
798 			 */
799 			di_freemem(st);
800 			(void) di_setstate(st, IOC_IDLE);
801 			return (EINVAL);
802 		}
803 		case DDI_MODEL_NONE:
804 			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
805 				di_freemem(st);
806 				(void) di_setstate(st, IOC_IDLE);
807 				return (EFAULT);
808 			}
809 			break;
810 		}
811 #else /* !_MULTI_DATAMODEL */
812 		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
813 			di_freemem(st);
814 			(void) di_setstate(st, IOC_IDLE);
815 			return (EFAULT);
816 		}
817 #endif /* _MULTI_DATAMODEL */
818 	}
819 
820 	all->top_devinfo = DI_ALIGN(off);
821 
822 	/*
823 	 * For cache lookups we reallocate memory from scratch,
824 	 * so the value of "all" is no longer valid.
825 	 */
826 	all = NULL;
827 
828 	if (st->command & DINFOCACHE) {
829 		*rvalp = di_cache_lookup(st);
830 	} else if (snapshot_is_cacheable(st)) {
831 		DI_CACHE_LOCK(di_cache);
832 		*rvalp = di_cache_update(st);
833 		DI_CACHE_UNLOCK(di_cache);
834 	} else
835 		*rvalp = di_snapshot_and_clean(st);
836 
837 	if (*rvalp) {
838 		DI_ALL_PTR(st)->map_size = *rvalp;
839 		(void) di_setstate(st, IOC_DONE);
840 	} else {
841 		di_freemem(st);
842 		(void) di_setstate(st, IOC_IDLE);
843 	}
844 
845 	return (0);
846 }
847 
848 /*
849  * Get a chunk of memory >= size, for the snapshot
850  */
851 static void
852 di_allocmem(struct di_state *st, size_t size)
853 {
854 	struct di_mem	*mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);
855 
856 	/*
857 	 * Round up size to nearest power of 2. If it is less
858 	 * than st->mem_size, set it to st->mem_size (i.e.,
859 	 * the mem_size is doubled every time) to reduce the
860 	 * number of memory allocations.
861 	 */
862 	size_t tmp = 1;
863 	while (tmp < size) {
864 		tmp <<= 1;
865 	}
866 	size = (tmp > st->mem_size) ? tmp : st->mem_size;
867 
868 	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
869 	mem->buf_size = size;
870 
871 	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));
872 
873 	if (st->mem_size == 0) {	/* first chunk */
874 		st->memlist = mem;
875 	} else {
876 		/*
877 		 * locate end of linked list and add a chunk at the end
878 		 */
879 		struct di_mem *dcp = st->memlist;
880 		while (dcp->next != NULL) {
881 			dcp = dcp->next;
882 		}
883 
884 		dcp->next = mem;
885 	}
886 
887 	st->mem_size += size;
888 }
889 
890 /*
891  * Copy upto bufsiz bytes of the memlist to buf
892  */
893 static void
894 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
895 {
896 	struct di_mem	*dcp;
897 	size_t		copysz;
898 
899 	if (st->mem_size == 0) {
900 		ASSERT(st->memlist == NULL);
901 		return;
902 	}
903 
904 	copysz = 0;
905 	for (dcp = st->memlist; dcp; dcp = dcp->next) {
906 
907 		ASSERT(bufsiz > 0);
908 
909 		if (bufsiz <= dcp->buf_size)
910 			copysz = bufsiz;
911 		else
912 			copysz = dcp->buf_size;
913 
914 		bcopy(dcp->buf, buf, copysz);
915 
916 		buf += copysz;
917 		bufsiz -= copysz;
918 
919 		if (bufsiz == 0)
920 			break;
921 	}
922 }
923 
924 /*
925  * Free all memory for the snapshot
926  */
927 static void
928 di_freemem(struct di_state *st)
929 {
930 	struct di_mem	*dcp, *tmp;
931 
932 	dcmn_err2((CE_CONT, "di_freemem\n"));
933 
934 	if (st->mem_size) {
935 		dcp = st->memlist;
936 		while (dcp) {	/* traverse the linked list */
937 			tmp = dcp;
938 			dcp = dcp->next;
939 			ddi_umem_free(tmp->cook);
940 			kmem_free(tmp, sizeof (struct di_mem));
941 		}
942 		st->mem_size = 0;
943 		st->memlist = NULL;
944 	}
945 
946 	ASSERT(st->mem_size == 0);
947 	ASSERT(st->memlist == NULL);
948 }
949 
950 /*
951  * Copies cached data to the di_state structure.
952  * Returns:
953  *	- size of data copied, on SUCCESS
954  *	- 0 on failure
955  */
956 static int
957 di_cache2mem(struct di_cache *cache, struct di_state *st)
958 {
959 	caddr_t	pa;
960 
961 	ASSERT(st->mem_size == 0);
962 	ASSERT(st->memlist == NULL);
963 	ASSERT(!servicing_interrupt());
964 	ASSERT(DI_CACHE_LOCKED(*cache));
965 
966 	if (cache->cache_size == 0) {
967 		ASSERT(cache->cache_data == NULL);
968 		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
969 		return (0);
970 	}
971 
972 	ASSERT(cache->cache_data);
973 
974 	di_allocmem(st, cache->cache_size);
975 
976 	pa = di_mem_addr(st, 0);
977 
978 	ASSERT(pa);
979 
980 	/*
981 	 * Verify that di_allocmem() allocates contiguous memory,
982 	 * so that it is safe to do straight bcopy()
983 	 */
984 	ASSERT(st->memlist != NULL);
985 	ASSERT(st->memlist->next == NULL);
986 	bcopy(cache->cache_data, pa, cache->cache_size);
987 
988 	return (cache->cache_size);
989 }
990 
991 /*
992  * Copies a snapshot from di_state to the cache
993  * Returns:
994  *	- 0 on failure
995  *	- size of copied data on success
996  */
997 static size_t
998 di_mem2cache(struct di_state *st, struct di_cache *cache)
999 {
1000 	size_t	map_size;
1001 
1002 	ASSERT(cache->cache_size == 0);
1003 	ASSERT(cache->cache_data == NULL);
1004 	ASSERT(!servicing_interrupt());
1005 	ASSERT(DI_CACHE_LOCKED(*cache));
1006 
1007 	if (st->mem_size == 0) {
1008 		ASSERT(st->memlist == NULL);
1009 		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
1010 		return (0);
1011 	}
1012 
1013 	ASSERT(st->memlist);
1014 
1015 	/*
1016 	 * The size of the memory list may be much larger than the
1017 	 * size of valid data (map_size). Cache only the valid data
1018 	 */
1019 	map_size = DI_ALL_PTR(st)->map_size;
1020 	if (map_size == 0 || map_size < sizeof (struct di_all) ||
1021 	    map_size > st->mem_size) {
1022 		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
1023 		return (0);
1024 	}
1025 
1026 	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
1027 	cache->cache_size = map_size;
1028 	di_copymem(st, cache->cache_data, cache->cache_size);
1029 
1030 	return (map_size);
1031 }
1032 
1033 /*
1034  * Make sure there is at least "size" bytes memory left before
1035  * going on. Otherwise, start on a new chunk.
1036  */
1037 static di_off_t
1038 di_checkmem(struct di_state *st, di_off_t off, size_t size)
1039 {
1040 	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
1041 	    off, (int)size));
1042 
1043 	/*
1044 	 * di_checkmem() shouldn't be called with a size of zero.
1045 	 * But in case it is, we want to make sure we return a valid
1046 	 * offset within the memlist and not an offset that points us
1047 	 * at the end of the memlist.
1048 	 */
1049 	if (size == 0) {
1050 		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
1051 		size = 1;
1052 	}
1053 
1054 	off = DI_ALIGN(off);
1055 	if ((st->mem_size - off) < size) {
1056 		off = st->mem_size;
1057 		di_allocmem(st, size);
1058 	}
1059 
1060 	/* verify that return value is aligned */
1061 	ASSERT(off == DI_ALIGN(off));
1062 	return (off);
1063 }
1064 
1065 /*
1066  * Copy the private data format from ioctl arg.
1067  * On success, the ending offset is returned. On error 0 is returned.
1068  */
1069 static di_off_t
1070 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
1071 {
1072 	di_off_t		size;
1073 	struct di_priv_data	*priv;
1074 	struct di_all		*all = DI_ALL_PTR(st);
1075 
1076 	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
1077 	    off, (void *)arg, mode));
1078 
1079 	/*
1080 	 * Copyin data and check version.
1081 	 * We only handle private data version 0.
1082 	 */
1083 	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
1084 	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
1085 	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
1086 		kmem_free(priv, sizeof (struct di_priv_data));
1087 		return (0);
1088 	}
1089 
1090 	/*
1091 	 * Save di_priv_data copied from userland in snapshot.
1092 	 */
1093 	all->pd_version = priv->version;
1094 	all->n_ppdata = priv->n_parent;
1095 	all->n_dpdata = priv->n_driver;
1096 
1097 	/*
1098 	 * copyin private data format, modify offset accordingly
1099 	 */
1100 	if (all->n_ppdata) {	/* parent private data format */
1101 		/*
1102 		 * check memory
1103 		 */
1104 		size = all->n_ppdata * sizeof (struct di_priv_format);
1105 		all->ppdata_format = off = di_checkmem(st, off, size);
1106 		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
1107 		    mode) != 0) {
1108 			kmem_free(priv, sizeof (struct di_priv_data));
1109 			return (0);
1110 		}
1111 
1112 		off += size;
1113 	}
1114 
1115 	if (all->n_dpdata) {	/* driver private data format */
1116 		/*
1117 		 * check memory
1118 		 */
1119 		size = all->n_dpdata * sizeof (struct di_priv_format);
1120 		all->dpdata_format = off = di_checkmem(st, off, size);
1121 		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
1122 		    mode) != 0) {
1123 			kmem_free(priv, sizeof (struct di_priv_data));
1124 			return (0);
1125 		}
1126 
1127 		off += size;
1128 	}
1129 
1130 	kmem_free(priv, sizeof (struct di_priv_data));
1131 	return (off);
1132 }
1133 
1134 /*
1135  * Return the real address based on the offset (off) within snapshot
1136  */
1137 static void *
1138 di_mem_addr(struct di_state *st, di_off_t off)
1139 {
1140 	struct di_mem	*dcp = st->memlist;
1141 
1142 	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
1143 	    (void *)dcp, off));
1144 
1145 	ASSERT(off < st->mem_size);
1146 
1147 	while (off >= dcp->buf_size) {
1148 		off -= dcp->buf_size;
1149 		dcp = dcp->next;
1150 	}
1151 
1152 	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
1153 	    off, (void *)(dcp->buf + off)));
1154 
1155 	return (dcp->buf + off);
1156 }
1157 
1158 /*
1159  * Ideally we would use the whole key to derive the hash
1160  * value. However, the probability that two keys will
1161  * have the same dip (or pip) is very low, so
1162  * hashing by dip (or pip) pointer should suffice.
1163  */
1164 static uint_t
1165 di_hash_byptr(void *arg, mod_hash_key_t key)
1166 {
1167 	struct di_key	*dik = key;
1168 	size_t		rshift;
1169 	void		*ptr;
1170 
1171 	ASSERT(arg == NULL);
1172 
1173 	switch (dik->k_type) {
1174 	case DI_DKEY:
1175 		ptr = dik->k_u.dkey.dk_dip;
1176 		rshift = highbit(sizeof (struct dev_info));
1177 		break;
1178 	case DI_PKEY:
1179 		ptr = dik->k_u.pkey.pk_pip;
1180 		rshift = highbit(sizeof (struct mdi_pathinfo));
1181 		break;
1182 	default:
1183 		panic("devinfo: unknown key type");
1184 		/*NOTREACHED*/
1185 	}
1186 	return (mod_hash_byptr((void *)rshift, ptr));
1187 }
1188 
1189 static void
1190 di_key_dtor(mod_hash_key_t key)
1191 {
1192 	char		*path_addr;
1193 	struct di_key	*dik = key;
1194 
1195 	switch (dik->k_type) {
1196 	case DI_DKEY:
1197 		break;
1198 	case DI_PKEY:
1199 		path_addr = dik->k_u.pkey.pk_path_addr;
1200 		if (path_addr)
1201 			kmem_free(path_addr, strlen(path_addr) + 1);
1202 		break;
1203 	default:
1204 		panic("devinfo: unknown key type");
1205 		/*NOTREACHED*/
1206 	}
1207 
1208 	kmem_free(dik, sizeof (struct di_key));
1209 }
1210 
1211 static int
1212 di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
1213 {
1214 	if (dk1->dk_dip !=  dk2->dk_dip)
1215 		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);
1216 
1217 	if (dk1->dk_major != DDI_MAJOR_T_NONE &&
1218 	    dk2->dk_major != DDI_MAJOR_T_NONE) {
1219 		if (dk1->dk_major !=  dk2->dk_major)
1220 			return (dk1->dk_major > dk2->dk_major ? 1 : -1);
1221 
1222 		if (dk1->dk_inst !=  dk2->dk_inst)
1223 			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
1224 	}
1225 
1226 	if (dk1->dk_nodeid != dk2->dk_nodeid)
1227 		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);
1228 
1229 	return (0);
1230 }
1231 
1232 static int
1233 di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
1234 {
1235 	char	*p1, *p2;
1236 	int	rv;
1237 
1238 	if (pk1->pk_pip !=  pk2->pk_pip)
1239 		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);
1240 
1241 	p1 = pk1->pk_path_addr;
1242 	p2 = pk2->pk_path_addr;
1243 
1244 	p1 = p1 ? p1 : "";
1245 	p2 = p2 ? p2 : "";
1246 
1247 	rv = strcmp(p1, p2);
1248 	if (rv)
1249 		return (rv > 0  ? 1 : -1);
1250 
1251 	if (pk1->pk_client !=  pk2->pk_client)
1252 		return (pk1->pk_client > pk2->pk_client ? 1 : -1);
1253 
1254 	if (pk1->pk_phci !=  pk2->pk_phci)
1255 		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);
1256 
1257 	return (0);
1258 }
1259 
1260 static int
1261 di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
1262 {
1263 	struct di_key	*dik1, *dik2;
1264 
1265 	dik1 = key1;
1266 	dik2 = key2;
1267 
1268 	if (dik1->k_type != dik2->k_type) {
1269 		panic("devinfo: mismatched keys");
1270 		/*NOTREACHED*/
1271 	}
1272 
1273 	switch (dik1->k_type) {
1274 	case DI_DKEY:
1275 		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
1276 	case DI_PKEY:
1277 		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
1278 	default:
1279 		panic("devinfo: unknown key type");
1280 		/*NOTREACHED*/
1281 	}
1282 }
1283 
1284 /*
1285  * This is the main function that takes a snapshot
1286  */
1287 static di_off_t
1288 di_snapshot(struct di_state *st)
1289 {
1290 	di_off_t	off;
1291 	struct di_all	*all;
1292 	dev_info_t	*rootnode;
1293 	char		buf[80];
1294 	int		plen;
1295 	char		*path;
1296 	vnode_t		*vp;
1297 
1298 	all = DI_ALL_PTR(st);
1299 	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));
1300 
1301 	/*
1302 	 * Verify path before entrusting it to e_ddi_hold_devi_by_path because
1303 	 * some platforms have OBP bugs where executing the NDI_PROMNAME code
1304 	 * path against an invalid path results in panic.  The lookupnameat
1305 	 * is done relative to rootdir without a leading '/' on "devices/"
1306 	 * to force the lookup to occur in the global zone.
1307 	 */
1308 	plen = strlen("devices/") + strlen(all->root_path) + 1;
1309 	path = kmem_alloc(plen, KM_SLEEP);
1310 	(void) snprintf(path, plen, "devices/%s", all->root_path);
1311 	if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
1312 		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1313 		    all->root_path));
1314 		kmem_free(path, plen);
1315 		return (0);
1316 	}
1317 	kmem_free(path, plen);
1318 	VN_RELE(vp);
1319 
1320 	/*
1321 	 * Hold the devinfo node referred by the path.
1322 	 */
1323 	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
1324 	if (rootnode == NULL) {
1325 		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1326 		    all->root_path));
1327 		return (0);
1328 	}
1329 
1330 	(void) snprintf(buf, sizeof (buf),
1331 	    "devinfo registered dips (statep=%p)", (void *)st);
1332 
1333 	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
1334 	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1335 	    NULL, di_key_cmp, KM_SLEEP);
1336 
1337 
1338 	(void) snprintf(buf, sizeof (buf),
1339 	    "devinfo registered pips (statep=%p)", (void *)st);
1340 
1341 	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
1342 	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1343 	    NULL, di_key_cmp, KM_SLEEP);
1344 
1345 	/*
1346 	 * copy the device tree
1347 	 */
1348 	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);
1349 
1350 	if (DINFOPATH & st->command) {
1351 		mdi_walk_vhcis(build_vhci_list, st);
1352 	}
1353 
1354 	ddi_release_devi(rootnode);
1355 
1356 	/*
1357 	 * copy the devnames array
1358 	 */
1359 	all->devnames = off;
1360 	off = di_copydevnm(&all->devnames, st);
1361 
1362 
1363 	/* initialize the hash tables */
1364 	st->lnode_count = 0;
1365 	st->link_count = 0;
1366 
1367 	if (DINFOLYR & st->command) {
1368 		off = di_getlink_data(off, st);
1369 	}
1370 
1371 	/*
1372 	 * Free up hash tables
1373 	 */
1374 	mod_hash_destroy_hash(st->reg_dip_hash);
1375 	mod_hash_destroy_hash(st->reg_pip_hash);
1376 
1377 	/*
1378 	 * Record the timestamp now that we are done with snapshot.
1379 	 *
1380 	 * We compute the checksum later and then only if we cache
1381 	 * the snapshot, since checksumming adds some overhead.
1382 	 * The checksum is checked later if we read the cache file.
1383 	 * from disk.
1384 	 *
1385 	 * Set checksum field to 0 as CRC is calculated with that
1386 	 * field set to 0.
1387 	 */
1388 	all->snapshot_time = ddi_get_time();
1389 	all->cache_checksum = 0;
1390 
1391 	ASSERT(all->snapshot_time != 0);
1392 
1393 	return (off);
1394 }
1395 
1396 /*
1397  * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
1398  */
1399 static di_off_t
1400 di_snapshot_and_clean(struct di_state *st)
1401 {
1402 	di_off_t	off;
1403 
1404 	modunload_disable();
1405 	off = di_snapshot(st);
1406 	if (off != 0 && (st->command & DINFOCLEANUP)) {
1407 		ASSERT(DEVICES_FILES_CLEANABLE(st));
1408 		/*
1409 		 * Cleanup /etc/devices files:
1410 		 * In order to accurately account for the system configuration
1411 		 * in /etc/devices files, the appropriate drivers must be
1412 		 * fully configured before the cleanup starts.
1413 		 * So enable modunload only after the cleanup.
1414 		 */
1415 		i_ddi_clean_devices_files();
1416 		/*
1417 		 * Remove backing store nodes for unused devices,
1418 		 * which retain past permissions customizations
1419 		 * and may be undesired for newly configured devices.
1420 		 */
1421 		dev_devices_cleanup();
1422 	}
1423 	modunload_enable();
1424 
1425 	return (off);
1426 }
1427 
1428 /*
1429  * construct vhci linkage in the snapshot.
1430  */
1431 static int
1432 build_vhci_list(dev_info_t *vh_devinfo, void *arg)
1433 {
1434 	struct di_all	*all;
1435 	struct di_node	*me;
1436 	struct di_state	*st;
1437 	di_off_t	off;
1438 	phci_walk_arg_t	pwa;
1439 
1440 	dcmn_err3((CE_CONT, "build_vhci list\n"));
1441 
1442 	dcmn_err3((CE_CONT, "vhci node %s%d\n",
1443 	    ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));
1444 
1445 	st = (struct di_state *)arg;
1446 	if (di_dip_find(st, vh_devinfo, &off) != 0) {
1447 		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1448 		return (DDI_WALK_TERMINATE);
1449 	}
1450 
1451 	dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
1452 	    st->mem_size, off));
1453 
1454 	all = DI_ALL_PTR(st);
1455 	if (all->top_vhci_devinfo == 0) {
1456 		all->top_vhci_devinfo = off;
1457 	} else {
1458 		me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));
1459 
1460 		while (me->next_vhci != 0) {
1461 			me = DI_NODE(di_mem_addr(st, me->next_vhci));
1462 		}
1463 
1464 		me->next_vhci = off;
1465 	}
1466 
1467 	pwa.off = off;
1468 	pwa.st = st;
1469 	mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);
1470 
1471 	return (DDI_WALK_CONTINUE);
1472 }
1473 
1474 /*
1475  * construct phci linkage for the given vhci in the snapshot.
1476  */
1477 static int
1478 build_phci_list(dev_info_t *ph_devinfo, void *arg)
1479 {
1480 	struct di_node	*vh_di_node;
1481 	struct di_node	*me;
1482 	phci_walk_arg_t	*pwa;
1483 	di_off_t	off;
1484 
1485 	pwa = (phci_walk_arg_t *)arg;
1486 
1487 	dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
1488 	    pwa->off));
1489 
1490 	vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
1491 	if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
1492 		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1493 		return (DDI_WALK_TERMINATE);
1494 	}
1495 
1496 	dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
1497 	    ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));
1498 
1499 	if (vh_di_node->top_phci == 0) {
1500 		vh_di_node->top_phci = off;
1501 		return (DDI_WALK_CONTINUE);
1502 	}
1503 
1504 	me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));
1505 
1506 	while (me->next_phci != 0) {
1507 		me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
1508 	}
1509 	me->next_phci = off;
1510 
1511 	return (DDI_WALK_CONTINUE);
1512 }
1513 
1514 /*
1515  * Assumes all devinfo nodes in device tree have been snapshotted
1516  */
1517 static void
1518 snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
1519 {
1520 	struct dev_info	*node;
1521 	struct di_node	*me;
1522 	di_off_t	off;
1523 
1524 	ASSERT(mutex_owned(&dnp->dn_lock));
1525 
1526 	node = DEVI(dnp->dn_head);
1527 	for (; node; node = node->devi_next) {
1528 		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
1529 			continue;
1530 
1531 		ASSERT(off > 0);
1532 		me = DI_NODE(di_mem_addr(st, off));
1533 		ASSERT(me->next == 0 || me->next == -1);
1534 		/*
1535 		 * Only nodes which were BOUND when they were
1536 		 * snapshotted will be added to per-driver list.
1537 		 */
1538 		if (me->next != -1)
1539 			continue;
1540 
1541 		*off_p = off;
1542 		off_p = &me->next;
1543 	}
1544 
1545 	*off_p = 0;
1546 }
1547 
1548 /*
1549  * Copy the devnames array, so we have a list of drivers in the snapshot.
1550  * Also makes it possible to locate the per-driver devinfo nodes.
1551  */
1552 static di_off_t
1553 di_copydevnm(di_off_t *off_p, struct di_state *st)
1554 {
1555 	int		i;
1556 	di_off_t	off;
1557 	size_t		size;
1558 	struct di_devnm	*dnp;
1559 
1560 	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));
1561 
1562 	/*
1563 	 * make sure there is some allocated memory
1564 	 */
1565 	size = devcnt * sizeof (struct di_devnm);
1566 	*off_p = off = di_checkmem(st, *off_p, size);
1567 	dnp = DI_DEVNM(di_mem_addr(st, off));
1568 	off += size;
1569 
1570 	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
1571 	    devcnt, off));
1572 
1573 	for (i = 0; i < devcnt; i++) {
1574 		if (devnamesp[i].dn_name == NULL) {
1575 			continue;
1576 		}
1577 
1578 		/*
1579 		 * dn_name is not freed during driver unload or removal.
1580 		 *
1581 		 * There is a race condition when make_devname() changes
1582 		 * dn_name during our strcpy. This should be rare since
1583 		 * only add_drv does this. At any rate, we never had a
1584 		 * problem with ddi_name_to_major(), which should have
1585 		 * the same problem.
1586 		 */
1587 		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
1588 		    devnamesp[i].dn_name, devnamesp[i].dn_instance, off));
1589 
1590 		size = strlen(devnamesp[i].dn_name) + 1;
1591 		dnp[i].name = off = di_checkmem(st, off, size);
1592 		(void) strcpy((char *)di_mem_addr(st, off),
1593 		    devnamesp[i].dn_name);
1594 		off += size;
1595 
1596 		mutex_enter(&devnamesp[i].dn_lock);
1597 
1598 		/*
1599 		 * Snapshot per-driver node list
1600 		 */
1601 		snap_driver_list(st, &devnamesp[i], &dnp[i].head);
1602 
1603 		/*
1604 		 * This is not used by libdevinfo, leave it for now
1605 		 */
1606 		dnp[i].flags = devnamesp[i].dn_flags;
1607 		dnp[i].instance = devnamesp[i].dn_instance;
1608 
1609 		/*
1610 		 * get global properties
1611 		 */
1612 		if ((DINFOPROP & st->command) &&
1613 		    devnamesp[i].dn_global_prop_ptr) {
1614 			dnp[i].global_prop = off;
1615 			off = di_getprop(DI_PROP_GLB_LIST,
1616 			    &devnamesp[i].dn_global_prop_ptr->prop_list,
1617 			    &dnp[i].global_prop, st, NULL);
1618 		}
1619 
1620 		/*
1621 		 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
1622 		 */
1623 		if (CB_DRV_INSTALLED(devopsp[i])) {
1624 			if (devopsp[i]->devo_cb_ops) {
1625 				dnp[i].ops |= DI_CB_OPS;
1626 				if (devopsp[i]->devo_cb_ops->cb_str)
1627 					dnp[i].ops |= DI_STREAM_OPS;
1628 			}
1629 			if (NEXUS_DRV(devopsp[i])) {
1630 				dnp[i].ops |= DI_BUS_OPS;
1631 			}
1632 		}
1633 
1634 		mutex_exit(&devnamesp[i].dn_lock);
1635 	}
1636 
1637 	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));
1638 
1639 	return (off);
1640 }
1641 
1642 /*
1643  * Copy the kernel devinfo tree. The tree and the devnames array forms
1644  * the entire snapshot (see also di_copydevnm).
1645  */
1646 static di_off_t
1647 di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
1648 {
1649 	di_off_t	off;
1650 	struct dev_info	*node;
1651 	struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);
1652 
1653 	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
1654 	    (void *)root, *off_p));
1655 
1656 	/* force attach drivers */
1657 	if (i_ddi_devi_attached((dev_info_t *)root) &&
1658 	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
1659 		(void) ndi_devi_config((dev_info_t *)root,
1660 		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
1661 		    NDI_DRV_CONF_REPROBE);
1662 	}
1663 
1664 	/*
1665 	 * Push top_devinfo onto a stack
1666 	 *
1667 	 * The stack is necessary to avoid recursion, which can overrun
1668 	 * the kernel stack.
1669 	 */
1670 	PUSH_STACK(dsp, root, off_p);
1671 
1672 	/*
1673 	 * As long as there is a node on the stack, copy the node.
1674 	 * di_copynode() is responsible for pushing and popping
1675 	 * child and sibling nodes on the stack.
1676 	 */
1677 	while (!EMPTY_STACK(dsp)) {
1678 		node = TOP_NODE(dsp);
1679 		off = di_copynode(node, dsp, st);
1680 	}
1681 
1682 	/*
1683 	 * Free the stack structure
1684 	 */
1685 	kmem_free(dsp, sizeof (struct di_stack));
1686 
1687 	return (off);
1688 }
1689 
1690 /*
1691  * This is the core function, which copies all data associated with a single
1692  * node into the snapshot. The amount of information is determined by the
1693  * ioctl command.
1694  */
1695 static di_off_t
1696 di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
1697 {
1698 	di_off_t	off;
1699 	struct di_node	*me;
1700 	size_t		size;
1701 
1702 	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
1703 	ASSERT((node != NULL) && (node == TOP_NODE(dsp)));
1704 
1705 	/*
1706 	 * check memory usage, and fix offsets accordingly.
1707 	 */
1708 	size = sizeof (struct di_node);
1709 	*(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
1710 	me = DI_NODE(di_mem_addr(st, off));
1711 	me->self = off;
1712 	off += size;
1713 
1714 	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
1715 	    node->devi_node_name, node->devi_instance, off));
1716 
1717 	/*
1718 	 * Node parameters:
1719 	 * self		-- offset of current node within snapshot
1720 	 * nodeid	-- pointer to PROM node (tri-valued)
1721 	 * state	-- hot plugging device state
1722 	 * node_state	-- devinfo node state
1723 	 */
1724 	me->instance = node->devi_instance;
1725 	me->nodeid = node->devi_nodeid;
1726 	me->node_class = node->devi_node_class;
1727 	me->attributes = node->devi_node_attributes;
1728 	me->state = node->devi_state;
1729 	me->flags = node->devi_flags;
1730 	me->node_state = node->devi_node_state;
1731 	me->next_vhci = 0;		/* Filled up by build_vhci_list. */
1732 	me->top_phci = 0;		/* Filled up by build_phci_list. */
1733 	me->next_phci = 0;		/* Filled up by build_phci_list. */
1734 	me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
1735 	me->user_private_data = NULL;
1736 
1737 	/*
1738 	 * Get parent's offset in snapshot from the stack
1739 	 * and store it in the current node
1740 	 */
1741 	if (dsp->depth > 1) {
1742 		me->parent = *(PARENT_OFFSET(dsp));
1743 	}
1744 
1745 	/*
1746 	 * Save the offset of this di_node in a hash table.
1747 	 * This is used later to resolve references to this
1748 	 * dip from other parts of the tree (per-driver list,
1749 	 * multipathing linkages, layered usage linkages).
1750 	 * The key used for the hash table is derived from
1751 	 * information in the dip.
1752 	 */
1753 	di_register_dip(st, (dev_info_t *)node, me->self);
1754 
1755 #ifdef	DEVID_COMPATIBILITY
1756 	/* check for devid as property marker */
1757 	if (node->devi_devid_str) {
1758 		ddi_devid_t	devid;
1759 
1760 		/*
1761 		 * The devid is now represented as a property. For
1762 		 * compatibility with di_devid() interface in libdevinfo we
1763 		 * must return it as a binary structure in the snapshot. When
1764 		 * (if) di_devid() is removed from libdevinfo then the code
1765 		 * related to DEVID_COMPATIBILITY can be removed.
1766 		 */
1767 		if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
1768 		    DDI_SUCCESS) {
1769 			size = ddi_devid_sizeof(devid);
1770 			off = di_checkmem(st, off, size);
1771 			me->devid = off;
1772 			bcopy(devid, di_mem_addr(st, off), size);
1773 			off += size;
1774 			ddi_devid_free(devid);
1775 		}
1776 	}
1777 #endif	/* DEVID_COMPATIBILITY */
1778 
1779 	if (node->devi_node_name) {
1780 		size = strlen(node->devi_node_name) + 1;
1781 		me->node_name = off = di_checkmem(st, off, size);
1782 		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
1783 		off += size;
1784 	}
1785 
1786 	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
1787 		size = node->devi_compat_length;
1788 		me->compat_names = off = di_checkmem(st, off, size);
1789 		me->compat_length = (int)size;
1790 		bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
1791 		off += size;
1792 	}
1793 
1794 	if (node->devi_addr) {
1795 		size = strlen(node->devi_addr) + 1;
1796 		me->address = off = di_checkmem(st, off, size);
1797 		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
1798 		off += size;
1799 	}
1800 
1801 	if (node->devi_binding_name) {
1802 		size = strlen(node->devi_binding_name) + 1;
1803 		me->bind_name = off = di_checkmem(st, off, size);
1804 		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
1805 		off += size;
1806 	}
1807 
1808 	me->drv_major = node->devi_major;
1809 
1810 	/*
1811 	 * If the dip is BOUND, set the next pointer of the
1812 	 * per-instance list to -1, indicating that it is yet to be resolved.
1813 	 * This will be resolved later in snap_driver_list().
1814 	 */
1815 	if (me->drv_major != -1) {
1816 		me->next = -1;
1817 	} else {
1818 		me->next = 0;
1819 	}
1820 
1821 	/*
1822 	 * An optimization to skip mutex_enter when not needed.
1823 	 */
1824 	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
1825 		goto priv_data;
1826 	}
1827 
1828 	/*
1829 	 * LOCKING: We already have an active ndi_devi_enter to gather the
1830 	 * minor data, and we will take devi_lock to gather properties as
1831 	 * needed off di_getprop.
1832 	 */
1833 	if (!(DINFOMINOR & st->command)) {
1834 		goto path;
1835 	}
1836 
1837 	ASSERT(DEVI_BUSY_OWNED(node));
1838 	if (node->devi_minor) {		/* minor data */
1839 		me->minor_data = off;
1840 		off = di_getmdata(node->devi_minor, &me->minor_data,
1841 		    me->self, st);
1842 	}
1843 
1844 path:
1845 	if (!(DINFOPATH & st->command)) {
1846 		goto property;
1847 	}
1848 
1849 	if (MDI_VHCI(node)) {
1850 		me->multipath_component = MULTIPATH_COMPONENT_VHCI;
1851 	}
1852 
1853 	if (MDI_CLIENT(node)) {
1854 		me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
1855 		me->multipath_client = off;
1856 		off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
1857 		    me->self, st, 1);
1858 		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
1859 		    "component type = %d.  off=%d",
1860 		    me->multipath_client,
1861 		    (void *)node, node->devi_mdi_component, off));
1862 	}
1863 
1864 	if (MDI_PHCI(node)) {
1865 		me->multipath_component = MULTIPATH_COMPONENT_PHCI;
1866 		me->multipath_phci = off;
1867 		off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
1868 		    me->self, st, 0);
1869 		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
1870 		    "component type = %d.  off=%d",
1871 		    me->multipath_phci,
1872 		    (void *)node, node->devi_mdi_component, off));
1873 	}
1874 
1875 property:
1876 	if (!(DINFOPROP & st->command)) {
1877 		goto priv_data;
1878 	}
1879 
1880 	if (node->devi_drv_prop_ptr) {	/* driver property list */
1881 		me->drv_prop = off;
1882 		off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr,
1883 		    &me->drv_prop, st, node);
1884 	}
1885 
1886 	if (node->devi_sys_prop_ptr) {	/* system property list */
1887 		me->sys_prop = off;
1888 		off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr,
1889 		    &me->sys_prop, st, node);
1890 	}
1891 
1892 	if (node->devi_hw_prop_ptr) {	/* hardware property list */
1893 		me->hw_prop = off;
1894 		off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr,
1895 		    &me->hw_prop, st, node);
1896 	}
1897 
1898 	if (node->devi_global_prop_list == NULL) {
1899 		me->glob_prop = (di_off_t)-1;	/* not global property */
1900 	} else {
1901 		/*
1902 		 * Make copy of global property list if this devinfo refers
1903 		 * global properties different from what's on the devnames
1904 		 * array. It can happen if there has been a forced
1905 		 * driver.conf update. See mod_drv(1M).
1906 		 */
1907 		ASSERT(me->drv_major != -1);
1908 		if (node->devi_global_prop_list !=
1909 		    devnamesp[me->drv_major].dn_global_prop_ptr) {
1910 			me->glob_prop = off;
1911 			off = di_getprop(DI_PROP_GLB_LIST,
1912 			    &node->devi_global_prop_list->prop_list,
1913 			    &me->glob_prop, st, node);
1914 		}
1915 	}
1916 
1917 priv_data:
1918 	if (!(DINFOPRIVDATA & st->command)) {
1919 		goto pm_info;
1920 	}
1921 
1922 	if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
1923 		me->parent_data = off;
1924 		off = di_getppdata(node, &me->parent_data, st);
1925 	}
1926 
1927 	if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
1928 		me->driver_data = off;
1929 		off = di_getdpdata(node, &me->driver_data, st);
1930 	}
1931 
1932 pm_info: /* NOT implemented */
1933 
1934 subtree:
1935 	/* keep the stack aligned */
1936 	off = DI_ALIGN(off);
1937 
1938 	if (!(DINFOSUBTREE & st->command)) {
1939 		POP_STACK(dsp);
1940 		return (off);
1941 	}
1942 
1943 child:
1944 	/*
1945 	 * If there is a child--push child onto stack.
1946 	 * Hold the parent busy while doing so.
1947 	 */
1948 	if (node->devi_child) {
1949 		me->child = off;
1950 		PUSH_STACK(dsp, node->devi_child, &me->child);
1951 		return (me->child);
1952 	}
1953 
1954 sibling:
1955 	/*
1956 	 * no child node, unroll the stack till a sibling of
1957 	 * a parent node is found or root node is reached
1958 	 */
1959 	POP_STACK(dsp);
1960 	while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) {
1961 		node = TOP_NODE(dsp);
1962 		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
1963 		POP_STACK(dsp);
1964 	}
1965 
1966 	if (!EMPTY_STACK(dsp)) {
1967 		/*
1968 		 * a sibling is found, replace top of stack by its sibling
1969 		 */
1970 		me->sibling = off;
1971 		PUSH_STACK(dsp, node->devi_sibling, &me->sibling);
1972 		return (me->sibling);
1973 	}
1974 
1975 	/*
1976 	 * DONE with all nodes
1977 	 */
1978 	return (off);
1979 }
1980 
1981 static i_lnode_t *
1982 i_lnode_alloc(int modid)
1983 {
1984 	i_lnode_t	*i_lnode;
1985 
1986 	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);
1987 
1988 	ASSERT(modid != -1);
1989 	i_lnode->modid = modid;
1990 
1991 	return (i_lnode);
1992 }
1993 
1994 static void
1995 i_lnode_free(i_lnode_t *i_lnode)
1996 {
1997 	kmem_free(i_lnode, sizeof (i_lnode_t));
1998 }
1999 
2000 static void
2001 i_lnode_check_free(i_lnode_t *i_lnode)
2002 {
2003 	/* This lnode and its dip must have been snapshotted */
2004 	ASSERT(i_lnode->self > 0);
2005 	ASSERT(i_lnode->di_node->self > 0);
2006 
2007 	/* at least 1 link (in or out) must exist for this lnode */
2008 	ASSERT(i_lnode->link_in || i_lnode->link_out);
2009 
2010 	i_lnode_free(i_lnode);
2011 }
2012 
2013 static i_link_t *
2014 i_link_alloc(int spec_type)
2015 {
2016 	i_link_t	*i_link;
2017 
2018 	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
2019 	i_link->spec_type = spec_type;
2020 
2021 	return (i_link);
2022 }
2023 
2024 static void
2025 i_link_check_free(i_link_t *i_link)
2026 {
2027 	/* This link must have been snapshotted */
2028 	ASSERT(i_link->self > 0);
2029 
2030 	/* Both endpoint lnodes must exist for this link */
2031 	ASSERT(i_link->src_lnode);
2032 	ASSERT(i_link->tgt_lnode);
2033 
2034 	kmem_free(i_link, sizeof (i_link_t));
2035 }
2036 
2037 /*ARGSUSED*/
2038 static uint_t
2039 i_lnode_hashfunc(void *arg, mod_hash_key_t key)
2040 {
2041 	i_lnode_t	*i_lnode = (i_lnode_t *)key;
2042 	struct di_node	*ptr;
2043 	dev_t		dev;
2044 
2045 	dev = i_lnode->devt;
2046 	if (dev != DDI_DEV_T_NONE)
2047 		return (i_lnode->modid + getminor(dev) + getmajor(dev));
2048 
2049 	ptr = i_lnode->di_node;
2050 	ASSERT(ptr->self > 0);
2051 	if (ptr) {
2052 		uintptr_t k = (uintptr_t)ptr;
2053 		k >>= (int)highbit(sizeof (struct di_node));
2054 		return ((uint_t)k);
2055 	}
2056 
2057 	return (i_lnode->modid);
2058 }
2059 
2060 static int
2061 i_lnode_cmp(void *arg1, void *arg2)
2062 {
2063 	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
2064 	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;
2065 
2066 	if (i_lnode1->modid != i_lnode2->modid) {
2067 		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
2068 	}
2069 
2070 	if (i_lnode1->di_node != i_lnode2->di_node)
2071 		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);
2072 
2073 	if (i_lnode1->devt != i_lnode2->devt)
2074 		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);
2075 
2076 	return (0);
2077 }
2078 
2079 /*
2080  * An lnode represents a {dip, dev_t} tuple. A link represents a
2081  * {src_lnode, tgt_lnode, spec_type} tuple.
2082  * The following callback assumes that LDI framework ref-counts the
2083  * src_dip and tgt_dip while invoking this callback.
2084  */
2085 static int
2086 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
2087 {
2088 	struct di_state	*st = (struct di_state *)arg;
2089 	i_lnode_t	*src_lnode, *tgt_lnode, *i_lnode;
2090 	i_link_t	**i_link_next, *i_link;
2091 	di_off_t	soff, toff;
2092 	mod_hash_val_t	nodep = NULL;
2093 	int		res;
2094 
2095 	/*
2096 	 * if the source or target of this device usage information doesn't
2097 	 * correspond to a device node then we don't report it via
2098 	 * libdevinfo so return.
2099 	 */
2100 	if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
2101 		return (LDI_USAGE_CONTINUE);
2102 
2103 	ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
2104 	ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));
2105 
2106 	/*
2107 	 * Skip the ldi_usage if either src or tgt dip is not in the
2108 	 * snapshot. This saves us from pruning bad lnodes/links later.
2109 	 */
2110 	if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
2111 		return (LDI_USAGE_CONTINUE);
2112 	if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
2113 		return (LDI_USAGE_CONTINUE);
2114 
2115 	ASSERT(soff > 0);
2116 	ASSERT(toff > 0);
2117 
2118 	/*
2119 	 * allocate an i_lnode and add it to the lnode hash
2120 	 * if it is not already present. For this particular
2121 	 * link the lnode is a source, but it may
2122 	 * participate as tgt or src in any number of layered
2123 	 * operations - so it may already be in the hash.
2124 	 */
2125 	i_lnode = i_lnode_alloc(ldi_usage->src_modid);
2126 	i_lnode->di_node = DI_NODE(di_mem_addr(st, soff));
2127 	i_lnode->devt = ldi_usage->src_devt;
2128 
2129 	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2130 	if (res == MH_ERR_NOTFOUND) {
2131 		/*
2132 		 * new i_lnode
2133 		 * add it to the hash and increment the lnode count
2134 		 */
2135 		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2136 		ASSERT(res == 0);
2137 		st->lnode_count++;
2138 		src_lnode = i_lnode;
2139 	} else {
2140 		/* this i_lnode already exists in the lnode_hash */
2141 		i_lnode_free(i_lnode);
2142 		src_lnode = (i_lnode_t *)nodep;
2143 	}
2144 
2145 	/*
2146 	 * allocate a tgt i_lnode and add it to the lnode hash
2147 	 */
2148 	i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
2149 	i_lnode->di_node = DI_NODE(di_mem_addr(st, toff));
2150 	i_lnode->devt = ldi_usage->tgt_devt;
2151 
2152 	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2153 	if (res == MH_ERR_NOTFOUND) {
2154 		/*
2155 		 * new i_lnode
2156 		 * add it to the hash and increment the lnode count
2157 		 */
2158 		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2159 		ASSERT(res == 0);
2160 		st->lnode_count++;
2161 		tgt_lnode = i_lnode;
2162 	} else {
2163 		/* this i_lnode already exists in the lnode_hash */
2164 		i_lnode_free(i_lnode);
2165 		tgt_lnode = (i_lnode_t *)nodep;
2166 	}
2167 
2168 	/*
2169 	 * allocate a i_link
2170 	 */
2171 	i_link = i_link_alloc(ldi_usage->tgt_spec_type);
2172 	i_link->src_lnode = src_lnode;
2173 	i_link->tgt_lnode = tgt_lnode;
2174 
2175 	/*
2176 	 * add this link onto the src i_lnodes outbound i_link list
2177 	 */
2178 	i_link_next = &(src_lnode->link_out);
2179 	while (*i_link_next != NULL) {
2180 		if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
2181 		    (i_link->spec_type == (*i_link_next)->spec_type)) {
2182 			/* this link already exists */
2183 			kmem_free(i_link, sizeof (i_link_t));
2184 			return (LDI_USAGE_CONTINUE);
2185 		}
2186 		i_link_next = &((*i_link_next)->src_link_next);
2187 	}
2188 	*i_link_next = i_link;
2189 
2190 	/*
2191 	 * add this link onto the tgt i_lnodes inbound i_link list
2192 	 */
2193 	i_link_next = &(tgt_lnode->link_in);
2194 	while (*i_link_next != NULL) {
2195 		ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
2196 		i_link_next = &((*i_link_next)->tgt_link_next);
2197 	}
2198 	*i_link_next = i_link;
2199 
2200 	/*
2201 	 * add this i_link to the link hash
2202 	 */
2203 	res = mod_hash_insert(st->link_hash, i_link, i_link);
2204 	ASSERT(res == 0);
2205 	st->link_count++;
2206 
2207 	return (LDI_USAGE_CONTINUE);
2208 }
2209 
/*
 * Bookkeeping passed (as the walker argument) to i_lnode_walker() and
 * i_link_walker() while di_getlink_data() copies the lnode and link
 * hashes into the snapshot.
 */
struct i_layer_data {
	struct di_state	*st;		/* snapshot state being filled in */
	int		lnode_count;	/* di_lnode slots handed out so far */
	int		link_count;	/* di_link slots handed out so far */
	di_off_t	lnode_off;	/* offset of the first di_lnode slot */
	di_off_t 	link_off;	/* offset of the first di_link slot */
};
2217 
2218 /*ARGSUSED*/
2219 static uint_t
2220 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2221 {
2222 	i_link_t		*i_link  = (i_link_t *)key;
2223 	struct i_layer_data	*data = arg;
2224 	struct di_link		*me;
2225 	struct di_lnode		*melnode;
2226 	struct di_node		*medinode;
2227 
2228 	ASSERT(i_link->self == 0);
2229 
2230 	i_link->self = data->link_off +
2231 	    (data->link_count * sizeof (struct di_link));
2232 	data->link_count++;
2233 
2234 	ASSERT(data->link_off > 0 && data->link_count > 0);
2235 	ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
2236 	ASSERT(data->link_count <= data->st->link_count);
2237 
2238 	/* fill in fields for the di_link snapshot */
2239 	me = DI_LINK(di_mem_addr(data->st, i_link->self));
2240 	me->self = i_link->self;
2241 	me->spec_type = i_link->spec_type;
2242 
2243 	/*
2244 	 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
2245 	 * are created during the LDI table walk. Since we are
2246 	 * walking the link hash, the lnode hash has already been
2247 	 * walked and the lnodes have been snapshotted. Save lnode
2248 	 * offsets.
2249 	 */
2250 	me->src_lnode = i_link->src_lnode->self;
2251 	me->tgt_lnode = i_link->tgt_lnode->self;
2252 
2253 	/*
2254 	 * Save this link's offset in the src_lnode snapshot's link_out
2255 	 * field
2256 	 */
2257 	melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode));
2258 	me->src_link_next = melnode->link_out;
2259 	melnode->link_out = me->self;
2260 
2261 	/*
2262 	 * Put this link on the tgt_lnode's link_in field
2263 	 */
2264 	melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode));
2265 	me->tgt_link_next = melnode->link_in;
2266 	melnode->link_in = me->self;
2267 
2268 	/*
2269 	 * An i_lnode_t is only created if the corresponding dip exists
2270 	 * in the snapshot. A pointer to the di_node is saved in the
2271 	 * i_lnode_t when it is allocated. For this link, get the di_node
2272 	 * for the source lnode. Then put the link on the di_node's list
2273 	 * of src links
2274 	 */
2275 	medinode = i_link->src_lnode->di_node;
2276 	me->src_node_next = medinode->src_links;
2277 	medinode->src_links = me->self;
2278 
2279 	/*
2280 	 * Put this link on the tgt_links list of the target
2281 	 * dip.
2282 	 */
2283 	medinode = i_link->tgt_lnode->di_node;
2284 	me->tgt_node_next = medinode->tgt_links;
2285 	medinode->tgt_links = me->self;
2286 
2287 	return (MH_WALK_CONTINUE);
2288 }
2289 
2290 /*ARGSUSED*/
2291 static uint_t
2292 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2293 {
2294 	i_lnode_t		*i_lnode = (i_lnode_t *)key;
2295 	struct i_layer_data	*data = arg;
2296 	struct di_lnode		*me;
2297 	struct di_node		*medinode;
2298 
2299 	ASSERT(i_lnode->self == 0);
2300 
2301 	i_lnode->self = data->lnode_off +
2302 	    (data->lnode_count * sizeof (struct di_lnode));
2303 	data->lnode_count++;
2304 
2305 	ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2306 	ASSERT(data->link_count == 0); /* links not done yet */
2307 	ASSERT(data->lnode_count <= data->st->lnode_count);
2308 
2309 	/* fill in fields for the di_lnode snapshot */
2310 	me = DI_LNODE(di_mem_addr(data->st, i_lnode->self));
2311 	me->self = i_lnode->self;
2312 
2313 	if (i_lnode->devt == DDI_DEV_T_NONE) {
2314 		me->dev_major = DDI_MAJOR_T_NONE;
2315 		me->dev_minor = DDI_MAJOR_T_NONE;
2316 	} else {
2317 		me->dev_major = getmajor(i_lnode->devt);
2318 		me->dev_minor = getminor(i_lnode->devt);
2319 	}
2320 
2321 	/*
2322 	 * The dip corresponding to this lnode must exist in
2323 	 * the snapshot or we wouldn't have created the i_lnode_t
2324 	 * during LDI walk. Save the offset of the dip.
2325 	 */
2326 	ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2327 	me->node = i_lnode->di_node->self;
2328 
2329 	/*
2330 	 * There must be at least one link in or out of this lnode
2331 	 * or we wouldn't have created it. These fields will be set
2332 	 * during the link hash walk.
2333 	 */
2334 	ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2335 
2336 	/*
2337 	 * set the offset of the devinfo node associated with this
2338 	 * lnode. Also update the node_next next pointer.  this pointer
2339 	 * is set if there are multiple lnodes associated with the same
2340 	 * devinfo node.  (could occure when multiple minor nodes
2341 	 * are open for one device, etc.)
2342 	 */
2343 	medinode = i_lnode->di_node;
2344 	me->node_next = medinode->lnodes;
2345 	medinode->lnodes = me->self;
2346 
2347 	return (MH_WALK_CONTINUE);
2348 }
2349 
2350 static di_off_t
2351 di_getlink_data(di_off_t off, struct di_state *st)
2352 {
2353 	struct i_layer_data	data = {0};
2354 	size_t			size;
2355 
2356 	dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off));
2357 
2358 	st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
2359 	    mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
2360 	    i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);
2361 
2362 	st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
2363 	    (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));
2364 
2365 	/* get driver layering information */
2366 	(void) ldi_usage_walker(st, di_ldi_callback);
2367 
2368 	/* check if there is any link data to include in the snapshot */
2369 	if (st->lnode_count == 0) {
2370 		ASSERT(st->link_count == 0);
2371 		goto out;
2372 	}
2373 
2374 	ASSERT(st->link_count != 0);
2375 
2376 	/* get a pointer to snapshot memory for all the di_lnodes */
2377 	size = sizeof (struct di_lnode) * st->lnode_count;
2378 	data.lnode_off = off = di_checkmem(st, off, size);
2379 	off += size;
2380 
2381 	/* get a pointer to snapshot memory for all the di_links */
2382 	size = sizeof (struct di_link) * st->link_count;
2383 	data.link_off = off = di_checkmem(st, off, size);
2384 	off += size;
2385 
2386 	data.lnode_count = data.link_count = 0;
2387 	data.st = st;
2388 
2389 	/*
2390 	 * We have lnodes and links that will go into the
2391 	 * snapshot, so let's walk the respective hashes
2392 	 * and snapshot them. The various linkages are
2393 	 * also set up during the walk.
2394 	 */
2395 	mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
2396 	ASSERT(data.lnode_count == st->lnode_count);
2397 
2398 	mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
2399 	ASSERT(data.link_count == st->link_count);
2400 
2401 out:
2402 	/* free up the i_lnodes and i_links used to create the snapshot */
2403 	mod_hash_destroy_hash(st->lnode_hash);
2404 	mod_hash_destroy_hash(st->link_hash);
2405 	st->lnode_count = 0;
2406 	st->link_count = 0;
2407 
2408 	return (off);
2409 }
2410 
2411 
2412 /*
2413  * Copy all minor data nodes attached to a devinfo node into the snapshot.
2414  * It is called from di_copynode with active ndi_devi_enter to protect
2415  * the list of minor nodes.
2416  */
2417 static di_off_t
2418 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
2419 	struct di_state *st)
2420 {
2421 	di_off_t	off;
2422 	struct di_minor	*me;
2423 	size_t		size;
2424 
2425 	dcmn_err2((CE_CONT, "di_getmdata:\n"));
2426 
2427 	/*
2428 	 * check memory first
2429 	 */
2430 	off = di_checkmem(st, *off_p, sizeof (struct di_minor));
2431 	*off_p = off;
2432 
2433 	do {
2434 		me = DI_MINOR(di_mem_addr(st, off));
2435 		me->self = off;
2436 		me->type = mnode->type;
2437 		me->node = node;
2438 		me->user_private_data = NULL;
2439 
2440 		off += sizeof (struct di_minor);
2441 
2442 		/*
2443 		 * Split dev_t to major/minor, so it works for
2444 		 * both ILP32 and LP64 model
2445 		 */
2446 		me->dev_major = getmajor(mnode->ddm_dev);
2447 		me->dev_minor = getminor(mnode->ddm_dev);
2448 		me->spec_type = mnode->ddm_spec_type;
2449 
2450 		if (mnode->ddm_name) {
2451 			size = strlen(mnode->ddm_name) + 1;
2452 			me->name = off = di_checkmem(st, off, size);
2453 			(void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
2454 			off += size;
2455 		}
2456 
2457 		if (mnode->ddm_node_type) {
2458 			size = strlen(mnode->ddm_node_type) + 1;
2459 			me->node_type = off = di_checkmem(st, off, size);
2460 			(void) strcpy(di_mem_addr(st, off),
2461 			    mnode->ddm_node_type);
2462 			off += size;
2463 		}
2464 
2465 		off = di_checkmem(st, off, sizeof (struct di_minor));
2466 		me->next = off;
2467 		mnode = mnode->next;
2468 	} while (mnode);
2469 
2470 	me->next = 0;
2471 
2472 	return (off);
2473 }
2474 
2475 /*
2476  * di_register_dip(), di_find_dip(): The dip must be protected
2477  * from deallocation when using these routines - this can either
2478  * be a reference count, a busy hold or a per-driver lock.
2479  */
2480 
2481 static void
2482 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2483 {
2484 	struct dev_info	*node = DEVI(dip);
2485 	struct di_key	*key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2486 	struct di_dkey	*dk;
2487 
2488 	ASSERT(dip);
2489 	ASSERT(off > 0);
2490 
2491 	key->k_type = DI_DKEY;
2492 	dk = &(key->k_u.dkey);
2493 
2494 	dk->dk_dip = dip;
2495 	dk->dk_major = node->devi_major;
2496 	dk->dk_inst = node->devi_instance;
2497 	dk->dk_nodeid = node->devi_nodeid;
2498 
2499 	if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2500 	    (mod_hash_val_t)(uintptr_t)off) != 0) {
2501 		panic(
2502 		    "duplicate devinfo (%p) registered during device "
2503 		    "tree walk", (void *)dip);
2504 	}
2505 }
2506 
2507 
2508 static int
2509 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2510 {
2511 	/*
2512 	 * uintptr_t must be used because it matches the size of void *;
2513 	 * mod_hash expects clients to place results into pointer-size
2514 	 * containers; since di_off_t is always a 32-bit offset, alignment
2515 	 * would otherwise be broken on 64-bit kernels.
2516 	 */
2517 	uintptr_t	offset;
2518 	struct		di_key key = {0};
2519 	struct		di_dkey *dk;
2520 
2521 	ASSERT(st->reg_dip_hash);
2522 	ASSERT(dip);
2523 	ASSERT(off_p);
2524 
2525 
2526 	key.k_type = DI_DKEY;
2527 	dk = &(key.k_u.dkey);
2528 
2529 	dk->dk_dip = dip;
2530 	dk->dk_major = DEVI(dip)->devi_major;
2531 	dk->dk_inst = DEVI(dip)->devi_instance;
2532 	dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2533 
2534 	if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2535 	    (mod_hash_val_t *)&offset) == 0) {
2536 		*off_p = (di_off_t)offset;
2537 		return (0);
2538 	} else {
2539 		return (-1);
2540 	}
2541 }
2542 
2543 /*
2544  * di_register_pip(), di_find_pip(): The pip must be protected from deallocation
2545  * when using these routines. The caller must do this by protecting the
2546  * client(or phci)<->pip linkage while traversing the list and then holding the
2547  * pip when it is found in the list.
2548  */
2549 
2550 static void
2551 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
2552 {
2553 	struct di_key	*key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2554 	char		*path_addr;
2555 	struct di_pkey	*pk;
2556 
2557 	ASSERT(pip);
2558 	ASSERT(off > 0);
2559 
2560 	key->k_type = DI_PKEY;
2561 	pk = &(key->k_u.pkey);
2562 
2563 	pk->pk_pip = pip;
2564 	path_addr = mdi_pi_get_addr(pip);
2565 	if (path_addr)
2566 		pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
2567 	pk->pk_client = mdi_pi_get_client(pip);
2568 	pk->pk_phci = mdi_pi_get_phci(pip);
2569 
2570 	if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
2571 	    (mod_hash_val_t)(uintptr_t)off) != 0) {
2572 		panic(
2573 		    "duplicate pathinfo (%p) registered during device "
2574 		    "tree walk", (void *)pip);
2575 	}
2576 }
2577 
2578 /*
2579  * As with di_register_pip, the caller must hold or lock the pip
2580  */
2581 static int
2582 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
2583 {
2584 	/*
2585 	 * uintptr_t must be used because it matches the size of void *;
2586 	 * mod_hash expects clients to place results into pointer-size
2587 	 * containers; since di_off_t is always a 32-bit offset, alignment
2588 	 * would otherwise be broken on 64-bit kernels.
2589 	 */
2590 	uintptr_t	offset;
2591 	struct di_key	key = {0};
2592 	struct di_pkey	*pk;
2593 
2594 	ASSERT(st->reg_pip_hash);
2595 	ASSERT(off_p);
2596 
2597 	if (pip == NULL) {
2598 		*off_p = 0;
2599 		return (0);
2600 	}
2601 
2602 	key.k_type = DI_PKEY;
2603 	pk = &(key.k_u.pkey);
2604 
2605 	pk->pk_pip = pip;
2606 	pk->pk_path_addr = mdi_pi_get_addr(pip);
2607 	pk->pk_client = mdi_pi_get_client(pip);
2608 	pk->pk_phci = mdi_pi_get_phci(pip);
2609 
2610 	if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
2611 	    (mod_hash_val_t *)&offset) == 0) {
2612 		*off_p = (di_off_t)offset;
2613 		return (0);
2614 	} else {
2615 		return (-1);
2616 	}
2617 }
2618 
2619 static di_path_state_t
2620 path_state_convert(mdi_pathinfo_state_t st)
2621 {
2622 	switch (st) {
2623 	case MDI_PATHINFO_STATE_ONLINE:
2624 		return (DI_PATH_STATE_ONLINE);
2625 	case MDI_PATHINFO_STATE_STANDBY:
2626 		return (DI_PATH_STATE_STANDBY);
2627 	case MDI_PATHINFO_STATE_OFFLINE:
2628 		return (DI_PATH_STATE_OFFLINE);
2629 	case MDI_PATHINFO_STATE_FAULT:
2630 		return (DI_PATH_STATE_FAULT);
2631 	default:
2632 		return (DI_PATH_STATE_UNKNOWN);
2633 	}
2634 }
2635 
2636 
2637 static di_off_t
2638 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p,
2639     struct di_state *st)
2640 {
2641 	nvpair_t		*prop = NULL;
2642 	struct di_path_prop	*me;
2643 	int			off;
2644 	size_t			size;
2645 	char			*str;
2646 	uchar_t			*buf;
2647 	uint_t			nelems;
2648 
2649 	off = *off_p;
2650 	if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
2651 		*off_p = 0;
2652 		return (off);
2653 	}
2654 
2655 	off = di_checkmem(st, off, sizeof (struct di_path_prop));
2656 	*off_p = off;
2657 
2658 	while (prop = mdi_pi_get_next_prop(pip, prop)) {
2659 		me = DI_PATHPROP(di_mem_addr(st, off));
2660 		me->self = off;
2661 		off += sizeof (struct di_path_prop);
2662 
2663 		/*
2664 		 * property name
2665 		 */
2666 		size = strlen(nvpair_name(prop)) + 1;
2667 		me->prop_name = off = di_checkmem(st, off, size);
2668 		(void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
2669 		off += size;
2670 
2671 		switch (nvpair_type(prop)) {
2672 		case DATA_TYPE_BYTE:
2673 		case DATA_TYPE_INT16:
2674 		case DATA_TYPE_UINT16:
2675 		case DATA_TYPE_INT32:
2676 		case DATA_TYPE_UINT32:
2677 			me->prop_type = DDI_PROP_TYPE_INT;
2678 			size = sizeof (int32_t);
2679 			off = di_checkmem(st, off, size);
2680 			(void) nvpair_value_int32(prop,
2681 			    (int32_t *)di_mem_addr(st, off));
2682 			break;
2683 
2684 		case DATA_TYPE_INT64:
2685 		case DATA_TYPE_UINT64:
2686 			me->prop_type = DDI_PROP_TYPE_INT64;
2687 			size = sizeof (int64_t);
2688 			off = di_checkmem(st, off, size);
2689 			(void) nvpair_value_int64(prop,
2690 			    (int64_t *)di_mem_addr(st, off));
2691 			break;
2692 
2693 		case DATA_TYPE_STRING:
2694 			me->prop_type = DDI_PROP_TYPE_STRING;
2695 			(void) nvpair_value_string(prop, &str);
2696 			size = strlen(str) + 1;
2697 			off = di_checkmem(st, off, size);
2698 			(void) strcpy(di_mem_addr(st, off), str);
2699 			break;
2700 
2701 		case DATA_TYPE_BYTE_ARRAY:
2702 		case DATA_TYPE_INT16_ARRAY:
2703 		case DATA_TYPE_UINT16_ARRAY:
2704 		case DATA_TYPE_INT32_ARRAY:
2705 		case DATA_TYPE_UINT32_ARRAY:
2706 		case DATA_TYPE_INT64_ARRAY:
2707 		case DATA_TYPE_UINT64_ARRAY:
2708 			me->prop_type = DDI_PROP_TYPE_BYTE;
2709 			(void) nvpair_value_byte_array(prop, &buf, &nelems);
2710 			size = nelems;
2711 			if (nelems != 0) {
2712 				off = di_checkmem(st, off, size);
2713 				bcopy(buf, di_mem_addr(st, off), size);
2714 			}
2715 			break;
2716 
2717 		default:	/* Unknown or unhandled type; skip it */
2718 			size = 0;
2719 			break;
2720 		}
2721 
2722 		if (size > 0) {
2723 			me->prop_data = off;
2724 		}
2725 
2726 		me->prop_len = (int)size;
2727 		off += size;
2728 
2729 		off = di_checkmem(st, off, sizeof (struct di_path_prop));
2730 		me->prop_next = off;
2731 	}
2732 
2733 	me->prop_next = 0;
2734 	return (off);
2735 }
2736 
2737 
2738 static void
2739 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2740     int get_client)
2741 {
2742 	if (get_client) {
2743 		ASSERT(me->path_client == 0);
2744 		me->path_client = noff;
2745 		ASSERT(me->path_c_link == 0);
2746 		*off_pp = &me->path_c_link;
2747 		me->path_snap_state &=
2748 		    ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2749 	} else {
2750 		ASSERT(me->path_phci == 0);
2751 		me->path_phci = noff;
2752 		ASSERT(me->path_p_link == 0);
2753 		*off_pp = &me->path_p_link;
2754 		me->path_snap_state &=
2755 		    ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2756 	}
2757 }
2758 
2759 /*
2760  * off_p: pointer to the linkage field. This links pips along the client|phci
2761  *	   linkage list.
2762  * noff  : Offset for the endpoint dip snapshot.
2763  */
2764 static di_off_t
2765 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff,
2766     struct di_state *st, int get_client)
2767 {
2768 	di_off_t	off;
2769 	mdi_pathinfo_t	*pip;
2770 	struct di_path	*me;
2771 	mdi_pathinfo_t	*(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2772 	size_t		size;
2773 
2774 	dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2775 
2776 	/*
2777 	 * The naming of the following mdi_xyz() is unfortunately
2778 	 * non-intuitive. mdi_get_next_phci_path() follows the
2779 	 * client_link i.e. the list of pip's belonging to the
2780 	 * given client dip.
2781 	 */
2782 	if (get_client)
2783 		next_pip = &mdi_get_next_phci_path;
2784 	else
2785 		next_pip = &mdi_get_next_client_path;
2786 
2787 	off = *off_p;
2788 
2789 	pip = NULL;
2790 	while (pip = (*next_pip)(dip, pip)) {
2791 		mdi_pathinfo_state_t state;
2792 		di_off_t stored_offset;
2793 
2794 		dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2795 
2796 		mdi_pi_lock(pip);
2797 
2798 		if (di_pip_find(st, pip, &stored_offset) != -1) {
2799 			/*
2800 			 * We've already seen this pathinfo node so we need to
2801 			 * take care not to snap it again; However, one endpoint
2802 			 * and linkage will be set here. The other endpoint
2803 			 * and linkage has already been set when the pip was
2804 			 * first snapshotted i.e. when the other endpoint dip
2805 			 * was snapshotted.
2806 			 */
2807 			me = DI_PATH(di_mem_addr(st, stored_offset));
2808 			*off_p = stored_offset;
2809 
2810 			di_path_one_endpoint(me, noff, &off_p, get_client);
2811 
2812 			/*
2813 			 * The other endpoint and linkage were set when this
2814 			 * pip was snapshotted. So we are done with both
2815 			 * endpoints and linkages.
2816 			 */
2817 			ASSERT(!(me->path_snap_state &
2818 			    (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2819 			ASSERT(!(me->path_snap_state &
2820 			    (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2821 
2822 			mdi_pi_unlock(pip);
2823 			continue;
2824 		}
2825 
2826 		/*
2827 		 * Now that we need to snapshot this pip, check memory
2828 		 */
2829 		size = sizeof (struct di_path);
2830 		*off_p = off = di_checkmem(st, off, size);
2831 		me = DI_PATH(di_mem_addr(st, off));
2832 		me->self = off;
2833 		off += size;
2834 
2835 		me->path_snap_state =
2836 		    DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2837 		me->path_snap_state |=
2838 		    DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2839 
2840 		/*
2841 		 * Zero out fields as di_checkmem() doesn't guarantee
2842 		 * zero-filled memory
2843 		 */
2844 		me->path_client = me->path_phci = 0;
2845 		me->path_c_link = me->path_p_link = 0;
2846 
2847 		di_path_one_endpoint(me, noff, &off_p, get_client);
2848 
2849 		/*
2850 		 * Note the existence of this pathinfo
2851 		 */
2852 		di_register_pip(st, pip, me->self);
2853 
2854 		state = mdi_pi_get_state(pip);
2855 		me->path_state = path_state_convert(state);
2856 
2857 		me->path_instance = mdi_pi_get_path_instance(pip);
2858 
2859 		/*
2860 		 * Get intermediate addressing info.
2861 		 */
2862 		size = strlen(mdi_pi_get_addr(pip)) + 1;
2863 		me->path_addr = off = di_checkmem(st, off, size);
2864 		(void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2865 		off += size;
2866 
2867 		/*
2868 		 * Get path properties if props are to be included in the
2869 		 * snapshot
2870 		 */
2871 		if (DINFOPROP & st->command) {
2872 			me->path_prop = off;
2873 			off = di_path_getprop(pip, &me->path_prop, st);
2874 		} else {
2875 			me->path_prop = 0;
2876 		}
2877 
2878 		mdi_pi_unlock(pip);
2879 	}
2880 
2881 	*off_p = 0;
2882 	return (off);
2883 }
2884 
2885 /*
2886  * Return driver prop_op entry point for the specified devinfo node.
2887  *
2888  * To return a non-NULL value:
2889  * - driver must be attached and held:
2890  *   If driver is not attached we ignore the driver property list.
2891  *   No one should rely on such properties.
2892  * - driver "cb_prop_op != ddi_prop_op":
2893  *   If "cb_prop_op == ddi_prop_op", framework does not need to call driver.
2894  *   XXX or parent's bus_prop_op != ddi_bus_prop_op
2895  */
2896 static int
2897 (*di_getprop_prop_op(struct dev_info *dip))
2898 	(dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *)
2899 {
2900 	struct dev_ops	*ops;
2901 
2902 	/* If driver is not attached we ignore the driver property list. */
2903 	if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip))
2904 		return (NULL);
2905 
2906 	/*
2907 	 * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev,
2908 	 * or even NULL.
2909 	 */
2910 	ops = dip->devi_ops;
2911 	if (ops && ops->devo_cb_ops &&
2912 	    (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
2913 	    (ops->devo_cb_ops->cb_prop_op != nodev) &&
2914 	    (ops->devo_cb_ops->cb_prop_op != nulldev) &&
2915 	    (ops->devo_cb_ops->cb_prop_op != NULL))
2916 		return (ops->devo_cb_ops->cb_prop_op);
2917 	return (NULL);
2918 }
2919 
2920 static di_off_t
2921 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip,
2922     int (*prop_op)(),
2923     char *name, dev_t devt, int aflags, int alen, caddr_t aval,
2924     di_off_t off, di_off_t **off_pp)
2925 {
2926 	int		need_free = 0;
2927 	dev_t		pdevt;
2928 	int		pflags;
2929 	int		rv;
2930 	caddr_t		val;
2931 	int		len;
2932 	size_t		size;
2933 	struct di_prop	*pp;
2934 
2935 	/* If we have prop_op function, ask driver for latest value */
2936 	if (prop_op) {
2937 		ASSERT(dip);
2938 
2939 		/* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */
2940 		pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt;
2941 
2942 		/*
2943 		 * We have type information in flags, but are invoking an
2944 		 * old non-typed prop_op(9E) interface. Since not all types are
2945 		 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64),
2946 		 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to
2947 		 * expand type bits beyond DDI_PROP_TYPE_ANY.  This allows us
2948 		 * to use the legacy prop_op(9E) interface to obtain updates
2949 		 * non-DDI_PROP_TYPE_ANY dynamic properties.
2950 		 */
2951 		pflags = aflags & ~DDI_PROP_TYPE_MASK;
2952 		pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM |
2953 		    DDI_PROP_CONSUMER_TYPED;
2954 		rv = (*prop_op)(pdevt, (dev_info_t)dip, PROP_LEN_AND_VAL_ALLOC,
2955 		    pflags, name, &val, &len);
2956 
2957 		if (rv == DDI_PROP_SUCCESS) {
2958 			need_free = 1;		/* dynamic prop obtained */
2959 		} else if (dyn) {
2960 			/*
2961 			 * A dynamic property must succeed prop_op(9E) to show
2962 			 * up in the snapshot - that is the only source of its
2963 			 * value.
2964 			 */
2965 			return (off);		/* dynamic prop not supported */
2966 		} else {
2967 			/*
2968 			 * In case calling the driver caused an update off
2969 			 * prop_op(9E) of a non-dynamic property (code leading
2970 			 * to ddi_prop_change), we defer picking up val and
2971 			 * len informatiojn until after prop_op(9E) to ensure
2972 			 * that we snapshot the latest value.
2973 			 */
2974 			val = aval;
2975 			len = alen;
2976 
2977 		}
2978 	} else {
2979 		val = aval;
2980 		len = alen;
2981 	}
2982 
2983 	dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n",
2984 	    list, name ? name : "NULL", len, (void *)val));
2985 
2986 	size = sizeof (struct di_prop);
2987 	**off_pp = off = di_checkmem(st, off, size);
2988 	pp = DI_PROP(di_mem_addr(st, off));
2989 	pp->self = off;
2990 	off += size;
2991 
2992 	pp->dev_major = getmajor(devt);
2993 	pp->dev_minor = getminor(devt);
2994 	pp->prop_flags = aflags;
2995 	pp->prop_list = list;
2996 
2997 	/* property name */
2998 	if (name) {
2999 		size = strlen(name) + 1;
3000 		pp->prop_name = off = di_checkmem(st, off, size);
3001 		(void) strcpy(di_mem_addr(st, off), name);
3002 		off += size;
3003 	} else {
3004 		pp->prop_name = -1;
3005 	}
3006 
3007 	pp->prop_len = len;
3008 	if (val == NULL) {
3009 		pp->prop_data = -1;
3010 	} else if (len != 0) {
3011 		size = len;
3012 		pp->prop_data = off = di_checkmem(st, off, size);
3013 		bcopy(val, di_mem_addr(st, off), size);
3014 		off += size;
3015 	}
3016 
3017 	pp->next = 0;			/* assume tail for now */
3018 	*off_pp = &pp->next;		/* return pointer to our next */
3019 
3020 	if (need_free)			/* free PROP_LEN_AND_VAL_ALLOC alloc */
3021 		kmem_free(val, len);
3022 	return (off);
3023 }
3024 
3025 
3026 /*
3027  * Copy a list of properties attached to a devinfo node. Called from
3028  * di_copynode with active ndi_devi_enter. The major number is passed in case
3029  * we need to call driver's prop_op entry. The value of list indicates
3030  * which list we are copying. Possible values are:
3031  * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
3032  */
3033 static di_off_t
3034 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p,
3035     struct di_state *st, struct dev_info *dip)
3036 {
3037 	struct ddi_prop		*prop;
3038 	int			(*prop_op)();
3039 	int			off;
3040 	struct ddi_minor_data	*mn;
3041 	i_ddi_prop_dyn_t	*dp;
3042 	struct plist {
3043 		struct plist	*pl_next;
3044 		char		*pl_name;
3045 		int		pl_flags;
3046 		dev_t		pl_dev;
3047 		int		pl_len;
3048 		caddr_t		pl_val;
3049 	}			*pl, *pl0, **plp;
3050 
3051 	ASSERT(st != NULL);
3052 
3053 	off = *off_p;
3054 	*off_p = 0;
3055 	dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n",
3056 	    list, (void *)*pprop));
3057 
3058 	/* get pointer to driver's prop_op(9E) implementation if DRV_LIST */
3059 	prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL;
3060 
3061 	/*
3062 	 * Form private list of properties, holding devi_lock for properties
3063 	 * than hang off the dip.
3064 	 */
3065 	if (dip)
3066 		mutex_enter(&(dip->devi_lock));
3067 	for (plp = &pl0, prop = *pprop;
3068 	    prop; plp = &pl->pl_next, prop = prop->prop_next) {
3069 		pl = kmem_alloc(sizeof (*pl), KM_SLEEP);
3070 		*plp = pl;
3071 		pl->pl_next = NULL;
3072 		if (prop->prop_name)
3073 			pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP);
3074 		else
3075 			pl->pl_name = NULL;
3076 		pl->pl_flags = prop->prop_flags;
3077 		pl->pl_dev = prop->prop_dev;
3078 		if (prop->prop_len) {
3079 			pl->pl_len = prop->prop_len;
3080 			pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP);
3081 			bcopy(prop->prop_val, pl->pl_val, pl->pl_len);
3082 		} else {
3083 			pl->pl_len = 0;
3084 			pl->pl_val = NULL;
3085 		}
3086 	}
3087 	if (dip)
3088 		mutex_exit(&(dip->devi_lock));
3089 
3090 	/*
3091 	 * Now that we have dropped devi_lock, perform a second-pass to
3092 	 * add properties to the snapshot.  We do this as a second pass
3093 	 * because we may need to call prop_op(9E) and we can't hold
3094 	 * devi_lock across that call.
3095 	 */
3096 	for (pl = pl0; pl; pl = pl0) {
3097 		pl0 = pl->pl_next;
3098 		off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name,
3099 		    pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val,
3100 		    off, &off_p);
3101 		if (pl->pl_val)
3102 			kmem_free(pl->pl_val, pl->pl_len);
3103 		if (pl->pl_name)
3104 			kmem_free(pl->pl_name, strlen(pl->pl_name) + 1);
3105 		kmem_free(pl, sizeof (*pl));
3106 	}
3107 
3108 	/*
3109 	 * If there is no prop_op or dynamic property support has been
3110 	 * disabled, we are done.
3111 	 */
3112 	if ((prop_op == NULL) || (di_prop_dyn == 0)) {
3113 		*off_p = 0;
3114 		return (off);
3115 	}
3116 
3117 	/* Add dynamic driver properties to snapshot */
3118 	for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip);
3119 	    dp && dp->dp_name; dp++) {
3120 		if (dp->dp_spec_type) {
3121 			/* if spec_type, property of matching minor */
3122 			ASSERT(DEVI_BUSY_OWNED(dip));
3123 			for (mn = dip->devi_minor; mn; mn = mn->next) {
3124 				if (mn->ddm_spec_type != dp->dp_spec_type)
3125 					continue;
3126 				off = di_getprop_add(list, 1, st, dip, prop_op,
3127 				    dp->dp_name, mn->ddm_dev, dp->dp_type,
3128 				    0, NULL, off, &off_p);
3129 			}
3130 		} else {
3131 			/* property of devinfo node */
3132 			off = di_getprop_add(list, 1, st, dip, prop_op,
3133 			    dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3134 			    0, NULL, off, &off_p);
3135 		}
3136 	}
3137 
3138 	/* Add dynamic parent properties to snapshot */
3139 	for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip);
3140 	    dp && dp->dp_name; dp++) {
3141 		if (dp->dp_spec_type) {
3142 			/* if spec_type, property of matching minor */
3143 			ASSERT(DEVI_BUSY_OWNED(dip));
3144 			for (mn = dip->devi_minor; mn; mn = mn->next) {
3145 				if (mn->ddm_spec_type != dp->dp_spec_type)
3146 					continue;
3147 				off = di_getprop_add(list, 1, st, dip, prop_op,
3148 				    dp->dp_name, mn->ddm_dev, dp->dp_type,
3149 				    0, NULL, off, &off_p);
3150 			}
3151 		} else {
3152 			/* property of devinfo node */
3153 			off = di_getprop_add(list, 1, st, dip, prop_op,
3154 			    dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3155 			    0, NULL, off, &off_p);
3156 		}
3157 	}
3158 
3159 	*off_p = 0;
3160 	return (off);
3161 }
3162 
3163 /*
3164  * find private data format attached to a dip
3165  * parent = 1 to match driver name of parent dip (for parent private data)
3166  *	0 to match driver name of current dip (for driver private data)
3167  */
3168 #define	DI_MATCH_DRIVER	0
3169 #define	DI_MATCH_PARENT	1
3170 
3171 struct di_priv_format *
3172 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3173 {
3174 	int			i, count, len;
3175 	char			*drv_name;
3176 	major_t			major;
3177 	struct di_all		*all;
3178 	struct di_priv_format	*form;
3179 
3180 	dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3181 	    node->devi_node_name, match));
3182 
3183 	if (match == DI_MATCH_PARENT) {
3184 		node = DEVI(node->devi_parent);
3185 	}
3186 
3187 	if (node == NULL) {
3188 		return (NULL);
3189 	}
3190 
3191 	major = ddi_name_to_major(node->devi_binding_name);
3192 	if (major == (major_t)(-1)) {
3193 		return (NULL);
3194 	}
3195 
3196 	/*
3197 	 * Match the driver name.
3198 	 */
3199 	drv_name = ddi_major_to_name(major);
3200 	if ((drv_name == NULL) || *drv_name == '\0') {
3201 		return (NULL);
3202 	}
3203 
3204 	/* Now get the di_priv_format array */
3205 	all = DI_ALL_PTR(st);
3206 	if (match == DI_MATCH_PARENT) {
3207 		count = all->n_ppdata;
3208 		form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format));
3209 	} else {
3210 		count = all->n_dpdata;
3211 		form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format));
3212 	}
3213 
3214 	len = strlen(drv_name);
3215 	for (i = 0; i < count; i++) {
3216 		char *tmp;
3217 
3218 		tmp = form[i].drv_name;
3219 		while (tmp && (*tmp != '\0')) {
3220 			if (strncmp(drv_name, tmp, len) == 0) {
3221 				return (&form[i]);
3222 			}
3223 			/*
3224 			 * Move to next driver name, skipping a white space
3225 			 */
3226 			if (tmp = strchr(tmp, ' ')) {
3227 				tmp++;
3228 			}
3229 		}
3230 	}
3231 
3232 	return (NULL);
3233 }
3234 
3235 /*
3236  * The following functions copy data as specified by the format passed in.
3237  * To prevent invalid format from panicing the system, we call on_fault().
3238  * A return value of 0 indicates an error. Otherwise, the total offset
3239  * is returned.
3240  */
3241 #define	DI_MAX_PRIVDATA	(PAGESIZE >> 1)	/* max private data size */
3242 
3243 static di_off_t
3244 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
3245     void *data, di_off_t *off_p, struct di_state *st)
3246 {
3247 	caddr_t		pa;
3248 	void		*ptr;
3249 	int		i, size, repeat;
3250 	di_off_t	off, off0, *tmp;
3251 	char		*path;
3252 	label_t		ljb;
3253 
3254 	dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3255 
3256 	/*
3257 	 * check memory availability. Private data size is
3258 	 * limited to DI_MAX_PRIVDATA.
3259 	 */
3260 	off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3261 	*off_p = off;
3262 
3263 	if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3264 		goto failure;
3265 	}
3266 
3267 	if (!on_fault(&ljb)) {
3268 		/* copy the struct */
3269 		bcopy(data, di_mem_addr(st, off), pdp->bytes);
3270 		off0 = DI_ALIGN(pdp->bytes);	/* XXX remove DI_ALIGN */
3271 
3272 		/* dereferencing pointers */
3273 		for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3274 
3275 			if (pdp->ptr[i].size == 0) {
3276 				goto success;	/* no more ptrs */
3277 			}
3278 
3279 			/*
3280 			 * first, get the pointer content
3281 			 */
3282 			if ((pdp->ptr[i].offset < 0) ||
3283 			    (pdp->ptr[i].offset > pdp->bytes - sizeof (char *)))
3284 				goto failure;	/* wrong offset */
3285 
3286 			pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3287 
3288 			/* save a tmp ptr to store off_t later */
3289 			tmp = (di_off_t *)(intptr_t)pa;
3290 
3291 			/* get pointer value, if NULL continue */
3292 			ptr = *((void **) (intptr_t)pa);
3293 			if (ptr == NULL) {
3294 				continue;
3295 			}
3296 
3297 			/*
3298 			 * next, find the repeat count (array dimension)
3299 			 */
3300 			repeat = pdp->ptr[i].len_offset;
3301 
3302 			/*
3303 			 * Positive value indicates a fixed sized array.
3304 			 * 0 or negative value indicates variable sized array.
3305 			 *
3306 			 * For variable sized array, the variable must be
3307 			 * an int member of the structure, with an offset
3308 			 * equal to the absolution value of struct member.
3309 			 */
3310 			if (repeat > pdp->bytes - sizeof (int)) {
3311 				goto failure;	/* wrong offset */
3312 			}
3313 
3314 			if (repeat >= 0) {
3315 				repeat = *((int *)
3316 				    (intptr_t)((caddr_t)data + repeat));
3317 			} else {
3318 				repeat = -repeat;
3319 			}
3320 
3321 			/*
3322 			 * next, get the size of the object to be copied
3323 			 */
3324 			size = pdp->ptr[i].size * repeat;
3325 
3326 			/*
3327 			 * Arbitrarily limit the total size of object to be
3328 			 * copied (1 byte to 1/4 page).
3329 			 */
3330 			if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3331 				goto failure;	/* wrong size or too big */
3332 			}
3333 
3334 			/*
3335 			 * Now copy the data
3336 			 */
3337 			*tmp = off0;
3338 			bcopy(ptr, di_mem_addr(st, off + off0), size);
3339 			off0 += DI_ALIGN(size);	/* XXX remove DI_ALIGN */
3340 		}
3341 	} else {
3342 		goto failure;
3343 	}
3344 
3345 success:
3346 	/*
3347 	 * success if reached here
3348 	 */
3349 	no_fault();
3350 	return (off + off0);
3351 	/*NOTREACHED*/
3352 
3353 failure:
3354 	/*
3355 	 * fault occurred
3356 	 */
3357 	no_fault();
3358 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3359 	cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
3360 	    ddi_pathname((dev_info_t *)node, path), data);
3361 	kmem_free(path, MAXPATHLEN);
3362 	*off_p = -1;	/* set private data to indicate error */
3363 
3364 	return (off);
3365 }
3366 
3367 /*
3368  * get parent private data; on error, returns original offset
3369  */
3370 static di_off_t
3371 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3372 {
3373 	int			off;
3374 	struct di_priv_format	*ppdp;
3375 
3376 	dcmn_err2((CE_CONT, "di_getppdata:\n"));
3377 
3378 	/* find the parent data format */
3379 	if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3380 		off = *off_p;
3381 		*off_p = 0;	/* set parent data to none */
3382 		return (off);
3383 	}
3384 
3385 	return (di_getprvdata(ppdp, node,
3386 	    ddi_get_parent_data((dev_info_t *)node), off_p, st));
3387 }
3388 
3389 /*
3390  * get parent private data; returns original offset
3391  */
3392 static di_off_t
3393 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3394 {
3395 	int			off;
3396 	struct di_priv_format	*dpdp;
3397 
3398 	dcmn_err2((CE_CONT, "di_getdpdata:"));
3399 
3400 	/* find the parent data format */
3401 	if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3402 		off = *off_p;
3403 		*off_p = 0;	/* set driver data to none */
3404 		return (off);
3405 	}
3406 
3407 	return (di_getprvdata(dpdp, node,
3408 	    ddi_get_driver_private((dev_info_t *)node), off_p, st));
3409 }
3410 
3411 /*
3412  * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3413  * This function encapsulates the state machine:
3414  *
3415  *	-> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3416  *	|		SNAPSHOT		USRLD	 |
3417  *	--------------------------------------------------
3418  *
3419  * Returns 0 on success and -1 on failure
3420  */
3421 static int
3422 di_setstate(struct di_state *st, int new_state)
3423 {
3424 	int	ret = 0;
3425 
3426 	mutex_enter(&di_lock);
3427 	switch (new_state) {
3428 	case IOC_IDLE:
3429 	case IOC_DONE:
3430 		break;
3431 	case IOC_SNAP:
3432 		if (st->di_iocstate != IOC_IDLE)
3433 			ret = -1;
3434 		break;
3435 	case IOC_COPY:
3436 		if (st->di_iocstate != IOC_DONE)
3437 			ret = -1;
3438 		break;
3439 	default:
3440 		ret = -1;
3441 	}
3442 
3443 	if (ret == 0)
3444 		st->di_iocstate = new_state;
3445 	else
3446 		cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3447 		    st->di_iocstate, new_state);
3448 	mutex_exit(&di_lock);
3449 	return (ret);
3450 }
3451 
3452 /*
3453  * We cannot assume the presence of the entire
3454  * snapshot in this routine. All we are guaranteed
3455  * is the di_all struct + 1 byte (for root_path)
3456  */
3457 static int
3458 header_plus_one_ok(struct di_all *all)
3459 {
3460 	/*
3461 	 * Refuse to read old versions
3462 	 */
3463 	if (all->version != DI_SNAPSHOT_VERSION) {
3464 		CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
3465 		return (0);
3466 	}
3467 
3468 	if (all->cache_magic != DI_CACHE_MAGIC) {
3469 		CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
3470 		return (0);
3471 	}
3472 
3473 	if (all->snapshot_time == 0) {
3474 		CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time));
3475 		return (0);
3476 	}
3477 
3478 	if (all->top_devinfo == 0) {
3479 		CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
3480 		return (0);
3481 	}
3482 
3483 	if (all->map_size < sizeof (*all) + 1) {
3484 		CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
3485 		return (0);
3486 	}
3487 
3488 	if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
3489 		CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
3490 		    all->root_path[0], all->root_path[1]));
3491 		return (0);
3492 	}
3493 
3494 	/*
3495 	 * We can't check checksum here as we just have the header
3496 	 */
3497 
3498 	return (1);
3499 }
3500 
3501 static int
3502 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
3503 {
3504 	rlim64_t	rlimit;
3505 	ssize_t		resid;
3506 	int		error = 0;
3507 
3508 
3509 	rlimit = RLIM64_INFINITY;
3510 
3511 	while (len) {
3512 		resid = 0;
3513 		error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
3514 		    UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
3515 
3516 		if (error || resid < 0) {
3517 			error = error ? error : EIO;
3518 			CACHE_DEBUG((DI_ERR, "write error: %d", error));
3519 			break;
3520 		}
3521 
3522 		/*
3523 		 * Check if we are making progress
3524 		 */
3525 		if (resid >= len) {
3526 			error = ENOSPC;
3527 			break;
3528 		}
3529 		buf += len - resid;
3530 		off += len - resid;
3531 		len = resid;
3532 	}
3533 
3534 	return (error);
3535 }
3536 
3537 static void
3538 di_cache_write(struct di_cache *cache)
3539 {
3540 	struct di_all	*all;
3541 	struct vnode	*vp;
3542 	int		oflags;
3543 	size_t		map_size;
3544 	size_t		chunk;
3545 	offset_t	off;
3546 	int		error;
3547 	char		*buf;
3548 
3549 	ASSERT(DI_CACHE_LOCKED(*cache));
3550 	ASSERT(!servicing_interrupt());
3551 
3552 	if (cache->cache_size == 0) {
3553 		ASSERT(cache->cache_data == NULL);
3554 		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write"));
3555 		return;
3556 	}
3557 
3558 	ASSERT(cache->cache_size > 0);
3559 	ASSERT(cache->cache_data);
3560 
3561 	if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) {
3562 		CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write"));
3563 		return;
3564 	}
3565 
3566 	all = (struct di_all *)cache->cache_data;
3567 
3568 	if (!header_plus_one_ok(all)) {
3569 		CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write"));
3570 		return;
3571 	}
3572 
3573 	ASSERT(strcmp(all->root_path, "/") == 0);
3574 
3575 	/*
3576 	 * The cache_size is the total allocated memory for the cache.
3577 	 * The map_size is the actual size of valid data in the cache.
3578 	 * map_size may be smaller than cache_size but cannot exceed
3579 	 * cache_size.
3580 	 */
3581 	if (all->map_size > cache->cache_size) {
3582 		CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)."
3583 		    " Skipping write", all->map_size, cache->cache_size));
3584 		return;
3585 	}
3586 
3587 	/*
3588 	 * First unlink the temp file
3589 	 */
3590 	error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE);
3591 	if (error && error != ENOENT) {
3592 		CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d",
3593 		    DI_CACHE_TEMP, error));
3594 	}
3595 
3596 	if (error == EROFS) {
3597 		CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write"));
3598 		return;
3599 	}
3600 
3601 	vp = NULL;
3602 	oflags = (FCREAT|FWRITE);
3603 	if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags,
3604 	    DI_CACHE_PERMS, &vp, CRCREAT, 0)) {
3605 		CACHE_DEBUG((DI_ERR, "%s: create failed: %d",
3606 		    DI_CACHE_TEMP, error));
3607 		return;
3608 	}
3609 
3610 	ASSERT(vp);
3611 
3612 	/*
3613 	 * Paranoid: Check if the file is on a read-only FS
3614 	 */
3615 	if (vn_is_readonly(vp)) {
3616 		CACHE_DEBUG((DI_ERR, "cannot write: readonly FS"));
3617 		goto fail;
3618 	}
3619 
3620 	/*
3621 	 * Note that we only write map_size bytes to disk - this saves
3622 	 * space as the actual cache size may be larger than size of
3623 	 * valid data in the cache.
3624 	 * Another advantage is that it makes verification of size
3625 	 * easier when the file is read later.
3626 	 */
3627 	map_size = all->map_size;
3628 	off = 0;
3629 	buf = cache->cache_data;
3630 
3631 	while (map_size) {
3632 		ASSERT(map_size > 0);
3633 		/*
3634 		 * Write in chunks so that VM system
3635 		 * is not overwhelmed
3636 		 */
3637 		if (map_size > di_chunk * PAGESIZE)
3638 			chunk = di_chunk * PAGESIZE;
3639 		else
3640 			chunk = map_size;
3641 
3642 		error = chunk_write(vp, off, buf, chunk);
3643 		if (error) {
3644 			CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d",
3645 			    off, error));
3646 			goto fail;
3647 		}
3648 
3649 		off += chunk;
3650 		buf += chunk;
3651 		map_size -= chunk;
3652 
3653 		/* If low on memory, give pageout a chance to run */
3654 		if (freemem < desfree)
3655 			delay(1);
3656 	}
3657 
3658 	/*
3659 	 * Now sync the file and close it
3660 	 */
3661 	if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) {
3662 		CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error));
3663 	}
3664 
3665 	if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) {
3666 		CACHE_DEBUG((DI_ERR, "close() failed: %d", error));
3667 		VN_RELE(vp);
3668 		return;
3669 	}
3670 
3671 	VN_RELE(vp);
3672 
3673 	/*
3674 	 * Now do the rename
3675 	 */
3676 	if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) {
3677 		CACHE_DEBUG((DI_ERR, "rename failed: %d", error));
3678 		return;
3679 	}
3680 
3681 	CACHE_DEBUG((DI_INFO, "Cache write successful."));
3682 
3683 	return;
3684 
3685 fail:
3686 	(void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL);
3687 	VN_RELE(vp);
3688 }
3689 
3690 
3691 /*
3692  * Since we could be called early in boot,
3693  * use kobj_read_file()
3694  */
3695 static void
3696 di_cache_read(struct di_cache *cache)
3697 {
3698 	struct _buf	*file;
3699 	struct di_all	*all;
3700 	int		n;
3701 	size_t		map_size, sz, chunk;
3702 	offset_t	off;
3703 	caddr_t		buf;
3704 	uint32_t	saved_crc, crc;
3705 
3706 	ASSERT(modrootloaded);
3707 	ASSERT(DI_CACHE_LOCKED(*cache));
3708 	ASSERT(cache->cache_data == NULL);
3709 	ASSERT(cache->cache_size == 0);
3710 	ASSERT(!servicing_interrupt());
3711 
3712 	file = kobj_open_file(DI_CACHE_FILE);
3713 	if (file == (struct _buf *)-1) {
3714 		CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
3715 		    DI_CACHE_FILE, ENOENT));
3716 		return;
3717 	}
3718 
3719 	/*
3720 	 * Read in the header+root_path first. The root_path must be "/"
3721 	 */
3722 	all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
3723 	n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);
3724 
3725 	if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
3726 		kmem_free(all, sizeof (*all) + 1);
3727 		kobj_close_file(file);
3728 		CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
3729 		return;
3730 	}
3731 
3732 	map_size = all->map_size;
3733 
3734 	kmem_free(all, sizeof (*all) + 1);
3735 
3736 	ASSERT(map_size >= sizeof (*all) + 1);
3737 
3738 	buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
3739 	sz = map_size;
3740 	off = 0;
3741 	while (sz) {
3742 		/* Don't overload VM with large reads */
3743 		chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
3744 		n = kobj_read_file(file, buf, chunk, off);
3745 		if (n != chunk) {
3746 			CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
3747 			    DI_CACHE_FILE, off));
3748 			goto fail;
3749 		}
3750 		off += chunk;
3751 		buf += chunk;
3752 		sz -= chunk;
3753 	}
3754 
3755 	ASSERT(off == map_size);
3756 
3757 	/*
3758 	 * Read past expected EOF to verify size.
3759 	 */
3760 	if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
3761 		CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
3762 		goto fail;
3763 	}
3764 
3765 	all = (struct di_all *)di_cache.cache_data;
3766 	if (!header_plus_one_ok(all)) {
3767 		CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE));
3768 		goto fail;
3769 	}
3770 
3771 	/*
3772 	 * Compute CRC with checksum field in the cache data set to 0
3773 	 */
3774 	saved_crc = all->cache_checksum;
3775 	all->cache_checksum = 0;
3776 	CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
3777 	all->cache_checksum = saved_crc;
3778 
3779 	if (crc != all->cache_checksum) {
3780 		CACHE_DEBUG((DI_ERR,
3781 		    "%s: checksum error: expected=0x%x actual=0x%x",
3782 		    DI_CACHE_FILE, all->cache_checksum, crc));
3783 		goto fail;
3784 	}
3785 
3786 	if (all->map_size != map_size) {
3787 		CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
3788 		goto fail;
3789 	}
3790 
3791 	kobj_close_file(file);
3792 
3793 	di_cache.cache_size = map_size;
3794 
3795 	return;
3796 
3797 fail:
3798 	kmem_free(di_cache.cache_data, map_size);
3799 	kobj_close_file(file);
3800 	di_cache.cache_data = NULL;
3801 	di_cache.cache_size = 0;
3802 }
3803 
3804 
3805 /*
3806  * Checks if arguments are valid for using the cache.
3807  */
3808 static int
3809 cache_args_valid(struct di_state *st, int *error)
3810 {
3811 	ASSERT(error);
3812 	ASSERT(st->mem_size > 0);
3813 	ASSERT(st->memlist != NULL);
3814 
3815 	if (!modrootloaded || !i_ddi_io_initialized()) {
3816 		CACHE_DEBUG((DI_ERR,
3817 		    "cache lookup failure: I/O subsystem not inited"));
3818 		*error = ENOTACTIVE;
3819 		return (0);
3820 	}
3821 
3822 	/*
3823 	 * No other flags allowed with DINFOCACHE
3824 	 */
3825 	if (st->command != (DINFOCACHE & DIIOC_MASK)) {
3826 		CACHE_DEBUG((DI_ERR,
3827 		    "cache lookup failure: bad flags: 0x%x",
3828 		    st->command));
3829 		*error = EINVAL;
3830 		return (0);
3831 	}
3832 
3833 	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
3834 		CACHE_DEBUG((DI_ERR,
3835 		    "cache lookup failure: bad root: %s",
3836 		    DI_ALL_PTR(st)->root_path));
3837 		*error = EINVAL;
3838 		return (0);
3839 	}
3840 
3841 	CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));
3842 
3843 	*error = 0;
3844 
3845 	return (1);
3846 }
3847 
3848 static int
3849 snapshot_is_cacheable(struct di_state *st)
3850 {
3851 	ASSERT(st->mem_size > 0);
3852 	ASSERT(st->memlist != NULL);
3853 
3854 	if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
3855 	    (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
3856 		CACHE_DEBUG((DI_INFO,
3857 		    "not cacheable: incompatible flags: 0x%x",
3858 		    st->command));
3859 		return (0);
3860 	}
3861 
3862 	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
3863 		CACHE_DEBUG((DI_INFO,
3864 		    "not cacheable: incompatible root path: %s",
3865 		    DI_ALL_PTR(st)->root_path));
3866 		return (0);
3867 	}
3868 
3869 	CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command));
3870 
3871 	return (1);
3872 }
3873 
3874 static int
3875 di_cache_lookup(struct di_state *st)
3876 {
3877 	size_t	rval;
3878 	int	cache_valid;
3879 
3880 	ASSERT(cache_args_valid(st, &cache_valid));
3881 	ASSERT(modrootloaded);
3882 
3883 	DI_CACHE_LOCK(di_cache);
3884 
3885 	/*
3886 	 * The following assignment determines the validity
3887 	 * of the cache as far as this snapshot is concerned.
3888 	 */
3889 	cache_valid = di_cache.cache_valid;
3890 
3891 	if (cache_valid && di_cache.cache_data == NULL) {
3892 		di_cache_read(&di_cache);
3893 		/* check for read or file error */
3894 		if (di_cache.cache_data == NULL)
3895 			cache_valid = 0;
3896 	}
3897 
3898 	if (cache_valid) {
3899 		/*
3900 		 * Ok, the cache was valid as of this particular
3901 		 * snapshot. Copy the cached snapshot. This is safe
3902 		 * to do as the cache cannot be freed (we hold the
3903 		 * cache lock). Free the memory allocated in di_state
3904 		 * up until this point - we will simply copy everything
3905 		 * in the cache.
3906 		 */
3907 
3908 		ASSERT(di_cache.cache_data != NULL);
3909 		ASSERT(di_cache.cache_size > 0);
3910 
3911 		di_freemem(st);
3912 
3913 		rval = 0;
3914 		if (di_cache2mem(&di_cache, st) > 0) {
3915 			/*
3916 			 * map_size is size of valid data in the
3917 			 * cached snapshot and may be less than
3918 			 * size of the cache.
3919 			 */
3920 			ASSERT(DI_ALL_PTR(st));
3921 			rval = DI_ALL_PTR(st)->map_size;
3922 
3923 			ASSERT(rval >= sizeof (struct di_all));
3924 			ASSERT(rval <= di_cache.cache_size);
3925 		}
3926 	} else {
3927 		/*
3928 		 * The cache isn't valid, we need to take a snapshot.
3929 		 * Set the command flags appropriately
3930 		 */
3931 		ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
3932 		st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
3933 		rval = di_cache_update(st);
3934 		st->command = (DINFOCACHE & DIIOC_MASK);
3935 	}
3936 
3937 	DI_CACHE_UNLOCK(di_cache);
3938 
3939 	/*
3940 	 * For cached snapshots, the devinfo driver always returns
3941 	 * a snapshot rooted at "/".
3942 	 */
3943 	ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);
3944 
3945 	return ((int)rval);
3946 }
3947 
3948 /*
3949  * This is a forced update of the cache  - the previous state of the cache
3950  * may be:
3951  *	- unpopulated
3952  *	- populated and invalid
3953  *	- populated and valid
3954  */
3955 static int
3956 di_cache_update(struct di_state *st)
3957 {
3958 	int		rval;
3959 	uint32_t	crc;
3960 	struct di_all	*all;
3961 
3962 	ASSERT(DI_CACHE_LOCKED(di_cache));
3963 	ASSERT(snapshot_is_cacheable(st));
3964 
3965 	/*
3966 	 * Free the in-core cache and the on-disk file (if they exist)
3967 	 */
3968 	i_ddi_di_cache_free(&di_cache);
3969 
3970 	/*
3971 	 * Set valid flag before taking the snapshot,
3972 	 * so that any invalidations that arrive
3973 	 * during or after the snapshot are not
3974 	 * removed by us.
3975 	 */
3976 	atomic_or_32(&di_cache.cache_valid, 1);
3977 
3978 	rval = di_snapshot_and_clean(st);
3979 
3980 	if (rval == 0) {
3981 		CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
3982 		return (0);
3983 	}
3984 
3985 	DI_ALL_PTR(st)->map_size = rval;
3986 	if (di_mem2cache(st, &di_cache) == 0) {
3987 		CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
3988 		return (0);
3989 	}
3990 
3991 	ASSERT(di_cache.cache_data);
3992 	ASSERT(di_cache.cache_size > 0);
3993 
3994 	/*
3995 	 * Now that we have cached the snapshot, compute its checksum.
3996 	 * The checksum is only computed over the valid data in the
3997 	 * cache, not the entire cache.
3998 	 * Also, set all the fields (except checksum) before computing
3999 	 * checksum.
4000 	 */
4001 	all = (struct di_all *)di_cache.cache_data;
4002 	all->cache_magic = DI_CACHE_MAGIC;
4003 	all->map_size = rval;
4004 
4005 	ASSERT(all->cache_checksum == 0);
4006 	CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
4007 	all->cache_checksum = crc;
4008 
4009 	di_cache_write(&di_cache);
4010 
4011 	return (rval);
4012 }
4013 
4014 static void
4015 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
4016 {
4017 	va_list	ap;
4018 
4019 	if (di_cache_debug <= DI_QUIET)
4020 		return;
4021 
4022 	if (di_cache_debug < msglevel)
4023 		return;
4024 
4025 	switch (msglevel) {
4026 		case DI_ERR:
4027 			msglevel = CE_WARN;
4028 			break;
4029 		case DI_INFO:
4030 		case DI_TRACE:
4031 		default:
4032 			msglevel = CE_NOTE;
4033 			break;
4034 	}
4035 
4036 	va_start(ap, fmt);
4037 	vcmn_err(msglevel, fmt, ap);
4038 	va_end(ap);
4039 }
4040