1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * driver for accessing kernel devinfo tree.
28 */
29 #include <sys/types.h>
30 #include <sys/pathname.h>
31 #include <sys/debug.h>
32 #include <sys/autoconf.h>
33 #include <sys/vmsystm.h>
34 #include <sys/conf.h>
35 #include <sys/file.h>
36 #include <sys/kmem.h>
37 #include <sys/modctl.h>
38 #include <sys/stat.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunldi_impl.h>
42 #include <sys/sunndi.h>
43 #include <sys/esunddi.h>
44 #include <sys/sunmdi.h>
45 #include <sys/ddi_impldefs.h>
46 #include <sys/ndi_impldefs.h>
47 #include <sys/mdi_impldefs.h>
48 #include <sys/devinfo_impl.h>
49 #include <sys/thread.h>
50 #include <sys/modhash.h>
51 #include <sys/bitmap.h>
52 #include <util/qsort.h>
53 #include <sys/disp.h>
54 #include <sys/kobj.h>
55 #include <sys/crc32.h>
56 #include <sys/ddi_hp.h>
57 #include <sys/ddi_hp_impl.h>
58 #include <sys/sysmacros.h>
59 #include <sys/list.h>
60
61
62 #ifdef DEBUG
63 static int di_debug;
64 #define dcmn_err(args) if (di_debug >= 1) cmn_err args
65 #define dcmn_err2(args) if (di_debug >= 2) cmn_err args
66 #define dcmn_err3(args) if (di_debug >= 3) cmn_err args
67 #else
68 #define dcmn_err(args) /* nothing */
69 #define dcmn_err2(args) /* nothing */
70 #define dcmn_err3(args) /* nothing */
71 #endif
72
73 /*
74 * We partition the space of devinfo minor nodes equally between the full and
75 * unprivileged versions of the driver. The even-numbered minor nodes are the
76 * full version, while the odd-numbered ones are the read-only version.
77 */
78 static int di_max_opens = 32;
79
80 static int di_prop_dyn = 1; /* enable dynamic property support */
81
82 #define DI_FULL_PARENT 0
83 #define DI_READONLY_PARENT 1
84 #define DI_NODE_SPECIES 2
85 #define DI_UNPRIVILEGED_NODE(x) (((x) % 2) != 0)
86
87 #define IOC_IDLE 0 /* snapshot ioctl states */
88 #define IOC_SNAP 1 /* snapshot in progress */
89 #define IOC_DONE 2 /* snapshot done, but not copied out */
90 #define IOC_COPY 3 /* copyout in progress */
91
92 /*
93 * Keep max alignment so we can move snapshot to different platforms.
94 *
95 * NOTE: Most callers should rely on the di_checkmem return value
96 * being aligned, and reestablish *off_p with aligned value, instead
97 * of trying to align size of their allocations: this approach will
98 * minimize memory use.
99 */
100 #define DI_ALIGN(addr) ((addr + 7l) & ~7l)
101
102 /*
103 * To avoid wasting memory, make a linked list of memory chunks.
104 * Size of each chunk is buf_size.
105 */
106 struct di_mem {
107 struct di_mem *next; /* link to next chunk */
108 char *buf; /* contiguous kernel memory */
109 size_t buf_size; /* size of buf in bytes */
110 devmap_cookie_t cook; /* cookie from ddi_umem_alloc */
111 };
112
113 /*
114 * This is a stack for walking the tree without using recursion.
115 * When the devinfo tree height is above some small size, one
116 * gets watchdog resets on sun4m.
117 */
118 struct di_stack {
119 void *offset[MAX_TREE_DEPTH];
120 struct dev_info *dip[MAX_TREE_DEPTH];
121 int circ[MAX_TREE_DEPTH];
122 int depth; /* depth of current node to be copied */
123 };
124
125 #define TOP_OFFSET(stack) \
126 ((di_off_t *)(stack)->offset[(stack)->depth - 1])
127 #define TOP_NODE(stack) \
128 ((stack)->dip[(stack)->depth - 1])
129 #define PARENT_OFFSET(stack) \
130 ((di_off_t *)(stack)->offset[(stack)->depth - 2])
131 #define EMPTY_STACK(stack) ((stack)->depth == 0)
132 #define POP_STACK(stack) { \
133 ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
134 (stack)->circ[(stack)->depth - 1]); \
135 ((stack)->depth--); \
136 }
137 #define PUSH_STACK(stack, node, off_p) { \
138 ASSERT(node != NULL); \
139 ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
140 (stack)->dip[(stack)->depth] = (node); \
141 (stack)->offset[(stack)->depth] = (void *)(off_p); \
142 ((stack)->depth)++; \
143 }
144
145 #define DI_ALL_PTR(s) DI_ALL(di_mem_addr((s), 0))
146
147 /*
148 * With devfs, the device tree has no global locks. The device tree is
149 * dynamic and dips may come and go if they are not locked locally. Under
150 * these conditions, pointers are no longer reliable as unique IDs.
151 * Specifically, these pointers cannot be used as keys for hash tables
152 * as the same devinfo structure may be freed in one part of the tree only
153 * to be allocated as the structure for a different device in another
154 * part of the tree. This can happen if DR and the snapshot are
155 * happening concurrently.
156 * The following data structures act as keys for devinfo nodes and
157 * pathinfo nodes.
158 */
159
160 enum di_ktype {
161 DI_DKEY = 1,
162 DI_PKEY = 2
163 };
164
165 struct di_dkey {
166 dev_info_t *dk_dip;
167 major_t dk_major;
168 int dk_inst;
169 pnode_t dk_nodeid;
170 };
171
172 struct di_pkey {
173 mdi_pathinfo_t *pk_pip;
174 char *pk_path_addr;
175 dev_info_t *pk_client;
176 dev_info_t *pk_phci;
177 };
178
179 struct di_key {
180 enum di_ktype k_type;
181 union {
182 struct di_dkey dkey;
183 struct di_pkey pkey;
184 } k_u;
185 };
186
187
188 struct i_lnode;
189
190 typedef struct i_link {
191 /*
192 * If a di_link struct representing this i_link struct makes it
193 * into the snapshot, then self will point to the offset of
194 * the di_link struct in the snapshot
195 */
196 di_off_t self;
197
198 int spec_type; /* block or char access type */
199 struct i_lnode *src_lnode; /* src i_lnode */
200 struct i_lnode *tgt_lnode; /* tgt i_lnode */
201 struct i_link *src_link_next; /* next src i_link /w same i_lnode */
202 struct i_link *tgt_link_next; /* next tgt i_link /w same i_lnode */
203 } i_link_t;
204
205 typedef struct i_lnode {
206 /*
207 * If a di_lnode struct representing this i_lnode struct makes it
208 * into the snapshot, then self will point to the offset of
209 * the di_lnode struct in the snapshot
210 */
211 di_off_t self;
212
213 /*
214 * used for hashing and comparing i_lnodes
215 */
216 int modid;
217
218 /*
219 * public information describing a link endpoint
220 */
221 struct di_node *di_node; /* di_node in snapshot */
222 dev_t devt; /* devt */
223
224 /*
225 * i_link ptr to links coming into this i_lnode node
226 * (this i_lnode is the target of these i_links)
227 */
228 i_link_t *link_in;
229
230 /*
231 * i_link ptr to links going out of this i_lnode node
232 * (this i_lnode is the source of these i_links)
233 */
234 i_link_t *link_out;
235 } i_lnode_t;
236
237 typedef struct i_hp {
238 di_off_t hp_off; /* Offset of di_hp_t in snapshot */
239 dev_info_t *hp_child; /* Child devinfo node of the di_hp_t */
240 list_node_t hp_link; /* List linkage */
241 } i_hp_t;
242
243 /*
244 * Soft state associated with each instance of driver open.
245 */
246 static struct di_state {
247 di_off_t mem_size; /* total # bytes in memlist */
248 struct di_mem *memlist; /* head of memlist */
249 uint_t command; /* command from ioctl */
250 int di_iocstate; /* snapshot ioctl state */
251 mod_hash_t *reg_dip_hash;
252 mod_hash_t *reg_pip_hash;
253 int lnode_count;
254 int link_count;
255
256 mod_hash_t *lnode_hash;
257 mod_hash_t *link_hash;
258
259 list_t hp_list;
260 } **di_states;
261
262 static kmutex_t di_lock; /* serialize instance assignment */
263
264 typedef enum {
265 DI_QUIET = 0, /* DI_QUIET must always be 0 */
266 DI_ERR,
267 DI_INFO,
268 DI_TRACE,
269 DI_TRACE1,
270 DI_TRACE2
271 } di_cache_debug_t;
272
273 static uint_t di_chunk = 32; /* I/O chunk size in pages */
274
275 #define DI_CACHE_LOCK(c) (mutex_enter(&(c).cache_lock))
276 #define DI_CACHE_UNLOCK(c) (mutex_exit(&(c).cache_lock))
277 #define DI_CACHE_LOCKED(c) (mutex_owned(&(c).cache_lock))
278
279 /*
280 * Check that whole device tree is being configured as a pre-condition for
281 * cleaning up /etc/devices files.
282 */
283 #define DEVICES_FILES_CLEANABLE(st) \
284 (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
285 strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)
286
287 #define CACHE_DEBUG(args) \
288 { if (di_cache_debug != DI_QUIET) di_cache_print args; }
289
290 typedef struct phci_walk_arg {
291 di_off_t off;
292 struct di_state *st;
293 } phci_walk_arg_t;
294
295 static int di_open(dev_t *, int, int, cred_t *);
296 static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
297 static int di_close(dev_t, int, int, cred_t *);
298 static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
299 static int di_attach(dev_info_t *, ddi_attach_cmd_t);
300 static int di_detach(dev_info_t *, ddi_detach_cmd_t);
301
302 static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
303 static di_off_t di_snapshot_and_clean(struct di_state *);
304 static di_off_t di_copydevnm(di_off_t *, struct di_state *);
305 static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
306 static di_off_t di_copynode(struct dev_info *, struct di_stack *,
307 struct di_state *);
308 static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
309 struct di_state *);
310 static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
311 static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
312 static di_off_t di_gethpdata(ddi_hp_cn_handle_t *, di_off_t *,
313 struct di_state *);
314 static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
315 struct di_state *, struct dev_info *);
316 static void di_allocmem(struct di_state *, size_t);
317 static void di_freemem(struct di_state *);
318 static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
319 static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
320 static void *di_mem_addr(struct di_state *, di_off_t);
321 static int di_setstate(struct di_state *, int);
322 static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
323 static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
324 static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
325 struct di_state *, int);
326 static di_off_t di_getlink_data(di_off_t, struct di_state *);
327 static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);
328
329 static int cache_args_valid(struct di_state *st, int *error);
330 static int snapshot_is_cacheable(struct di_state *st);
331 static int di_cache_lookup(struct di_state *st);
332 static int di_cache_update(struct di_state *st);
333 static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
334 static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
335 static int build_phci_list(dev_info_t *ph_devinfo, void *arg);
336 static void di_hotplug_children(struct di_state *st);
337
338 extern int modrootloaded;
339 extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
340 extern void mdi_vhci_walk_phcis(dev_info_t *,
341 int (*)(dev_info_t *, void *), void *);
342
343
344 static struct cb_ops di_cb_ops = {
345 di_open, /* open */
346 di_close, /* close */
347 nodev, /* strategy */
348 nodev, /* print */
349 nodev, /* dump */
350 nodev, /* read */
351 nodev, /* write */
352 di_ioctl, /* ioctl */
353 nodev, /* devmap */
354 nodev, /* mmap */
355 nodev, /* segmap */
356 nochpoll, /* poll */
357 ddi_prop_op, /* prop_op */
358 NULL, /* streamtab */
359 D_NEW | D_MP /* Driver compatibility flag */
360 };
361
362 static struct dev_ops di_ops = {
363 DEVO_REV, /* devo_rev, */
364 0, /* refcnt */
365 di_info, /* info */
366 nulldev, /* identify */
367 nulldev, /* probe */
368 di_attach, /* attach */
369 di_detach, /* detach */
370 nodev, /* reset */
371 &di_cb_ops, /* driver operations */
372 NULL /* bus operations */
373 };
374
375 /*
376 * Module linkage information for the kernel.
377 */
378 static struct modldrv modldrv = {
379 &mod_driverops,
380 "DEVINFO Driver",
381 &di_ops
382 };
383
384 static struct modlinkage modlinkage = {
385 MODREV_1,
386 &modldrv,
387 NULL
388 };
389
390 int
_init(void)391 _init(void)
392 {
393 int error;
394
395 mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);
396
397 error = mod_install(&modlinkage);
398 if (error != 0) {
399 mutex_destroy(&di_lock);
400 return (error);
401 }
402
403 return (0);
404 }
405
406 int
_info(struct modinfo * modinfop)407 _info(struct modinfo *modinfop)
408 {
409 return (mod_info(&modlinkage, modinfop));
410 }
411
412 int
_fini(void)413 _fini(void)
414 {
415 int error;
416
417 error = mod_remove(&modlinkage);
418 if (error != 0) {
419 return (error);
420 }
421
422 mutex_destroy(&di_lock);
423 return (0);
424 }
425
426 static dev_info_t *di_dip;
427
428 /*ARGSUSED*/
429 static int
di_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)430 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
431 {
432 int error = DDI_FAILURE;
433
434 switch (infocmd) {
435 case DDI_INFO_DEVT2DEVINFO:
436 *result = (void *)di_dip;
437 error = DDI_SUCCESS;
438 break;
439 case DDI_INFO_DEVT2INSTANCE:
440 /*
441 * All dev_t's map to the same, single instance.
442 */
443 *result = (void *)0;
444 error = DDI_SUCCESS;
445 break;
446 default:
447 break;
448 }
449
450 return (error);
451 }
452
453 static int
di_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)454 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
455 {
456 int error = DDI_FAILURE;
457
458 switch (cmd) {
459 case DDI_ATTACH:
460 di_states = kmem_zalloc(
461 di_max_opens * sizeof (struct di_state *), KM_SLEEP);
462
463 if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
464 DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
465 ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
466 DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
467 kmem_free(di_states,
468 di_max_opens * sizeof (struct di_state *));
469 ddi_remove_minor_node(dip, NULL);
470 error = DDI_FAILURE;
471 } else {
472 di_dip = dip;
473 ddi_report_dev(dip);
474
475 error = DDI_SUCCESS;
476 }
477 break;
478 default:
479 error = DDI_FAILURE;
480 break;
481 }
482
483 return (error);
484 }
485
486 static int
di_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)487 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
488 {
489 int error = DDI_FAILURE;
490
491 switch (cmd) {
492 case DDI_DETACH:
493 ddi_remove_minor_node(dip, NULL);
494 di_dip = NULL;
495 kmem_free(di_states, di_max_opens * sizeof (struct di_state *));
496
497 error = DDI_SUCCESS;
498 break;
499 default:
500 error = DDI_FAILURE;
501 break;
502 }
503
504 return (error);
505 }
506
507 /*
508 * Allow multiple opens by tweaking the dev_t such that it looks like each
509 * open is getting a different minor device. Each minor gets a separate
510 * entry in the di_states[] table. Based on the original minor number, we
511 * discriminate opens of the full and read-only nodes. If all of the instances
512 * of the selected minor node are currently open, we return EAGAIN.
513 */
514 /*ARGSUSED*/
515 static int
di_open(dev_t * devp,int flag,int otyp,cred_t * credp)516 di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
517 {
518 int m;
519 minor_t minor_parent = getminor(*devp);
520
521 if (minor_parent != DI_FULL_PARENT &&
522 minor_parent != DI_READONLY_PARENT)
523 return (ENXIO);
524
525 mutex_enter(&di_lock);
526
527 for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
528 if (di_states[m] != NULL)
529 continue;
530
531 di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
532 break; /* It's ours. */
533 }
534
535 if (m >= di_max_opens) {
536 /*
537 * maximum open instance for device reached
538 */
539 mutex_exit(&di_lock);
540 dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
541 return (EAGAIN);
542 }
543 mutex_exit(&di_lock);
544
545 ASSERT(m < di_max_opens);
546 *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));
547
548 dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
549 (void *)curthread, m + DI_NODE_SPECIES));
550
551 return (0);
552 }
553
554 /*ARGSUSED*/
555 static int
di_close(dev_t dev,int flag,int otype,cred_t * cred_p)556 di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
557 {
558 struct di_state *st;
559 int m = (int)getminor(dev) - DI_NODE_SPECIES;
560
561 if (m < 0) {
562 cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
563 m + DI_NODE_SPECIES);
564 return (ENXIO);
565 }
566
567 st = di_states[m];
568 ASSERT(m < di_max_opens && st != NULL);
569
570 di_freemem(st);
571 kmem_free(st, sizeof (struct di_state));
572
573 /*
574 * empty slot in state table
575 */
576 mutex_enter(&di_lock);
577 di_states[m] = NULL;
578 dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
579 (void *)curthread, m + DI_NODE_SPECIES));
580 mutex_exit(&di_lock);
581
582 return (0);
583 }
584
585
586 /*ARGSUSED*/
587 static int
di_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)588 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
589 {
590 int rv, error;
591 di_off_t off;
592 struct di_all *all;
593 struct di_state *st;
594 int m = (int)getminor(dev) - DI_NODE_SPECIES;
595 major_t i;
596 char *drv_name;
597 size_t map_size, size;
598 struct di_mem *dcp;
599 int ndi_flags;
600
601 if (m < 0 || m >= di_max_opens) {
602 return (ENXIO);
603 }
604
605 st = di_states[m];
606 ASSERT(st != NULL);
607
608 dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));
609
610 switch (cmd) {
611 case DINFOIDENT:
612 /*
613 * This is called from di_init to verify that the driver
614 * opened is indeed devinfo. The purpose is to guard against
615 * sending ioctl to an unknown driver in case of an
616 * unresolved major number conflict during bfu.
617 */
618 *rvalp = DI_MAGIC;
619 return (0);
620
621 case DINFOLODRV:
622 /*
623 * Hold an installed driver and return the result
624 */
625 if (DI_UNPRIVILEGED_NODE(m)) {
626 /*
627 * Only the fully enabled instances may issue
628 * DINFOLDDRV.
629 */
630 return (EACCES);
631 }
632
633 drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
634 if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
635 kmem_free(drv_name, MAXNAMELEN);
636 return (EFAULT);
637 }
638
639 /*
640 * Some 3rd party driver's _init() walks the device tree,
641 * so we load the driver module before configuring driver.
642 */
643 i = ddi_name_to_major(drv_name);
644 if (ddi_hold_driver(i) == NULL) {
645 kmem_free(drv_name, MAXNAMELEN);
646 return (ENXIO);
647 }
648
649 ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
650
651 /*
652 * i_ddi_load_drvconf() below will trigger a reprobe
653 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
654 * needed here.
655 */
656 modunload_disable();
657 (void) i_ddi_load_drvconf(i);
658 (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
659 kmem_free(drv_name, MAXNAMELEN);
660 ddi_rele_driver(i);
661 rv = i_ddi_devs_attached(i);
662 modunload_enable();
663
664 i_ddi_di_cache_invalidate();
665
666 return ((rv == DDI_SUCCESS)? 0 : ENXIO);
667
668 case DINFOUSRLD:
669 /*
670 * The case for copying snapshot to userland
671 */
672 if (di_setstate(st, IOC_COPY) == -1)
673 return (EBUSY);
674
675 map_size = DI_ALL_PTR(st)->map_size;
676 if (map_size == 0) {
677 (void) di_setstate(st, IOC_DONE);
678 return (EFAULT);
679 }
680
681 /*
682 * copyout the snapshot
683 */
684 map_size = (map_size + PAGEOFFSET) & PAGEMASK;
685
686 /*
687 * Return the map size, so caller may do a sanity
688 * check against the return value of snapshot ioctl()
689 */
690 *rvalp = (int)map_size;
691
692 /*
693 * Copy one chunk at a time
694 */
695 off = 0;
696 dcp = st->memlist;
697 while (map_size) {
698 size = dcp->buf_size;
699 if (map_size <= size) {
700 size = map_size;
701 }
702
703 if (ddi_copyout(di_mem_addr(st, off),
704 (void *)(arg + off), size, mode) != 0) {
705 (void) di_setstate(st, IOC_DONE);
706 return (EFAULT);
707 }
708
709 map_size -= size;
710 off += size;
711 dcp = dcp->next;
712 }
713
714 di_freemem(st);
715 (void) di_setstate(st, IOC_IDLE);
716 return (0);
717
718 default:
719 if ((cmd & ~DIIOC_MASK) != DIIOC) {
720 /*
721 * Invalid ioctl command
722 */
723 return (ENOTTY);
724 }
725 /*
726 * take a snapshot
727 */
728 st->command = cmd & DIIOC_MASK;
729 /*FALLTHROUGH*/
730 }
731
732 /*
733 * Obtain enough memory to hold header + rootpath. We prevent kernel
734 * memory exhaustion by freeing any previously allocated snapshot and
735 * refusing the operation; otherwise we would be allowing ioctl(),
736 * ioctl(), ioctl(), ..., panic.
737 */
738 if (di_setstate(st, IOC_SNAP) == -1)
739 return (EBUSY);
740
741 /*
742 * Initial memlist always holds di_all and the root_path - and
743 * is at least a page and size.
744 */
745 size = sizeof (struct di_all) +
746 sizeof (((struct dinfo_io *)(NULL))->root_path);
747 if (size < PAGESIZE)
748 size = PAGESIZE;
749 off = di_checkmem(st, 0, size);
750 all = DI_ALL_PTR(st);
751 off += sizeof (struct di_all); /* real length of di_all */
752
753 all->devcnt = devcnt;
754 all->command = st->command;
755 all->version = DI_SNAPSHOT_VERSION;
756 all->top_vhci_devinfo = 0; /* filled by build_vhci_list. */
757
758 /*
759 * Note the endianness in case we need to transport snapshot
760 * over the network.
761 */
762 #if defined(_LITTLE_ENDIAN)
763 all->endianness = DI_LITTLE_ENDIAN;
764 #else
765 all->endianness = DI_BIG_ENDIAN;
766 #endif
767
768 /* Copyin ioctl args, store in the snapshot. */
769 if (copyinstr((void *)arg, all->req_path,
770 sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
771 di_freemem(st);
772 (void) di_setstate(st, IOC_IDLE);
773 return (EFAULT);
774 }
775 (void) strcpy(all->root_path, all->req_path);
776 off += size; /* real length of root_path */
777
778 if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
779 di_freemem(st);
780 (void) di_setstate(st, IOC_IDLE);
781 return (EINVAL);
782 }
783
784 error = 0;
785 if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
786 di_freemem(st);
787 (void) di_setstate(st, IOC_IDLE);
788 return (error);
789 }
790
791 /*
792 * Only the fully enabled version may force load drivers or read
793 * the parent private data from a driver.
794 */
795 if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
796 DI_UNPRIVILEGED_NODE(m)) {
797 di_freemem(st);
798 (void) di_setstate(st, IOC_IDLE);
799 return (EACCES);
800 }
801
802 /* Do we need private data? */
803 if (st->command & DINFOPRIVDATA) {
804 arg += sizeof (((struct dinfo_io *)(NULL))->root_path);
805
806 #ifdef _MULTI_DATAMODEL
807 switch (ddi_model_convert_from(mode & FMODELS)) {
808 case DDI_MODEL_ILP32: {
809 /*
810 * Cannot copy private data from 64-bit kernel
811 * to 32-bit app
812 */
813 di_freemem(st);
814 (void) di_setstate(st, IOC_IDLE);
815 return (EINVAL);
816 }
817 case DDI_MODEL_NONE:
818 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
819 di_freemem(st);
820 (void) di_setstate(st, IOC_IDLE);
821 return (EFAULT);
822 }
823 break;
824 }
825 #else /* !_MULTI_DATAMODEL */
826 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
827 di_freemem(st);
828 (void) di_setstate(st, IOC_IDLE);
829 return (EFAULT);
830 }
831 #endif /* _MULTI_DATAMODEL */
832 }
833
834 all->top_devinfo = DI_ALIGN(off);
835
836 /*
837 * For cache lookups we reallocate memory from scratch,
838 * so the value of "all" is no longer valid.
839 */
840 all = NULL;
841
842 if (st->command & DINFOCACHE) {
843 *rvalp = di_cache_lookup(st);
844 } else if (snapshot_is_cacheable(st)) {
845 DI_CACHE_LOCK(di_cache);
846 *rvalp = di_cache_update(st);
847 DI_CACHE_UNLOCK(di_cache);
848 } else
849 *rvalp = di_snapshot_and_clean(st);
850
851 if (*rvalp) {
852 DI_ALL_PTR(st)->map_size = *rvalp;
853 (void) di_setstate(st, IOC_DONE);
854 } else {
855 di_freemem(st);
856 (void) di_setstate(st, IOC_IDLE);
857 }
858
859 return (0);
860 }
861
862 /*
863 * Get a chunk of memory >= size, for the snapshot
864 */
865 static void
di_allocmem(struct di_state * st,size_t size)866 di_allocmem(struct di_state *st, size_t size)
867 {
868 struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);
869
870 /*
871 * Round up size to nearest power of 2. If it is less
872 * than st->mem_size, set it to st->mem_size (i.e.,
873 * the mem_size is doubled every time) to reduce the
874 * number of memory allocations.
875 */
876 size_t tmp = 1;
877 while (tmp < size) {
878 tmp <<= 1;
879 }
880 size = (tmp > st->mem_size) ? tmp : st->mem_size;
881
882 mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
883 mem->buf_size = size;
884
885 dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));
886
887 if (st->mem_size == 0) { /* first chunk */
888 st->memlist = mem;
889 } else {
890 /*
891 * locate end of linked list and add a chunk at the end
892 */
893 struct di_mem *dcp = st->memlist;
894 while (dcp->next != NULL) {
895 dcp = dcp->next;
896 }
897
898 dcp->next = mem;
899 }
900
901 st->mem_size += size;
902 }
903
904 /*
905 * Copy upto bufsiz bytes of the memlist to buf
906 */
907 static void
di_copymem(struct di_state * st,caddr_t buf,size_t bufsiz)908 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
909 {
910 struct di_mem *dcp;
911 size_t copysz;
912
913 if (st->mem_size == 0) {
914 ASSERT(st->memlist == NULL);
915 return;
916 }
917
918 copysz = 0;
919 for (dcp = st->memlist; dcp; dcp = dcp->next) {
920
921 ASSERT(bufsiz > 0);
922
923 if (bufsiz <= dcp->buf_size)
924 copysz = bufsiz;
925 else
926 copysz = dcp->buf_size;
927
928 bcopy(dcp->buf, buf, copysz);
929
930 buf += copysz;
931 bufsiz -= copysz;
932
933 if (bufsiz == 0)
934 break;
935 }
936 }
937
938 /*
939 * Free all memory for the snapshot
940 */
941 static void
di_freemem(struct di_state * st)942 di_freemem(struct di_state *st)
943 {
944 struct di_mem *dcp, *tmp;
945
946 dcmn_err2((CE_CONT, "di_freemem\n"));
947
948 if (st->mem_size) {
949 dcp = st->memlist;
950 while (dcp) { /* traverse the linked list */
951 tmp = dcp;
952 dcp = dcp->next;
953 ddi_umem_free(tmp->cook);
954 kmem_free(tmp, sizeof (struct di_mem));
955 }
956 st->mem_size = 0;
957 st->memlist = NULL;
958 }
959
960 ASSERT(st->mem_size == 0);
961 ASSERT(st->memlist == NULL);
962 }
963
964 /*
965 * Copies cached data to the di_state structure.
966 * Returns:
967 * - size of data copied, on SUCCESS
968 * - 0 on failure
969 */
970 static int
di_cache2mem(struct di_cache * cache,struct di_state * st)971 di_cache2mem(struct di_cache *cache, struct di_state *st)
972 {
973 caddr_t pa;
974
975 ASSERT(st->mem_size == 0);
976 ASSERT(st->memlist == NULL);
977 ASSERT(!servicing_interrupt());
978 ASSERT(DI_CACHE_LOCKED(*cache));
979
980 if (cache->cache_size == 0) {
981 ASSERT(cache->cache_data == NULL);
982 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
983 return (0);
984 }
985
986 ASSERT(cache->cache_data);
987
988 di_allocmem(st, cache->cache_size);
989
990 pa = di_mem_addr(st, 0);
991
992 ASSERT(pa);
993
994 /*
995 * Verify that di_allocmem() allocates contiguous memory,
996 * so that it is safe to do straight bcopy()
997 */
998 ASSERT(st->memlist != NULL);
999 ASSERT(st->memlist->next == NULL);
1000 bcopy(cache->cache_data, pa, cache->cache_size);
1001
1002 return (cache->cache_size);
1003 }
1004
1005 /*
1006 * Copies a snapshot from di_state to the cache
1007 * Returns:
1008 * - 0 on failure
1009 * - size of copied data on success
1010 */
1011 static size_t
di_mem2cache(struct di_state * st,struct di_cache * cache)1012 di_mem2cache(struct di_state *st, struct di_cache *cache)
1013 {
1014 size_t map_size;
1015
1016 ASSERT(cache->cache_size == 0);
1017 ASSERT(cache->cache_data == NULL);
1018 ASSERT(!servicing_interrupt());
1019 ASSERT(DI_CACHE_LOCKED(*cache));
1020
1021 if (st->mem_size == 0) {
1022 ASSERT(st->memlist == NULL);
1023 CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
1024 return (0);
1025 }
1026
1027 ASSERT(st->memlist);
1028
1029 /*
1030 * The size of the memory list may be much larger than the
1031 * size of valid data (map_size). Cache only the valid data
1032 */
1033 map_size = DI_ALL_PTR(st)->map_size;
1034 if (map_size == 0 || map_size < sizeof (struct di_all) ||
1035 map_size > st->mem_size) {
1036 CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
1037 return (0);
1038 }
1039
1040 cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
1041 cache->cache_size = map_size;
1042 di_copymem(st, cache->cache_data, cache->cache_size);
1043
1044 return (map_size);
1045 }
1046
1047 /*
1048 * Make sure there is at least "size" bytes memory left before
1049 * going on. Otherwise, start on a new chunk.
1050 */
1051 static di_off_t
di_checkmem(struct di_state * st,di_off_t off,size_t size)1052 di_checkmem(struct di_state *st, di_off_t off, size_t size)
1053 {
1054 dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
1055 off, (int)size));
1056
1057 /*
1058 * di_checkmem() shouldn't be called with a size of zero.
1059 * But in case it is, we want to make sure we return a valid
1060 * offset within the memlist and not an offset that points us
1061 * at the end of the memlist.
1062 */
1063 if (size == 0) {
1064 dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
1065 size = 1;
1066 }
1067
1068 off = DI_ALIGN(off);
1069 if ((st->mem_size - off) < size) {
1070 off = st->mem_size;
1071 di_allocmem(st, size);
1072 }
1073
1074 /* verify that return value is aligned */
1075 ASSERT(off == DI_ALIGN(off));
1076 return (off);
1077 }
1078
1079 /*
1080 * Copy the private data format from ioctl arg.
1081 * On success, the ending offset is returned. On error 0 is returned.
1082 */
1083 static di_off_t
di_copyformat(di_off_t off,struct di_state * st,intptr_t arg,int mode)1084 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
1085 {
1086 di_off_t size;
1087 struct di_priv_data *priv;
1088 struct di_all *all = DI_ALL_PTR(st);
1089
1090 dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
1091 off, (void *)arg, mode));
1092
1093 /*
1094 * Copyin data and check version.
1095 * We only handle private data version 0.
1096 */
1097 priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
1098 if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
1099 mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
1100 kmem_free(priv, sizeof (struct di_priv_data));
1101 return (0);
1102 }
1103
1104 /*
1105 * Save di_priv_data copied from userland in snapshot.
1106 */
1107 all->pd_version = priv->version;
1108 all->n_ppdata = priv->n_parent;
1109 all->n_dpdata = priv->n_driver;
1110
1111 /*
1112 * copyin private data format, modify offset accordingly
1113 */
1114 if (all->n_ppdata) { /* parent private data format */
1115 /*
1116 * check memory
1117 */
1118 size = all->n_ppdata * sizeof (struct di_priv_format);
1119 all->ppdata_format = off = di_checkmem(st, off, size);
1120 if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
1121 mode) != 0) {
1122 kmem_free(priv, sizeof (struct di_priv_data));
1123 return (0);
1124 }
1125
1126 off += size;
1127 }
1128
1129 if (all->n_dpdata) { /* driver private data format */
1130 /*
1131 * check memory
1132 */
1133 size = all->n_dpdata * sizeof (struct di_priv_format);
1134 all->dpdata_format = off = di_checkmem(st, off, size);
1135 if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
1136 mode) != 0) {
1137 kmem_free(priv, sizeof (struct di_priv_data));
1138 return (0);
1139 }
1140
1141 off += size;
1142 }
1143
1144 kmem_free(priv, sizeof (struct di_priv_data));
1145 return (off);
1146 }
1147
1148 /*
1149 * Return the real address based on the offset (off) within snapshot
1150 */
1151 static void *
di_mem_addr(struct di_state * st,di_off_t off)1152 di_mem_addr(struct di_state *st, di_off_t off)
1153 {
1154 struct di_mem *dcp = st->memlist;
1155
1156 dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
1157 (void *)dcp, off));
1158
1159 ASSERT(off < st->mem_size);
1160
1161 while (off >= dcp->buf_size) {
1162 off -= dcp->buf_size;
1163 dcp = dcp->next;
1164 }
1165
1166 dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
1167 off, (void *)(dcp->buf + off)));
1168
1169 return (dcp->buf + off);
1170 }
1171
1172 /*
1173 * Ideally we would use the whole key to derive the hash
1174 * value. However, the probability that two keys will
1175 * have the same dip (or pip) is very low, so
1176 * hashing by dip (or pip) pointer should suffice.
1177 */
1178 static uint_t
di_hash_byptr(void * arg,mod_hash_key_t key)1179 di_hash_byptr(void *arg, mod_hash_key_t key)
1180 {
1181 struct di_key *dik = key;
1182 size_t rshift;
1183 void *ptr;
1184
1185 ASSERT(arg == NULL);
1186
1187 switch (dik->k_type) {
1188 case DI_DKEY:
1189 ptr = dik->k_u.dkey.dk_dip;
1190 rshift = highbit(sizeof (struct dev_info));
1191 break;
1192 case DI_PKEY:
1193 ptr = dik->k_u.pkey.pk_pip;
1194 rshift = highbit(sizeof (struct mdi_pathinfo));
1195 break;
1196 default:
1197 panic("devinfo: unknown key type");
1198 /*NOTREACHED*/
1199 }
1200 return (mod_hash_byptr((void *)rshift, ptr));
1201 }
1202
1203 static void
di_key_dtor(mod_hash_key_t key)1204 di_key_dtor(mod_hash_key_t key)
1205 {
1206 char *path_addr;
1207 struct di_key *dik = key;
1208
1209 switch (dik->k_type) {
1210 case DI_DKEY:
1211 break;
1212 case DI_PKEY:
1213 path_addr = dik->k_u.pkey.pk_path_addr;
1214 if (path_addr)
1215 kmem_free(path_addr, strlen(path_addr) + 1);
1216 break;
1217 default:
1218 panic("devinfo: unknown key type");
1219 /*NOTREACHED*/
1220 }
1221
1222 kmem_free(dik, sizeof (struct di_key));
1223 }
1224
1225 static int
di_dkey_cmp(struct di_dkey * dk1,struct di_dkey * dk2)1226 di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
1227 {
1228 if (dk1->dk_dip != dk2->dk_dip)
1229 return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);
1230
1231 if (dk1->dk_major != DDI_MAJOR_T_NONE &&
1232 dk2->dk_major != DDI_MAJOR_T_NONE) {
1233 if (dk1->dk_major != dk2->dk_major)
1234 return (dk1->dk_major > dk2->dk_major ? 1 : -1);
1235
1236 if (dk1->dk_inst != dk2->dk_inst)
1237 return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
1238 }
1239
1240 if (dk1->dk_nodeid != dk2->dk_nodeid)
1241 return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);
1242
1243 return (0);
1244 }
1245
1246 static int
di_pkey_cmp(struct di_pkey * pk1,struct di_pkey * pk2)1247 di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
1248 {
1249 char *p1, *p2;
1250 int rv;
1251
1252 if (pk1->pk_pip != pk2->pk_pip)
1253 return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);
1254
1255 p1 = pk1->pk_path_addr;
1256 p2 = pk2->pk_path_addr;
1257
1258 p1 = p1 ? p1 : "";
1259 p2 = p2 ? p2 : "";
1260
1261 rv = strcmp(p1, p2);
1262 if (rv)
1263 return (rv > 0 ? 1 : -1);
1264
1265 if (pk1->pk_client != pk2->pk_client)
1266 return (pk1->pk_client > pk2->pk_client ? 1 : -1);
1267
1268 if (pk1->pk_phci != pk2->pk_phci)
1269 return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);
1270
1271 return (0);
1272 }
1273
1274 static int
di_key_cmp(mod_hash_key_t key1,mod_hash_key_t key2)1275 di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
1276 {
1277 struct di_key *dik1, *dik2;
1278
1279 dik1 = key1;
1280 dik2 = key2;
1281
1282 if (dik1->k_type != dik2->k_type) {
1283 panic("devinfo: mismatched keys");
1284 /*NOTREACHED*/
1285 }
1286
1287 switch (dik1->k_type) {
1288 case DI_DKEY:
1289 return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
1290 case DI_PKEY:
1291 return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
1292 default:
1293 panic("devinfo: unknown key type");
1294 /*NOTREACHED*/
1295 }
1296 }
1297
1298 static void
di_copy_aliases(struct di_state * st,alias_pair_t * apair,di_off_t * offp)1299 di_copy_aliases(struct di_state *st, alias_pair_t *apair, di_off_t *offp)
1300 {
1301 di_off_t off;
1302 struct di_all *all = DI_ALL_PTR(st);
1303 struct di_alias *di_alias;
1304 di_off_t curroff;
1305 dev_info_t *currdip;
1306 size_t size;
1307
1308 currdip = NULL;
1309 if (resolve_pathname(apair->pair_alias, &currdip, NULL, NULL) != 0) {
1310 return;
1311 }
1312
1313 if (di_dip_find(st, currdip, &curroff) != 0) {
1314 ndi_rele_devi(currdip);
1315 return;
1316 }
1317 ndi_rele_devi(currdip);
1318
1319 off = *offp;
1320 size = sizeof (struct di_alias);
1321 size += strlen(apair->pair_alias) + 1;
1322 off = di_checkmem(st, off, size);
1323 di_alias = DI_ALIAS(di_mem_addr(st, off));
1324
1325 di_alias->self = off;
1326 di_alias->next = all->aliases;
1327 all->aliases = off;
1328 (void) strcpy(di_alias->alias, apair->pair_alias);
1329 di_alias->curroff = curroff;
1330
1331 off += size;
1332
1333 *offp = off;
1334 }
1335
1336 /*
1337 * This is the main function that takes a snapshot
1338 */
1339 static di_off_t
di_snapshot(struct di_state * st)1340 di_snapshot(struct di_state *st)
1341 {
1342 di_off_t off;
1343 struct di_all *all;
1344 dev_info_t *rootnode;
1345 char buf[80];
1346 int plen;
1347 char *path;
1348 vnode_t *vp;
1349 int i;
1350
1351 all = DI_ALL_PTR(st);
1352 dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));
1353
1354 /*
1355 * Translate requested root path if an alias and snap-root != "/"
1356 */
1357 if (ddi_aliases_present == B_TRUE && strcmp(all->root_path, "/") != 0) {
1358 /* If there is no redirected alias, use root_path as is */
1359 rootnode = ddi_alias_redirect(all->root_path);
1360 if (rootnode) {
1361 (void) ddi_pathname(rootnode, all->root_path);
1362 goto got_root;
1363 }
1364 }
1365
1366 /*
1367 * Verify path before entrusting it to e_ddi_hold_devi_by_path because
1368 * some platforms have OBP bugs where executing the NDI_PROMNAME code
1369 * path against an invalid path results in panic. The lookupnameat
1370 * is done relative to rootdir without a leading '/' on "devices/"
1371 * to force the lookup to occur in the global zone.
1372 */
1373 plen = strlen("devices/") + strlen(all->root_path) + 1;
1374 path = kmem_alloc(plen, KM_SLEEP);
1375 (void) snprintf(path, plen, "devices/%s", all->root_path);
1376 if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
1377 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1378 all->root_path));
1379 kmem_free(path, plen);
1380 return (0);
1381 }
1382 kmem_free(path, plen);
1383 VN_RELE(vp);
1384
1385 /*
1386 * Hold the devinfo node referred by the path.
1387 */
1388 rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
1389 if (rootnode == NULL) {
1390 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1391 all->root_path));
1392 return (0);
1393 }
1394
1395 got_root:
1396 (void) snprintf(buf, sizeof (buf),
1397 "devinfo registered dips (statep=%p)", (void *)st);
1398
1399 st->reg_dip_hash = mod_hash_create_extended(buf, 64,
1400 di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1401 NULL, di_key_cmp, KM_SLEEP);
1402
1403
1404 (void) snprintf(buf, sizeof (buf),
1405 "devinfo registered pips (statep=%p)", (void *)st);
1406
1407 st->reg_pip_hash = mod_hash_create_extended(buf, 64,
1408 di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1409 NULL, di_key_cmp, KM_SLEEP);
1410
1411 if (DINFOHP & st->command) {
1412 list_create(&st->hp_list, sizeof (i_hp_t),
1413 offsetof(i_hp_t, hp_link));
1414 }
1415
1416 /*
1417 * copy the device tree
1418 */
1419 off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);
1420
1421 if (DINFOPATH & st->command) {
1422 mdi_walk_vhcis(build_vhci_list, st);
1423 }
1424
1425 if (DINFOHP & st->command) {
1426 di_hotplug_children(st);
1427 }
1428
1429 ddi_release_devi(rootnode);
1430
1431 /*
1432 * copy the devnames array
1433 */
1434 all->devnames = off;
1435 off = di_copydevnm(&all->devnames, st);
1436
1437
1438 /* initialize the hash tables */
1439 st->lnode_count = 0;
1440 st->link_count = 0;
1441
1442 if (DINFOLYR & st->command) {
1443 off = di_getlink_data(off, st);
1444 }
1445
1446 all->aliases = 0;
1447 if (ddi_aliases_present == B_FALSE)
1448 goto done;
1449
1450 for (i = 0; i < ddi_aliases.dali_num_pairs; i++) {
1451 di_copy_aliases(st, &(ddi_aliases.dali_alias_pairs[i]), &off);
1452 }
1453
1454 done:
1455 /*
1456 * Free up hash tables
1457 */
1458 mod_hash_destroy_hash(st->reg_dip_hash);
1459 mod_hash_destroy_hash(st->reg_pip_hash);
1460
1461 /*
1462 * Record the timestamp now that we are done with snapshot.
1463 *
1464 * We compute the checksum later and then only if we cache
1465 * the snapshot, since checksumming adds some overhead.
1466 * The checksum is checked later if we read the cache file.
1467 * from disk.
1468 *
1469 * Set checksum field to 0 as CRC is calculated with that
1470 * field set to 0.
1471 */
1472 all->snapshot_time = ddi_get_time();
1473 all->cache_checksum = 0;
1474
1475 ASSERT(all->snapshot_time != 0);
1476
1477 return (off);
1478 }
1479
1480 /*
1481 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
1482 */
1483 static di_off_t
di_snapshot_and_clean(struct di_state * st)1484 di_snapshot_and_clean(struct di_state *st)
1485 {
1486 di_off_t off;
1487
1488 modunload_disable();
1489 off = di_snapshot(st);
1490 if (off != 0 && (st->command & DINFOCLEANUP)) {
1491 ASSERT(DEVICES_FILES_CLEANABLE(st));
1492 /*
1493 * Cleanup /etc/devices files:
1494 * In order to accurately account for the system configuration
1495 * in /etc/devices files, the appropriate drivers must be
1496 * fully configured before the cleanup starts.
1497 * So enable modunload only after the cleanup.
1498 */
1499 i_ddi_clean_devices_files();
1500 /*
1501 * Remove backing store nodes for unused devices,
1502 * which retain past permissions customizations
1503 * and may be undesired for newly configured devices.
1504 */
1505 dev_devices_cleanup();
1506 }
1507 modunload_enable();
1508
1509 return (off);
1510 }
1511
1512 /*
1513 * construct vhci linkage in the snapshot.
1514 */
1515 static int
build_vhci_list(dev_info_t * vh_devinfo,void * arg)1516 build_vhci_list(dev_info_t *vh_devinfo, void *arg)
1517 {
1518 struct di_all *all;
1519 struct di_node *me;
1520 struct di_state *st;
1521 di_off_t off;
1522 phci_walk_arg_t pwa;
1523
1524 dcmn_err3((CE_CONT, "build_vhci list\n"));
1525
1526 dcmn_err3((CE_CONT, "vhci node %s%d\n",
1527 ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));
1528
1529 st = (struct di_state *)arg;
1530 if (di_dip_find(st, vh_devinfo, &off) != 0) {
1531 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1532 return (DDI_WALK_TERMINATE);
1533 }
1534
1535 dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
1536 st->mem_size, off));
1537
1538 all = DI_ALL_PTR(st);
1539 if (all->top_vhci_devinfo == 0) {
1540 all->top_vhci_devinfo = off;
1541 } else {
1542 me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));
1543
1544 while (me->next_vhci != 0) {
1545 me = DI_NODE(di_mem_addr(st, me->next_vhci));
1546 }
1547
1548 me->next_vhci = off;
1549 }
1550
1551 pwa.off = off;
1552 pwa.st = st;
1553 mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);
1554
1555 return (DDI_WALK_CONTINUE);
1556 }
1557
1558 /*
1559 * construct phci linkage for the given vhci in the snapshot.
1560 */
1561 static int
build_phci_list(dev_info_t * ph_devinfo,void * arg)1562 build_phci_list(dev_info_t *ph_devinfo, void *arg)
1563 {
1564 struct di_node *vh_di_node;
1565 struct di_node *me;
1566 phci_walk_arg_t *pwa;
1567 di_off_t off;
1568
1569 pwa = (phci_walk_arg_t *)arg;
1570
1571 dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
1572 pwa->off));
1573
1574 vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
1575 if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
1576 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1577 return (DDI_WALK_TERMINATE);
1578 }
1579
1580 dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
1581 ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));
1582
1583 if (vh_di_node->top_phci == 0) {
1584 vh_di_node->top_phci = off;
1585 return (DDI_WALK_CONTINUE);
1586 }
1587
1588 me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));
1589
1590 while (me->next_phci != 0) {
1591 me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
1592 }
1593 me->next_phci = off;
1594
1595 return (DDI_WALK_CONTINUE);
1596 }
1597
1598 /*
1599 * Assumes all devinfo nodes in device tree have been snapshotted
1600 */
1601 static void
snap_driver_list(struct di_state * st,struct devnames * dnp,di_off_t * off_p)1602 snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
1603 {
1604 struct dev_info *node;
1605 struct di_node *me;
1606 di_off_t off;
1607
1608 ASSERT(mutex_owned(&dnp->dn_lock));
1609
1610 node = DEVI(dnp->dn_head);
1611 for (; node; node = node->devi_next) {
1612 if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
1613 continue;
1614
1615 ASSERT(off > 0);
1616 me = DI_NODE(di_mem_addr(st, off));
1617 ASSERT(me->next == 0 || me->next == -1);
1618 /*
1619 * Only nodes which were BOUND when they were
1620 * snapshotted will be added to per-driver list.
1621 */
1622 if (me->next != -1)
1623 continue;
1624
1625 *off_p = off;
1626 off_p = &me->next;
1627 }
1628
1629 *off_p = 0;
1630 }
1631
1632 /*
1633 * Copy the devnames array, so we have a list of drivers in the snapshot.
1634 * Also makes it possible to locate the per-driver devinfo nodes.
1635 */
1636 static di_off_t
di_copydevnm(di_off_t * off_p,struct di_state * st)1637 di_copydevnm(di_off_t *off_p, struct di_state *st)
1638 {
1639 int i;
1640 di_off_t off;
1641 size_t size;
1642 struct di_devnm *dnp;
1643
1644 dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));
1645
1646 /*
1647 * make sure there is some allocated memory
1648 */
1649 size = devcnt * sizeof (struct di_devnm);
1650 *off_p = off = di_checkmem(st, *off_p, size);
1651 dnp = DI_DEVNM(di_mem_addr(st, off));
1652 off += size;
1653
1654 dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
1655 devcnt, off));
1656
1657 for (i = 0; i < devcnt; i++) {
1658 if (devnamesp[i].dn_name == NULL) {
1659 continue;
1660 }
1661
1662 /*
1663 * dn_name is not freed during driver unload or removal.
1664 *
1665 * There is a race condition when make_devname() changes
1666 * dn_name during our strcpy. This should be rare since
1667 * only add_drv does this. At any rate, we never had a
1668 * problem with ddi_name_to_major(), which should have
1669 * the same problem.
1670 */
1671 dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
1672 devnamesp[i].dn_name, devnamesp[i].dn_instance, off));
1673
1674 size = strlen(devnamesp[i].dn_name) + 1;
1675 dnp[i].name = off = di_checkmem(st, off, size);
1676 (void) strcpy((char *)di_mem_addr(st, off),
1677 devnamesp[i].dn_name);
1678 off += size;
1679
1680 mutex_enter(&devnamesp[i].dn_lock);
1681
1682 /*
1683 * Snapshot per-driver node list
1684 */
1685 snap_driver_list(st, &devnamesp[i], &dnp[i].head);
1686
1687 /*
1688 * This is not used by libdevinfo, leave it for now
1689 */
1690 dnp[i].flags = devnamesp[i].dn_flags;
1691 dnp[i].instance = devnamesp[i].dn_instance;
1692
1693 /*
1694 * get global properties
1695 */
1696 if ((DINFOPROP & st->command) &&
1697 devnamesp[i].dn_global_prop_ptr) {
1698 dnp[i].global_prop = off;
1699 off = di_getprop(DI_PROP_GLB_LIST,
1700 &devnamesp[i].dn_global_prop_ptr->prop_list,
1701 &dnp[i].global_prop, st, NULL);
1702 }
1703
1704 /*
1705 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
1706 */
1707 if (CB_DRV_INSTALLED(devopsp[i])) {
1708 if (devopsp[i]->devo_cb_ops) {
1709 dnp[i].ops |= DI_CB_OPS;
1710 if (devopsp[i]->devo_cb_ops->cb_str)
1711 dnp[i].ops |= DI_STREAM_OPS;
1712 }
1713 if (NEXUS_DRV(devopsp[i])) {
1714 dnp[i].ops |= DI_BUS_OPS;
1715 }
1716 }
1717
1718 mutex_exit(&devnamesp[i].dn_lock);
1719 }
1720
1721 dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));
1722
1723 return (off);
1724 }
1725
1726 /*
1727 * Copy the kernel devinfo tree. The tree and the devnames array forms
1728 * the entire snapshot (see also di_copydevnm).
1729 */
1730 static di_off_t
di_copytree(struct dev_info * root,di_off_t * off_p,struct di_state * st)1731 di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
1732 {
1733 di_off_t off;
1734 struct dev_info *node;
1735 struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);
1736
1737 dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
1738 (void *)root, *off_p));
1739
1740 /* force attach drivers */
1741 if (i_ddi_devi_attached((dev_info_t *)root) &&
1742 (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
1743 (void) ndi_devi_config((dev_info_t *)root,
1744 NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
1745 NDI_DRV_CONF_REPROBE);
1746 }
1747
1748 /*
1749 * Push top_devinfo onto a stack
1750 *
1751 * The stack is necessary to avoid recursion, which can overrun
1752 * the kernel stack.
1753 */
1754 PUSH_STACK(dsp, root, off_p);
1755
1756 /*
1757 * As long as there is a node on the stack, copy the node.
1758 * di_copynode() is responsible for pushing and popping
1759 * child and sibling nodes on the stack.
1760 */
1761 while (!EMPTY_STACK(dsp)) {
1762 node = TOP_NODE(dsp);
1763 off = di_copynode(node, dsp, st);
1764 }
1765
1766 /*
1767 * Free the stack structure
1768 */
1769 kmem_free(dsp, sizeof (struct di_stack));
1770
1771 return (off);
1772 }
1773
1774 /*
1775 * This is the core function, which copies all data associated with a single
1776 * node into the snapshot. The amount of information is determined by the
1777 * ioctl command.
1778 */
1779 static di_off_t
di_copynode(struct dev_info * node,struct di_stack * dsp,struct di_state * st)1780 di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
1781 {
1782 di_off_t off;
1783 struct di_node *me;
1784 size_t size;
1785 struct dev_info *n;
1786
1787 dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
1788 ASSERT((node != NULL) && (node == TOP_NODE(dsp)));
1789
1790 /*
1791 * check memory usage, and fix offsets accordingly.
1792 */
1793 size = sizeof (struct di_node);
1794 *(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
1795 me = DI_NODE(di_mem_addr(st, off));
1796 me->self = off;
1797 off += size;
1798
1799 dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
1800 node->devi_node_name, node->devi_instance, off));
1801
1802 /*
1803 * Node parameters:
1804 * self -- offset of current node within snapshot
1805 * nodeid -- pointer to PROM node (tri-valued)
1806 * state -- hot plugging device state
1807 * node_state -- devinfo node state
1808 */
1809 me->instance = node->devi_instance;
1810 me->nodeid = node->devi_nodeid;
1811 me->node_class = node->devi_node_class;
1812 me->attributes = node->devi_node_attributes;
1813 me->state = node->devi_state;
1814 me->flags = node->devi_flags;
1815 me->node_state = node->devi_node_state;
1816 me->next_vhci = 0; /* Filled up by build_vhci_list. */
1817 me->top_phci = 0; /* Filled up by build_phci_list. */
1818 me->next_phci = 0; /* Filled up by build_phci_list. */
1819 me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
1820 me->user_private_data = NULL;
1821
1822 /*
1823 * Get parent's offset in snapshot from the stack
1824 * and store it in the current node
1825 */
1826 if (dsp->depth > 1) {
1827 me->parent = *(PARENT_OFFSET(dsp));
1828 }
1829
1830 /*
1831 * Save the offset of this di_node in a hash table.
1832 * This is used later to resolve references to this
1833 * dip from other parts of the tree (per-driver list,
1834 * multipathing linkages, layered usage linkages).
1835 * The key used for the hash table is derived from
1836 * information in the dip.
1837 */
1838 di_register_dip(st, (dev_info_t *)node, me->self);
1839
1840 #ifdef DEVID_COMPATIBILITY
1841 /* check for devid as property marker */
1842 if (node->devi_devid_str) {
1843 ddi_devid_t devid;
1844
1845 /*
1846 * The devid is now represented as a property. For
1847 * compatibility with di_devid() interface in libdevinfo we
1848 * must return it as a binary structure in the snapshot. When
1849 * (if) di_devid() is removed from libdevinfo then the code
1850 * related to DEVID_COMPATIBILITY can be removed.
1851 */
1852 if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
1853 DDI_SUCCESS) {
1854 size = ddi_devid_sizeof(devid);
1855 off = di_checkmem(st, off, size);
1856 me->devid = off;
1857 bcopy(devid, di_mem_addr(st, off), size);
1858 off += size;
1859 ddi_devid_free(devid);
1860 }
1861 }
1862 #endif /* DEVID_COMPATIBILITY */
1863
1864 if (node->devi_node_name) {
1865 size = strlen(node->devi_node_name) + 1;
1866 me->node_name = off = di_checkmem(st, off, size);
1867 (void) strcpy(di_mem_addr(st, off), node->devi_node_name);
1868 off += size;
1869 }
1870
1871 if (node->devi_compat_names && (node->devi_compat_length > 1)) {
1872 size = node->devi_compat_length;
1873 me->compat_names = off = di_checkmem(st, off, size);
1874 me->compat_length = (int)size;
1875 bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
1876 off += size;
1877 }
1878
1879 if (node->devi_addr) {
1880 size = strlen(node->devi_addr) + 1;
1881 me->address = off = di_checkmem(st, off, size);
1882 (void) strcpy(di_mem_addr(st, off), node->devi_addr);
1883 off += size;
1884 }
1885
1886 if (node->devi_binding_name) {
1887 size = strlen(node->devi_binding_name) + 1;
1888 me->bind_name = off = di_checkmem(st, off, size);
1889 (void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
1890 off += size;
1891 }
1892
1893 me->drv_major = node->devi_major;
1894
1895 /*
1896 * If the dip is BOUND, set the next pointer of the
1897 * per-instance list to -1, indicating that it is yet to be resolved.
1898 * This will be resolved later in snap_driver_list().
1899 */
1900 if (me->drv_major != -1) {
1901 me->next = -1;
1902 } else {
1903 me->next = 0;
1904 }
1905
1906 /*
1907 * An optimization to skip mutex_enter when not needed.
1908 */
1909 if (!((DINFOMINOR | DINFOPROP | DINFOPATH | DINFOHP) & st->command)) {
1910 goto priv_data;
1911 }
1912
1913 /*
1914 * LOCKING: We already have an active ndi_devi_enter to gather the
1915 * minor data, and we will take devi_lock to gather properties as
1916 * needed off di_getprop.
1917 */
1918 if (!(DINFOMINOR & st->command)) {
1919 goto path;
1920 }
1921
1922 ASSERT(DEVI_BUSY_OWNED(node));
1923 if (node->devi_minor) { /* minor data */
1924 me->minor_data = off;
1925 off = di_getmdata(node->devi_minor, &me->minor_data,
1926 me->self, st);
1927 }
1928
1929 path:
1930 if (!(DINFOPATH & st->command)) {
1931 goto property;
1932 }
1933
1934 if (MDI_VHCI(node)) {
1935 me->multipath_component = MULTIPATH_COMPONENT_VHCI;
1936 }
1937
1938 if (MDI_CLIENT(node)) {
1939 me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
1940 me->multipath_client = off;
1941 off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
1942 me->self, st, 1);
1943 dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
1944 "component type = %d. off=%d",
1945 me->multipath_client,
1946 (void *)node, node->devi_mdi_component, off));
1947 }
1948
1949 if (MDI_PHCI(node)) {
1950 me->multipath_component = MULTIPATH_COMPONENT_PHCI;
1951 me->multipath_phci = off;
1952 off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
1953 me->self, st, 0);
1954 dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
1955 "component type = %d. off=%d",
1956 me->multipath_phci,
1957 (void *)node, node->devi_mdi_component, off));
1958 }
1959
1960 property:
1961 if (!(DINFOPROP & st->command)) {
1962 goto hotplug_data;
1963 }
1964
1965 if (node->devi_drv_prop_ptr) { /* driver property list */
1966 me->drv_prop = off;
1967 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr,
1968 &me->drv_prop, st, node);
1969 }
1970
1971 if (node->devi_sys_prop_ptr) { /* system property list */
1972 me->sys_prop = off;
1973 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr,
1974 &me->sys_prop, st, node);
1975 }
1976
1977 if (node->devi_hw_prop_ptr) { /* hardware property list */
1978 me->hw_prop = off;
1979 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr,
1980 &me->hw_prop, st, node);
1981 }
1982
1983 if (node->devi_global_prop_list == NULL) {
1984 me->glob_prop = (di_off_t)-1; /* not global property */
1985 } else {
1986 /*
1987 * Make copy of global property list if this devinfo refers
1988 * global properties different from what's on the devnames
1989 * array. It can happen if there has been a forced
1990 * driver.conf update. See mod_drv(1M).
1991 */
1992 ASSERT(me->drv_major != -1);
1993 if (node->devi_global_prop_list !=
1994 devnamesp[me->drv_major].dn_global_prop_ptr) {
1995 me->glob_prop = off;
1996 off = di_getprop(DI_PROP_GLB_LIST,
1997 &node->devi_global_prop_list->prop_list,
1998 &me->glob_prop, st, node);
1999 }
2000 }
2001
2002 hotplug_data:
2003 if (!(DINFOHP & st->command)) {
2004 goto priv_data;
2005 }
2006
2007 if (node->devi_hp_hdlp) { /* hotplug data */
2008 me->hp_data = off;
2009 off = di_gethpdata(node->devi_hp_hdlp, &me->hp_data, st);
2010 }
2011
2012 priv_data:
2013 if (!(DINFOPRIVDATA & st->command)) {
2014 goto pm_info;
2015 }
2016
2017 if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
2018 me->parent_data = off;
2019 off = di_getppdata(node, &me->parent_data, st);
2020 }
2021
2022 if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
2023 me->driver_data = off;
2024 off = di_getdpdata(node, &me->driver_data, st);
2025 }
2026
2027 pm_info: /* NOT implemented */
2028
2029 subtree:
2030 	/* keep the snapshot offset aligned */
2031 off = DI_ALIGN(off);
2032
2033 if (!(DINFOSUBTREE & st->command)) {
2034 POP_STACK(dsp);
2035 return (off);
2036 }
2037
2038 child:
2039 	/*
2040 	 * If there is a visible child, push it onto the stack.
2041 	 * Hold the parent (me) busy while doing so.
2042 	 */
2043 if ((n = node->devi_child) != NULL) {
2044 /* skip hidden nodes */
2045 while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2046 n = n->devi_sibling;
2047 if (n) {
2048 me->child = off;
2049 PUSH_STACK(dsp, n, &me->child);
2050 return (me->child);
2051 }
2052 }
2053
2054 sibling:
2055 	/*
2056 	 * Done with any child nodes; unwind the stack until a visible
2057 	 * sibling of a parent node is found or the root node is reached.
2058 	 */
2059 POP_STACK(dsp);
2060 while (!EMPTY_STACK(dsp)) {
2061 if ((n = node->devi_sibling) != NULL) {
2062 /* skip hidden nodes */
2063 while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2064 n = n->devi_sibling;
2065 if (n) {
2066 me->sibling = DI_ALIGN(off);
2067 PUSH_STACK(dsp, n, &me->sibling);
2068 return (me->sibling);
2069 }
2070 }
2071 node = TOP_NODE(dsp);
2072 me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
2073 POP_STACK(dsp);
2074 }
2075
2076 /*
2077 * DONE with all nodes
2078 */
2079 return (off);
2080 }
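
/*
 * For reference, the child/sibling handling above is a depth-first walk
 * of the devinfo tree driven by an explicit stack (PUSH_STACK/POP_STACK)
 * so that tree depth cannot exhaust the kernel stack. A minimal
 * recursive sketch of the same visit order; illustrative only, and
 * visit() is a hypothetical callback, not part of this driver:
 *
 *	void
 *	walk(struct dev_info *n)
 *	{
 *		for (; n != NULL; n = n->devi_sibling) {
 *			if (ndi_dev_is_hidden_node((dev_info_t *)n))
 *				continue;
 *			visit(n);		copy node into snapshot
 *			walk(n->devi_child);	then descend to children
 *		}
 *	}
 *
 * di_copynode() additionally records, at each push, the snapshot offset
 * (me->child or me->sibling) that must be patched with the location of
 * the node about to be copied.
 */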
2081
2082 static i_lnode_t *
2083 i_lnode_alloc(int modid)
2084 {
2085 i_lnode_t *i_lnode;
2086
2087 i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);
2088
2089 ASSERT(modid != -1);
2090 i_lnode->modid = modid;
2091
2092 return (i_lnode);
2093 }
2094
2095 static void
2096 i_lnode_free(i_lnode_t *i_lnode)
2097 {
2098 kmem_free(i_lnode, sizeof (i_lnode_t));
2099 }
2100
2101 static void
2102 i_lnode_check_free(i_lnode_t *i_lnode)
2103 {
2104 /* This lnode and its dip must have been snapshotted */
2105 ASSERT(i_lnode->self > 0);
2106 ASSERT(i_lnode->di_node->self > 0);
2107
2108 /* at least 1 link (in or out) must exist for this lnode */
2109 ASSERT(i_lnode->link_in || i_lnode->link_out);
2110
2111 i_lnode_free(i_lnode);
2112 }
2113
2114 static i_link_t *
2115 i_link_alloc(int spec_type)
2116 {
2117 i_link_t *i_link;
2118
2119 i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
2120 i_link->spec_type = spec_type;
2121
2122 return (i_link);
2123 }
2124
2125 static void
2126 i_link_check_free(i_link_t *i_link)
2127 {
2128 /* This link must have been snapshotted */
2129 ASSERT(i_link->self > 0);
2130
2131 /* Both endpoint lnodes must exist for this link */
2132 ASSERT(i_link->src_lnode);
2133 ASSERT(i_link->tgt_lnode);
2134
2135 kmem_free(i_link, sizeof (i_link_t));
2136 }
2137
2138 /*ARGSUSED*/
2139 static uint_t
2140 i_lnode_hashfunc(void *arg, mod_hash_key_t key)
2141 {
2142 i_lnode_t *i_lnode = (i_lnode_t *)key;
2143 struct di_node *ptr;
2144 dev_t dev;
2145
2146 dev = i_lnode->devt;
2147 if (dev != DDI_DEV_T_NONE)
2148 return (i_lnode->modid + getminor(dev) + getmajor(dev));
2149
2150 ptr = i_lnode->di_node;
2151 	if (ptr) {
2152 		uintptr_t k = (uintptr_t)ptr;
2153 		ASSERT(ptr->self > 0);
2154 k >>= (int)highbit(sizeof (struct di_node));
2155 return ((uint_t)k);
2156 }
2157
2158 return (i_lnode->modid);
2159 }
2160
2161 static int
2162 i_lnode_cmp(void *arg1, void *arg2)
2163 {
2164 i_lnode_t *i_lnode1 = (i_lnode_t *)arg1;
2165 i_lnode_t *i_lnode2 = (i_lnode_t *)arg2;
2166
2167 if (i_lnode1->modid != i_lnode2->modid) {
2168 return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
2169 }
2170
2171 if (i_lnode1->di_node != i_lnode2->di_node)
2172 return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);
2173
2174 if (i_lnode1->devt != i_lnode2->devt)
2175 return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);
2176
2177 return (0);
2178 }
2179
2180 /*
2181 * An lnode represents a {dip, dev_t} tuple. A link represents a
2182 * {src_lnode, tgt_lnode, spec_type} tuple.
2183 * The following callback assumes that LDI framework ref-counts the
2184 * src_dip and tgt_dip while invoking this callback.
2185 */
2186 static int
2187 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
2188 {
2189 struct di_state *st = (struct di_state *)arg;
2190 i_lnode_t *src_lnode, *tgt_lnode, *i_lnode;
2191 i_link_t **i_link_next, *i_link;
2192 di_off_t soff, toff;
2193 mod_hash_val_t nodep = NULL;
2194 int res;
2195
2196 /*
2197 * if the source or target of this device usage information doesn't
2198 * correspond to a device node then we don't report it via
2199 * libdevinfo so return.
2200 */
2201 if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
2202 return (LDI_USAGE_CONTINUE);
2203
2204 ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
2205 ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));
2206
2207 /*
2208 * Skip the ldi_usage if either src or tgt dip is not in the
2209 * snapshot. This saves us from pruning bad lnodes/links later.
2210 */
2211 if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
2212 return (LDI_USAGE_CONTINUE);
2213 if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
2214 return (LDI_USAGE_CONTINUE);
2215
2216 ASSERT(soff > 0);
2217 ASSERT(toff > 0);
2218
2219 /*
2220 * allocate an i_lnode and add it to the lnode hash
2221 * if it is not already present. For this particular
2222 * link the lnode is a source, but it may
2223 * participate as tgt or src in any number of layered
2224 * operations - so it may already be in the hash.
2225 */
2226 i_lnode = i_lnode_alloc(ldi_usage->src_modid);
2227 i_lnode->di_node = DI_NODE(di_mem_addr(st, soff));
2228 i_lnode->devt = ldi_usage->src_devt;
2229
2230 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2231 if (res == MH_ERR_NOTFOUND) {
2232 /*
2233 * new i_lnode
2234 * add it to the hash and increment the lnode count
2235 */
2236 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2237 ASSERT(res == 0);
2238 st->lnode_count++;
2239 src_lnode = i_lnode;
2240 } else {
2241 /* this i_lnode already exists in the lnode_hash */
2242 i_lnode_free(i_lnode);
2243 src_lnode = (i_lnode_t *)nodep;
2244 }
2245
2246 /*
2247 * allocate a tgt i_lnode and add it to the lnode hash
2248 */
2249 i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
2250 i_lnode->di_node = DI_NODE(di_mem_addr(st, toff));
2251 i_lnode->devt = ldi_usage->tgt_devt;
2252
2253 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2254 if (res == MH_ERR_NOTFOUND) {
2255 /*
2256 * new i_lnode
2257 * add it to the hash and increment the lnode count
2258 */
2259 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2260 ASSERT(res == 0);
2261 st->lnode_count++;
2262 tgt_lnode = i_lnode;
2263 } else {
2264 /* this i_lnode already exists in the lnode_hash */
2265 i_lnode_free(i_lnode);
2266 tgt_lnode = (i_lnode_t *)nodep;
2267 }
2268
2269 /*
2270 * allocate a i_link
2271 */
2272 i_link = i_link_alloc(ldi_usage->tgt_spec_type);
2273 i_link->src_lnode = src_lnode;
2274 i_link->tgt_lnode = tgt_lnode;
2275
2276 /*
2277 * add this link onto the src i_lnodes outbound i_link list
2278 */
2279 i_link_next = &(src_lnode->link_out);
2280 while (*i_link_next != NULL) {
2281 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
2282 (i_link->spec_type == (*i_link_next)->spec_type)) {
2283 /* this link already exists */
2284 kmem_free(i_link, sizeof (i_link_t));
2285 return (LDI_USAGE_CONTINUE);
2286 }
2287 i_link_next = &((*i_link_next)->src_link_next);
2288 }
2289 *i_link_next = i_link;
2290
2291 /*
2292 * add this link onto the tgt i_lnodes inbound i_link list
2293 */
2294 i_link_next = &(tgt_lnode->link_in);
2295 while (*i_link_next != NULL) {
2296 ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
2297 i_link_next = &((*i_link_next)->tgt_link_next);
2298 }
2299 *i_link_next = i_link;
2300
2301 /*
2302 * add this i_link to the link hash
2303 */
2304 res = mod_hash_insert(st->link_hash, i_link, i_link);
2305 ASSERT(res == 0);
2306 st->link_count++;
2307
2308 return (LDI_USAGE_CONTINUE);
2309 }
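
/*
 * Example of the graph built by this callback (illustrative values
 * only): if a layered consumer holds a target device open through LDI,
 * each reported ldi_usage_t yields
 *
 *	src lnode = { consumer modid, consumer di_node, consumer devt }
 *	tgt lnode = { target modid, target di_node, target devt }
 *	link	  = { src lnode, tgt lnode, tgt_spec_type }
 *
 * Because lnodes are keyed on the full {modid, di_node, devt} tuple
 * (see i_lnode_cmp() above), a device involved in many layered opens
 * appears once, with all of its links chained on link_in/link_out.
 */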
2310
2311 struct i_layer_data {
2312 struct di_state *st;
2313 int lnode_count;
2314 int link_count;
2315 di_off_t lnode_off;
2316 di_off_t link_off;
2317 };
2318
2319 /*ARGSUSED*/
2320 static uint_t
2321 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2322 {
2323 i_link_t *i_link = (i_link_t *)key;
2324 struct i_layer_data *data = arg;
2325 struct di_link *me;
2326 struct di_lnode *melnode;
2327 struct di_node *medinode;
2328
2329 ASSERT(i_link->self == 0);
2330
2331 i_link->self = data->link_off +
2332 (data->link_count * sizeof (struct di_link));
2333 data->link_count++;
2334
2335 ASSERT(data->link_off > 0 && data->link_count > 0);
2336 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
2337 ASSERT(data->link_count <= data->st->link_count);
2338
2339 /* fill in fields for the di_link snapshot */
2340 me = DI_LINK(di_mem_addr(data->st, i_link->self));
2341 me->self = i_link->self;
2342 me->spec_type = i_link->spec_type;
2343
2344 /*
2345 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
2346 * are created during the LDI table walk. Since we are
2347 * walking the link hash, the lnode hash has already been
2348 * walked and the lnodes have been snapshotted. Save lnode
2349 * offsets.
2350 */
2351 me->src_lnode = i_link->src_lnode->self;
2352 me->tgt_lnode = i_link->tgt_lnode->self;
2353
2354 /*
2355 * Save this link's offset in the src_lnode snapshot's link_out
2356 * field
2357 */
2358 melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode));
2359 me->src_link_next = melnode->link_out;
2360 melnode->link_out = me->self;
2361
2362 /*
2363 * Put this link on the tgt_lnode's link_in field
2364 */
2365 melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode));
2366 me->tgt_link_next = melnode->link_in;
2367 melnode->link_in = me->self;
2368
2369 /*
2370 * An i_lnode_t is only created if the corresponding dip exists
2371 * in the snapshot. A pointer to the di_node is saved in the
2372 * i_lnode_t when it is allocated. For this link, get the di_node
2373 * for the source lnode. Then put the link on the di_node's list
2374 * of src links
2375 */
2376 medinode = i_link->src_lnode->di_node;
2377 me->src_node_next = medinode->src_links;
2378 medinode->src_links = me->self;
2379
2380 /*
2381 * Put this link on the tgt_links list of the target
2382 * dip.
2383 */
2384 medinode = i_link->tgt_lnode->di_node;
2385 me->tgt_node_next = medinode->tgt_links;
2386 medinode->tgt_links = me->self;
2387
2388 return (MH_WALK_CONTINUE);
2389 }
2390
2391 /*ARGSUSED*/
2392 static uint_t
2393 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2394 {
2395 i_lnode_t *i_lnode = (i_lnode_t *)key;
2396 struct i_layer_data *data = arg;
2397 struct di_lnode *me;
2398 struct di_node *medinode;
2399
2400 ASSERT(i_lnode->self == 0);
2401
2402 i_lnode->self = data->lnode_off +
2403 (data->lnode_count * sizeof (struct di_lnode));
2404 data->lnode_count++;
2405
2406 ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2407 ASSERT(data->link_count == 0); /* links not done yet */
2408 ASSERT(data->lnode_count <= data->st->lnode_count);
2409
2410 /* fill in fields for the di_lnode snapshot */
2411 me = DI_LNODE(di_mem_addr(data->st, i_lnode->self));
2412 me->self = i_lnode->self;
2413
2414 if (i_lnode->devt == DDI_DEV_T_NONE) {
2415 me->dev_major = DDI_MAJOR_T_NONE;
2416 me->dev_minor = DDI_MAJOR_T_NONE;
2417 } else {
2418 me->dev_major = getmajor(i_lnode->devt);
2419 me->dev_minor = getminor(i_lnode->devt);
2420 }
2421
2422 /*
2423 * The dip corresponding to this lnode must exist in
2424 * the snapshot or we wouldn't have created the i_lnode_t
2425 * during LDI walk. Save the offset of the dip.
2426 */
2427 ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2428 me->node = i_lnode->di_node->self;
2429
2430 /*
2431 * There must be at least one link in or out of this lnode
2432 * or we wouldn't have created it. These fields will be set
2433 * during the link hash walk.
2434 */
2435 ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2436
2437 	/*
2438 	 * Set the offset of the devinfo node associated with this
2439 	 * lnode. Also update the node_next pointer; this pointer is
2440 	 * set when multiple lnodes are associated with the same
2441 	 * devinfo node (this can occur when multiple minor nodes
2442 	 * are open for one device, etc.).
2443 	 */
2444 medinode = i_lnode->di_node;
2445 me->node_next = medinode->lnodes;
2446 medinode->lnodes = me->self;
2447
2448 return (MH_WALK_CONTINUE);
2449 }
2450
2451 static di_off_t
2452 di_getlink_data(di_off_t off, struct di_state *st)
2453 {
2454 struct i_layer_data data = {0};
2455 size_t size;
2456
2457 	dcmn_err2((CE_CONT, "di_getlink_data: off = %x\n", off));
2458
2459 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
2460 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
2461 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);
2462
2463 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
2464 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));
2465
2466 /* get driver layering information */
2467 (void) ldi_usage_walker(st, di_ldi_callback);
2468
2469 /* check if there is any link data to include in the snapshot */
2470 if (st->lnode_count == 0) {
2471 ASSERT(st->link_count == 0);
2472 goto out;
2473 }
2474
2475 ASSERT(st->link_count != 0);
2476
2477 /* get a pointer to snapshot memory for all the di_lnodes */
2478 size = sizeof (struct di_lnode) * st->lnode_count;
2479 data.lnode_off = off = di_checkmem(st, off, size);
2480 off += size;
2481
2482 /* get a pointer to snapshot memory for all the di_links */
2483 size = sizeof (struct di_link) * st->link_count;
2484 data.link_off = off = di_checkmem(st, off, size);
2485 off += size;
2486
2487 data.lnode_count = data.link_count = 0;
2488 data.st = st;
2489
2490 /*
2491 * We have lnodes and links that will go into the
2492 * snapshot, so let's walk the respective hashes
2493 * and snapshot them. The various linkages are
2494 * also set up during the walk.
2495 */
2496 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
2497 ASSERT(data.lnode_count == st->lnode_count);
2498
2499 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
2500 ASSERT(data.link_count == st->link_count);
2501
2502 out:
2503 /* free up the i_lnodes and i_links used to create the snapshot */
2504 mod_hash_destroy_hash(st->lnode_hash);
2505 mod_hash_destroy_hash(st->link_hash);
2506 st->lnode_count = 0;
2507 st->link_count = 0;
2508
2509 return (off);
2510 }
2511
2512
2513 /*
2514 * Copy all minor data nodes attached to a devinfo node into the snapshot.
2515 * It is called from di_copynode with active ndi_devi_enter to protect
2516 * the list of minor nodes.
2517 */
2518 static di_off_t
2519 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
2520 struct di_state *st)
2521 {
2522 di_off_t off;
2523 struct di_minor *me;
2524 size_t size;
2525
2526 dcmn_err2((CE_CONT, "di_getmdata:\n"));
2527
2528 /*
2529 * check memory first
2530 */
2531 off = di_checkmem(st, *off_p, sizeof (struct di_minor));
2532 *off_p = off;
2533
2534 do {
2535 me = DI_MINOR(di_mem_addr(st, off));
2536 me->self = off;
2537 me->type = mnode->type;
2538 me->node = node;
2539 me->user_private_data = NULL;
2540
2541 off += sizeof (struct di_minor);
2542
2543 /*
2544 * Split dev_t to major/minor, so it works for
2545 		 * both ILP32 and LP64 models
2546 */
2547 me->dev_major = getmajor(mnode->ddm_dev);
2548 me->dev_minor = getminor(mnode->ddm_dev);
2549 me->spec_type = mnode->ddm_spec_type;
2550
2551 if (mnode->ddm_name) {
2552 size = strlen(mnode->ddm_name) + 1;
2553 me->name = off = di_checkmem(st, off, size);
2554 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
2555 off += size;
2556 }
2557
2558 if (mnode->ddm_node_type) {
2559 size = strlen(mnode->ddm_node_type) + 1;
2560 me->node_type = off = di_checkmem(st, off, size);
2561 (void) strcpy(di_mem_addr(st, off),
2562 mnode->ddm_node_type);
2563 off += size;
2564 }
2565
2566 off = di_checkmem(st, off, sizeof (struct di_minor));
2567 me->next = off;
2568 mnode = mnode->next;
2569 } while (mnode);
2570
2571 me->next = 0;
2572
2573 return (off);
2574 }
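
/*
 * In the snapshot, the minor nodes copied above form a singly linked
 * list threaded through buffer-relative offsets rather than pointers.
 * A sketch of consumer-side traversal (libdevinfo's
 * di_minor_next(3DEVINFO) wraps the same convention):
 *
 *	for (moff = dnp->minor_data; moff != 0; moff = dmn->next) {
 *		dmn = DI_MINOR(di_mem_addr(st, moff));
 *		dmn->name and dmn->node_type are string offsets
 *	}
 *
 * where dnp is the owning struct di_node.
 */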
2575
2576 /*
2577  * di_register_dip(), di_dip_find(): The dip must be protected
2578  * from deallocation when using these routines - this can be
2579  * a reference count, a busy hold, or a per-driver lock.
2580 */
2581
2582 static void
2583 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2584 {
2585 struct dev_info *node = DEVI(dip);
2586 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2587 struct di_dkey *dk;
2588
2589 ASSERT(dip);
2590 ASSERT(off > 0);
2591
2592 key->k_type = DI_DKEY;
2593 dk = &(key->k_u.dkey);
2594
2595 dk->dk_dip = dip;
2596 dk->dk_major = node->devi_major;
2597 dk->dk_inst = node->devi_instance;
2598 dk->dk_nodeid = node->devi_nodeid;
2599
2600 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2601 (mod_hash_val_t)(uintptr_t)off) != 0) {
2602 panic(
2603 "duplicate devinfo (%p) registered during device "
2604 "tree walk", (void *)dip);
2605 }
2606 }
2607
2608
2609 static int
2610 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2611 {
2612 /*
2613 * uintptr_t must be used because it matches the size of void *;
2614 * mod_hash expects clients to place results into pointer-size
2615 	 * containers. Since di_off_t is only a 32-bit offset, passing its
2616 	 * address directly would be overrun on 64-bit kernels.
2617 */
2618 uintptr_t offset;
2619 struct di_key key = {0};
2620 struct di_dkey *dk;
2621
2622 ASSERT(st->reg_dip_hash);
2623 ASSERT(dip);
2624 ASSERT(off_p);
2625
2626
2627 key.k_type = DI_DKEY;
2628 dk = &(key.k_u.dkey);
2629
2630 dk->dk_dip = dip;
2631 dk->dk_major = DEVI(dip)->devi_major;
2632 dk->dk_inst = DEVI(dip)->devi_instance;
2633 dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2634
2635 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2636 (mod_hash_val_t *)&offset) == 0) {
2637 *off_p = (di_off_t)offset;
2638 return (0);
2639 } else {
2640 return (-1);
2641 }
2642 }
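
/*
 * Typical round trip (sketch): while the tree walk holds a node busy,
 * di_register_dip(st, dip, off) records where its di_node landed in the
 * snapshot; later, e.g. from di_ldi_callback(), the offset is recovered:
 *
 *	di_off_t off;
 *
 *	if (di_dip_find(st, dip, &off) == 0)
 *		me = DI_NODE(di_mem_addr(st, off));
 *
 * The key carries major/instance/nodeid in addition to the dip pointer,
 * which guards against a recycled dev_info address matching a stale
 * registration.
 */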
2643
2644 /*
2645 * di_register_pip(), di_find_pip(): The pip must be protected from deallocation
2646 * when using these routines. The caller must do this by protecting the
2647 * client(or phci)<->pip linkage while traversing the list and then holding the
2648 * pip when it is found in the list.
2649 */
2650
2651 static void
2652 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
2653 {
2654 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2655 char *path_addr;
2656 struct di_pkey *pk;
2657
2658 ASSERT(pip);
2659 ASSERT(off > 0);
2660
2661 key->k_type = DI_PKEY;
2662 pk = &(key->k_u.pkey);
2663
2664 pk->pk_pip = pip;
2665 path_addr = mdi_pi_get_addr(pip);
2666 if (path_addr)
2667 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
2668 pk->pk_client = mdi_pi_get_client(pip);
2669 pk->pk_phci = mdi_pi_get_phci(pip);
2670
2671 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
2672 (mod_hash_val_t)(uintptr_t)off) != 0) {
2673 panic(
2674 "duplicate pathinfo (%p) registered during device "
2675 "tree walk", (void *)pip);
2676 }
2677 }
2678
2679 /*
2680 * As with di_register_pip, the caller must hold or lock the pip
2681 */
2682 static int
2683 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
2684 {
2685 /*
2686 * uintptr_t must be used because it matches the size of void *;
2687 * mod_hash expects clients to place results into pointer-size
2688 	 * containers. Since di_off_t is only a 32-bit offset, passing its
2689 	 * address directly would be overrun on 64-bit kernels.
2690 */
2691 uintptr_t offset;
2692 struct di_key key = {0};
2693 struct di_pkey *pk;
2694
2695 ASSERT(st->reg_pip_hash);
2696 ASSERT(off_p);
2697
2698 if (pip == NULL) {
2699 *off_p = 0;
2700 return (0);
2701 }
2702
2703 key.k_type = DI_PKEY;
2704 pk = &(key.k_u.pkey);
2705
2706 pk->pk_pip = pip;
2707 pk->pk_path_addr = mdi_pi_get_addr(pip);
2708 pk->pk_client = mdi_pi_get_client(pip);
2709 pk->pk_phci = mdi_pi_get_phci(pip);
2710
2711 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
2712 (mod_hash_val_t *)&offset) == 0) {
2713 *off_p = (di_off_t)offset;
2714 return (0);
2715 } else {
2716 return (-1);
2717 }
2718 }
2719
2720 static di_path_state_t
2721 path_state_convert(mdi_pathinfo_state_t st)
2722 {
2723 switch (st) {
2724 case MDI_PATHINFO_STATE_ONLINE:
2725 return (DI_PATH_STATE_ONLINE);
2726 case MDI_PATHINFO_STATE_STANDBY:
2727 return (DI_PATH_STATE_STANDBY);
2728 case MDI_PATHINFO_STATE_OFFLINE:
2729 return (DI_PATH_STATE_OFFLINE);
2730 case MDI_PATHINFO_STATE_FAULT:
2731 return (DI_PATH_STATE_FAULT);
2732 default:
2733 return (DI_PATH_STATE_UNKNOWN);
2734 }
2735 }
2736
2737 static uint_t
2738 path_flags_convert(uint_t pi_path_flags)
2739 {
2740 uint_t di_path_flags = 0;
2741
2742 /* MDI_PATHINFO_FLAGS_HIDDEN nodes not in snapshot */
2743
2744 if (pi_path_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED)
2745 di_path_flags |= DI_PATH_FLAGS_DEVICE_REMOVED;
2746
2747 return (di_path_flags);
2748 }
2749
2750
2751 static di_off_t
2752 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p,
2753 struct di_state *st)
2754 {
2755 nvpair_t *prop = NULL;
2756 struct di_path_prop *me;
2757 int off;
2758 size_t size;
2759 char *str;
2760 uchar_t *buf;
2761 uint_t nelems;
2762
2763 off = *off_p;
2764 if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
2765 *off_p = 0;
2766 return (off);
2767 }
2768
2769 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2770 *off_p = off;
2771
2772 while (prop = mdi_pi_get_next_prop(pip, prop)) {
2773 me = DI_PATHPROP(di_mem_addr(st, off));
2774 me->self = off;
2775 off += sizeof (struct di_path_prop);
2776
2777 /*
2778 * property name
2779 */
2780 size = strlen(nvpair_name(prop)) + 1;
2781 me->prop_name = off = di_checkmem(st, off, size);
2782 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
2783 off += size;
2784
2785 switch (nvpair_type(prop)) {
2786 case DATA_TYPE_BYTE:
2787 case DATA_TYPE_INT16:
2788 case DATA_TYPE_UINT16:
2789 case DATA_TYPE_INT32:
2790 case DATA_TYPE_UINT32:
2791 me->prop_type = DDI_PROP_TYPE_INT;
2792 size = sizeof (int32_t);
2793 off = di_checkmem(st, off, size);
2794 (void) nvpair_value_int32(prop,
2795 (int32_t *)di_mem_addr(st, off));
2796 break;
2797
2798 case DATA_TYPE_INT64:
2799 case DATA_TYPE_UINT64:
2800 me->prop_type = DDI_PROP_TYPE_INT64;
2801 size = sizeof (int64_t);
2802 off = di_checkmem(st, off, size);
2803 (void) nvpair_value_int64(prop,
2804 (int64_t *)di_mem_addr(st, off));
2805 break;
2806
2807 case DATA_TYPE_STRING:
2808 me->prop_type = DDI_PROP_TYPE_STRING;
2809 (void) nvpair_value_string(prop, &str);
2810 size = strlen(str) + 1;
2811 off = di_checkmem(st, off, size);
2812 (void) strcpy(di_mem_addr(st, off), str);
2813 break;
2814
2815 case DATA_TYPE_BYTE_ARRAY:
2816 case DATA_TYPE_INT16_ARRAY:
2817 case DATA_TYPE_UINT16_ARRAY:
2818 case DATA_TYPE_INT32_ARRAY:
2819 case DATA_TYPE_UINT32_ARRAY:
2820 case DATA_TYPE_INT64_ARRAY:
2821 case DATA_TYPE_UINT64_ARRAY:
2822 me->prop_type = DDI_PROP_TYPE_BYTE;
2823 (void) nvpair_value_byte_array(prop, &buf, &nelems);
2824 size = nelems;
2825 if (nelems != 0) {
2826 off = di_checkmem(st, off, size);
2827 bcopy(buf, di_mem_addr(st, off), size);
2828 }
2829 break;
2830
2831 default: /* Unknown or unhandled type; skip it */
2832 size = 0;
2833 break;
2834 }
2835
2836 if (size > 0) {
2837 me->prop_data = off;
2838 }
2839
2840 me->prop_len = (int)size;
2841 off += size;
2842
2843 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2844 me->prop_next = off;
2845 }
2846
2847 me->prop_next = 0;
2848 return (off);
2849 }
2850
2851
2852 static void
2853 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2854 int get_client)
2855 {
2856 if (get_client) {
2857 ASSERT(me->path_client == 0);
2858 me->path_client = noff;
2859 ASSERT(me->path_c_link == 0);
2860 *off_pp = &me->path_c_link;
2861 me->path_snap_state &=
2862 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2863 } else {
2864 ASSERT(me->path_phci == 0);
2865 me->path_phci = noff;
2866 ASSERT(me->path_p_link == 0);
2867 *off_pp = &me->path_p_link;
2868 me->path_snap_state &=
2869 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2870 }
2871 }
2872
2873 /*
2874 * off_p: pointer to the linkage field. This links pips along the client|phci
2875 * linkage list.
2876 * noff : Offset for the endpoint dip snapshot.
2877 */
2878 static di_off_t
2879 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff,
2880 struct di_state *st, int get_client)
2881 {
2882 di_off_t off;
2883 mdi_pathinfo_t *pip;
2884 struct di_path *me;
2885 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2886 size_t size;
2887
2888 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2889
2890 /*
2891 	 * The naming of the following mdi_xyz() routines is
2892 	 * unfortunately non-intuitive: mdi_get_next_phci_path()
2893 	 * follows the client_link, i.e. the list of pips belonging
2894 	 * to the given client dip.
2895 */
2896 if (get_client)
2897 next_pip = &mdi_get_next_phci_path;
2898 else
2899 next_pip = &mdi_get_next_client_path;
2900
2901 off = *off_p;
2902
2903 pip = NULL;
2904 while (pip = (*next_pip)(dip, pip)) {
2905 di_off_t stored_offset;
2906
2907 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2908
2909 mdi_pi_lock(pip);
2910
2911 /* We don't represent hidden paths in the snapshot */
2912 if (mdi_pi_ishidden(pip)) {
2913 dcmn_err((CE_WARN, "hidden, skip"));
2914 mdi_pi_unlock(pip);
2915 continue;
2916 }
2917
2918 if (di_pip_find(st, pip, &stored_offset) != -1) {
2919 /*
2920 			 * We've already seen this pathinfo node, so take care
2921 			 * not to snapshot it again; however, one endpoint and
2922 			 * its linkage are still set here. The other endpoint
2923 			 * and linkage were already set when the pip was first
2924 			 * snapshotted, i.e. when the other endpoint dip was
2925 			 * snapshotted.
2926 */
2927 me = DI_PATH(di_mem_addr(st, stored_offset));
2928 *off_p = stored_offset;
2929
2930 di_path_one_endpoint(me, noff, &off_p, get_client);
2931
2932 /*
2933 * The other endpoint and linkage were set when this
2934 * pip was snapshotted. So we are done with both
2935 * endpoints and linkages.
2936 */
2937 ASSERT(!(me->path_snap_state &
2938 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2939 ASSERT(!(me->path_snap_state &
2940 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2941
2942 mdi_pi_unlock(pip);
2943 continue;
2944 }
2945
2946 /*
2947 * Now that we need to snapshot this pip, check memory
2948 */
2949 size = sizeof (struct di_path);
2950 *off_p = off = di_checkmem(st, off, size);
2951 me = DI_PATH(di_mem_addr(st, off));
2952 me->self = off;
2953 off += size;
2954
2955 me->path_snap_state =
2956 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2957 me->path_snap_state |=
2958 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2959
2960 /*
2961 * Zero out fields as di_checkmem() doesn't guarantee
2962 * zero-filled memory
2963 */
2964 me->path_client = me->path_phci = 0;
2965 me->path_c_link = me->path_p_link = 0;
2966
2967 di_path_one_endpoint(me, noff, &off_p, get_client);
2968
2969 /*
2970 * Note the existence of this pathinfo
2971 */
2972 di_register_pip(st, pip, me->self);
2973
2974 me->path_state = path_state_convert(mdi_pi_get_state(pip));
2975 me->path_flags = path_flags_convert(mdi_pi_get_flags(pip));
2976
2977 me->path_instance = mdi_pi_get_path_instance(pip);
2978
2979 /*
2980 * Get intermediate addressing info.
2981 */
2982 size = strlen(mdi_pi_get_addr(pip)) + 1;
2983 me->path_addr = off = di_checkmem(st, off, size);
2984 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2985 off += size;
2986
2987 /*
2988 * Get path properties if props are to be included in the
2989 * snapshot
2990 */
2991 if (DINFOPROP & st->command) {
2992 me->path_prop = off;
2993 off = di_path_getprop(pip, &me->path_prop, st);
2994 } else {
2995 me->path_prop = 0;
2996 }
2997
2998 mdi_pi_unlock(pip);
2999 }
3000
3001 *off_p = 0;
3002 return (off);
3003 }
3004
3005 /*
3006 * Return driver prop_op entry point for the specified devinfo node.
3007 *
3008 * To return a non-NULL value:
3009 * - driver must be attached and held:
3010 * If driver is not attached we ignore the driver property list.
3011 * No one should rely on such properties.
3012 * - driver "cb_prop_op != ddi_prop_op":
3013 * If "cb_prop_op == ddi_prop_op", framework does not need to call driver.
3014 * XXX or parent's bus_prop_op != ddi_bus_prop_op
3015 */
3016 static int
3017 (*di_getprop_prop_op(struct dev_info *dip))
3018 (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *)
3019 {
3020 struct dev_ops *ops;
3021
3022 /* If driver is not attached we ignore the driver property list. */
3023 if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip))
3024 return (NULL);
3025
3026 /*
3027 * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev,
3028 * or even NULL.
3029 */
3030 ops = dip->devi_ops;
3031 if (ops && ops->devo_cb_ops &&
3032 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
3033 (ops->devo_cb_ops->cb_prop_op != nodev) &&
3034 (ops->devo_cb_ops->cb_prop_op != nulldev) &&
3035 (ops->devo_cb_ops->cb_prop_op != NULL))
3036 return (ops->devo_cb_ops->cb_prop_op);
3037 return (NULL);
3038 }
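
/*
 * The declaration above uses the "function returning a pointer to a
 * function" form. An equivalent spelling with a typedef, shown for
 * readability only (the typedef name is hypothetical and not used in
 * this file):
 *
 *	typedef int (*di_prop_op_f)(dev_t, dev_info_t *, ddi_prop_op_t,
 *	    int, char *, caddr_t, int *);
 *
 *	static di_prop_op_f di_getprop_prop_op(struct dev_info *dip);
 */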
3039
3040 static di_off_t
3041 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip,
3042 int (*prop_op)(),
3043 char *name, dev_t devt, int aflags, int alen, caddr_t aval,
3044 di_off_t off, di_off_t **off_pp)
3045 {
3046 int need_free = 0;
3047 dev_t pdevt;
3048 int pflags;
3049 int rv;
3050 caddr_t val;
3051 int len;
3052 size_t size;
3053 struct di_prop *pp;
3054
3055 /* If we have prop_op function, ask driver for latest value */
3056 if (prop_op) {
3057 ASSERT(dip);
3058
3059 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */
3060 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt;
3061
3062 /*
3063 * We have type information in flags, but are invoking an
3064 * old non-typed prop_op(9E) interface. Since not all types are
3065 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64),
3066 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to
3067 * expand type bits beyond DDI_PROP_TYPE_ANY. This allows us
3068 * to use the legacy prop_op(9E) interface to obtain updates
3069 		 * of non-DDI_PROP_TYPE_ANY dynamic properties.
3070 */
3071 pflags = aflags & ~DDI_PROP_TYPE_MASK;
3072 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM |
3073 DDI_PROP_CONSUMER_TYPED;
3074
3075 /*
3076 * Hold and exit across prop_op(9E) to avoid lock order
3077 * issues between
3078 * [ndi_devi_enter() ..prop_op(9E).. driver-lock]
3079 * .vs.
3080 * [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F)..
3081 * ndi_devi_enter()]
3082 * ordering.
3083 */
3084 ndi_hold_devi((dev_info_t *)dip);
3085 ndi_devi_exit((dev_info_t *)dip, dip->devi_circular);
3086 rv = (*prop_op)(pdevt, (dev_info_t *)dip,
3087 PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len);
3088 ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular);
3089 ndi_rele_devi((dev_info_t *)dip);
3090
3091 if (rv == DDI_PROP_SUCCESS) {
3092 need_free = 1; /* dynamic prop obtained */
3093 } else if (dyn) {
3094 /*
3095 * A dynamic property must succeed prop_op(9E) to show
3096 * up in the snapshot - that is the only source of its
3097 * value.
3098 */
3099 return (off); /* dynamic prop not supported */
3100 } else {
3101 /*
3102 			 * In case calling the driver caused an update via
3103 			 * prop_op(9E) of a non-dynamic property (code leading
3104 			 * to ddi_prop_change), we defer picking up val and
3105 			 * len information until after prop_op(9E) to ensure
3106 * that we snapshot the latest value.
3107 */
3108 val = aval;
3109 len = alen;
3110
3111 }
3112 } else {
3113 val = aval;
3114 len = alen;
3115 }
3116
3117 dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n",
3118 list, name ? name : "NULL", len, (void *)val));
3119
3120 size = sizeof (struct di_prop);
3121 **off_pp = off = di_checkmem(st, off, size);
3122 pp = DI_PROP(di_mem_addr(st, off));
3123 pp->self = off;
3124 off += size;
3125
3126 pp->dev_major = getmajor(devt);
3127 pp->dev_minor = getminor(devt);
3128 pp->prop_flags = aflags;
3129 pp->prop_list = list;
3130
3131 /* property name */
3132 if (name) {
3133 size = strlen(name) + 1;
3134 pp->prop_name = off = di_checkmem(st, off, size);
3135 (void) strcpy(di_mem_addr(st, off), name);
3136 off += size;
3137 } else {
3138 pp->prop_name = -1;
3139 }
3140
3141 pp->prop_len = len;
3142 if (val == NULL) {
3143 pp->prop_data = -1;
3144 } else if (len != 0) {
3145 size = len;
3146 pp->prop_data = off = di_checkmem(st, off, size);
3147 bcopy(val, di_mem_addr(st, off), size);
3148 off += size;
3149 }
3150
3151 pp->next = 0; /* assume tail for now */
3152 *off_pp = &pp->next; /* return pointer to our next */
3153
3154 if (need_free) /* free PROP_LEN_AND_VAL_ALLOC alloc */
3155 kmem_free(val, len);
3156 return (off);
3157 }
3158
3159
3160 /*
3161 * Copy a list of properties attached to a devinfo node. Called from
3162  * di_copynode with an active ndi_devi_enter. The devinfo node is passed in
3163  * case we need to call the driver's prop_op(9E) entry. The value of list
3164  * indicates which list we are copying. Possible values are:
3165 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
3166 */
3167 static di_off_t
3168 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p,
3169 struct di_state *st, struct dev_info *dip)
3170 {
3171 struct ddi_prop *prop;
3172 int (*prop_op)();
3173 int off;
3174 struct ddi_minor_data *mn;
3175 i_ddi_prop_dyn_t *dp;
3176 struct plist {
3177 struct plist *pl_next;
3178 char *pl_name;
3179 int pl_flags;
3180 dev_t pl_dev;
3181 int pl_len;
3182 caddr_t pl_val;
3183 } *pl, *pl0, **plp;
3184
3185 ASSERT(st != NULL);
3186
3187 off = *off_p;
3188 *off_p = 0;
3189 dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n",
3190 list, (void *)*pprop));
3191
3192 /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */
3193 prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL;
3194
3195 /*
3196 * Form private list of properties, holding devi_lock for properties
3197 * that hang off the dip.
3198 */
3199 if (dip)
3200 mutex_enter(&(dip->devi_lock));
3201 for (pl0 = NULL, plp = &pl0, prop = *pprop;
3202 prop; plp = &pl->pl_next, prop = prop->prop_next) {
3203 pl = kmem_alloc(sizeof (*pl), KM_SLEEP);
3204 *plp = pl;
3205 pl->pl_next = NULL;
3206 if (prop->prop_name)
3207 pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP);
3208 else
3209 pl->pl_name = NULL;
3210 pl->pl_flags = prop->prop_flags;
3211 pl->pl_dev = prop->prop_dev;
3212 if (prop->prop_len) {
3213 pl->pl_len = prop->prop_len;
3214 pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP);
3215 bcopy(prop->prop_val, pl->pl_val, pl->pl_len);
3216 } else {
3217 pl->pl_len = 0;
3218 pl->pl_val = NULL;
3219 }
3220 }
3221 if (dip)
3222 mutex_exit(&(dip->devi_lock));
3223
3224 /*
3225 * Now that we have dropped devi_lock, perform a second-pass to
3226 * add properties to the snapshot. We do this as a second pass
3227 * because we may need to call prop_op(9E) and we can't hold
3228 * devi_lock across that call.
3229 */
3230 for (pl = pl0; pl; pl = pl0) {
3231 pl0 = pl->pl_next;
3232 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name,
3233 pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val,
3234 off, &off_p);
3235 if (pl->pl_val)
3236 kmem_free(pl->pl_val, pl->pl_len);
3237 if (pl->pl_name)
3238 kmem_free(pl->pl_name, strlen(pl->pl_name) + 1);
3239 kmem_free(pl, sizeof (*pl));
3240 }
3241
3242 /*
3243 * If there is no prop_op or dynamic property support has been
3244 * disabled, we are done.
3245 */
3246 if ((prop_op == NULL) || (di_prop_dyn == 0)) {
3247 *off_p = 0;
3248 return (off);
3249 }
3250
3251 /* Add dynamic driver properties to snapshot */
3252 for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip);
3253 dp && dp->dp_name; dp++) {
3254 if (dp->dp_spec_type) {
3255 /* if spec_type, property of matching minor */
3256 ASSERT(DEVI_BUSY_OWNED(dip));
3257 for (mn = dip->devi_minor; mn; mn = mn->next) {
3258 if (mn->ddm_spec_type != dp->dp_spec_type)
3259 continue;
3260 off = di_getprop_add(list, 1, st, dip, prop_op,
3261 dp->dp_name, mn->ddm_dev, dp->dp_type,
3262 0, NULL, off, &off_p);
3263 }
3264 } else {
3265 /* property of devinfo node */
3266 off = di_getprop_add(list, 1, st, dip, prop_op,
3267 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3268 0, NULL, off, &off_p);
3269 }
3270 }
3271
3272 /* Add dynamic parent properties to snapshot */
3273 for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip);
3274 dp && dp->dp_name; dp++) {
3275 if (dp->dp_spec_type) {
3276 /* if spec_type, property of matching minor */
3277 ASSERT(DEVI_BUSY_OWNED(dip));
3278 for (mn = dip->devi_minor; mn; mn = mn->next) {
3279 if (mn->ddm_spec_type != dp->dp_spec_type)
3280 continue;
3281 off = di_getprop_add(list, 1, st, dip, prop_op,
3282 dp->dp_name, mn->ddm_dev, dp->dp_type,
3283 0, NULL, off, &off_p);
3284 }
3285 } else {
3286 /* property of devinfo node */
3287 off = di_getprop_add(list, 1, st, dip, prop_op,
3288 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3289 0, NULL, off, &off_p);
3290 }
3291 }
3292
3293 *off_p = 0;
3294 return (off);
3295 }
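
/*
 * The private plist copy above is the usual "copy under lock, process
 * after drop" pattern: devi_lock protects only the walk of the live
 * property list, while prop_op(9E) may block and therefore must be
 * called with devi_lock dropped. Condensed shape of the pattern
 * (sketch):
 *
 *	mutex_enter(&lock);
 *	copy items to a private list;
 *	mutex_exit(&lock);
 *	for each private copy:
 *		call the blocking operation, then free the copy;
 */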
3296
3297 /*
3298 * find private data format attached to a dip
3299 * parent = 1 to match driver name of parent dip (for parent private data)
3300 * 0 to match driver name of current dip (for driver private data)
3301 */
3302 #define DI_MATCH_DRIVER 0
3303 #define DI_MATCH_PARENT 1
3304
3305 struct di_priv_format *
3306 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3307 {
3308 int i, count, len;
3309 char *drv_name;
3310 major_t major;
3311 struct di_all *all;
3312 struct di_priv_format *form;
3313
3314 dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3315 node->devi_node_name, match));
3316
3317 if (match == DI_MATCH_PARENT) {
3318 node = DEVI(node->devi_parent);
3319 }
3320
3321 if (node == NULL) {
3322 return (NULL);
3323 }
3324
3325 major = node->devi_major;
3326 if (major == (major_t)(-1)) {
3327 return (NULL);
3328 }
3329
3330 /*
3331 * Match the driver name.
3332 */
3333 drv_name = ddi_major_to_name(major);
3334 if ((drv_name == NULL) || *drv_name == '\0') {
3335 return (NULL);
3336 }
3337
3338 /* Now get the di_priv_format array */
3339 all = DI_ALL_PTR(st);
3340 if (match == DI_MATCH_PARENT) {
3341 count = all->n_ppdata;
3342 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format));
3343 } else {
3344 count = all->n_dpdata;
3345 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format));
3346 }
3347
3348 len = strlen(drv_name);
3349 for (i = 0; i < count; i++) {
3350 char *tmp;
3351
3352 tmp = form[i].drv_name;
3353 while (tmp && (*tmp != '\0')) {
3354 if (strncmp(drv_name, tmp, len) == 0) {
3355 return (&form[i]);
3356 }
3357 /*
3358 * Move to next driver name, skipping a white space
3359 */
3360 if (tmp = strchr(tmp, ' ')) {
3361 tmp++;
3362 }
3363 }
3364 }
3365
3366 return (NULL);
3367 }
3368
3369 /*
3370 * The following functions copy data as specified by the format passed in.
3371 * To prevent invalid format from panicing the system, we call on_fault().
3372 * A return value of 0 indicates an error. Otherwise, the total offset
3373 * is returned.
3374 */
3375 #define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */
3376
3377 static di_off_t
3378 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
3379 void *data, di_off_t *off_p, struct di_state *st)
3380 {
3381 caddr_t pa;
3382 void *ptr;
3383 int i, size, repeat;
3384 di_off_t off, off0, *tmp;
3385 char *path;
3386 label_t ljb;
3387
3388 dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3389
3390 /*
3391 * check memory availability. Private data size is
3392 * limited to DI_MAX_PRIVDATA.
3393 */
3394 off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3395 *off_p = off;
3396
3397 if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3398 goto failure;
3399 }
3400
3401 if (!on_fault(&ljb)) {
3402 /* copy the struct */
3403 bcopy(data, di_mem_addr(st, off), pdp->bytes);
3404 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */
3405
3406 /* dereferencing pointers */
3407 for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3408
3409 if (pdp->ptr[i].size == 0) {
3410 goto success; /* no more ptrs */
3411 }
3412
3413 /*
3414 * first, get the pointer content
3415 */
3416 if ((pdp->ptr[i].offset < 0) ||
3417 (pdp->ptr[i].offset > pdp->bytes - sizeof (char *)))
3418 goto failure; /* wrong offset */
3419
3420 pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3421
3422 /* save a tmp ptr to store off_t later */
3423 tmp = (di_off_t *)(intptr_t)pa;
3424
3425 /* get pointer value, if NULL continue */
3426 ptr = *((void **) (intptr_t)pa);
3427 if (ptr == NULL) {
3428 continue;
3429 }
3430
3431 /*
3432 * next, find the repeat count (array dimension)
3433 */
3434 repeat = pdp->ptr[i].len_offset;
3435
3436 /*
3437 			 * A negative value indicates a fixed-size array;
3438 			 * the dimension is -len_offset. Zero or a positive
3439 			 * value indicates a variable-size array: the
3440 			 * dimension must then be an int member of the
3441 			 * structure, located at offset len_offset within
3442 			 * it (see the dereference below).
3443 */
3444 if (repeat > pdp->bytes - sizeof (int)) {
3445 goto failure; /* wrong offset */
3446 }
3447
3448 if (repeat >= 0) {
3449 repeat = *((int *)
3450 (intptr_t)((caddr_t)data + repeat));
3451 } else {
3452 repeat = -repeat;
3453 }
3454
3455 /*
3456 * next, get the size of the object to be copied
3457 */
3458 size = pdp->ptr[i].size * repeat;
3459
3460 /*
3461 			 * Arbitrarily limit the total size of the object to be
3462 			 * copied (1 byte up to the half-page DI_MAX_PRIVDATA).
3463 */
3464 if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3465 goto failure; /* wrong size or too big */
3466 }
3467
3468 /*
3469 * Now copy the data
3470 */
3471 *tmp = off0;
3472 bcopy(ptr, di_mem_addr(st, off + off0), size);
3473 off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */
3474 }
3475 } else {
3476 goto failure;
3477 }
3478
3479 success:
3480 /*
3481 * success if reached here
3482 */
3483 no_fault();
3484 return (off + off0);
3485 /*NOTREACHED*/
3486
3487 failure:
3488 /*
3489 * fault occurred
3490 */
3491 no_fault();
3492 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3493 cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
3494 ddi_pathname((dev_info_t *)node, path), data);
3495 kmem_free(path, MAXPATHLEN);
3496 *off_p = -1; /* set private data to indicate error */
3497
3498 return (off);
3499 }
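
/*
 * The on_fault()/no_fault() bracket above is the standard kernel idiom
 * for surviving an access through an untrusted pointer: on_fault() arms
 * a setjmp-style buffer and returns 0; if a protected access faults,
 * execution resumes as if on_fault() had returned non-zero. Minimal
 * shape of the idiom (sketch):
 *
 *	label_t ljb;
 *
 *	if (!on_fault(&ljb)) {
 *		accesses that may fault
 *	} else {
 *		fault recovery path
 *	}
 *	no_fault();
 */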
3500
3501 /*
3502 * get parent private data; on error, returns original offset
3503 */
3504 static di_off_t
3505 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3506 {
3507 int off;
3508 struct di_priv_format *ppdp;
3509
3510 dcmn_err2((CE_CONT, "di_getppdata:\n"));
3511
3512 /* find the parent data format */
3513 if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3514 off = *off_p;
3515 *off_p = 0; /* set parent data to none */
3516 return (off);
3517 }
3518
3519 return (di_getprvdata(ppdp, node,
3520 ddi_get_parent_data((dev_info_t *)node), off_p, st));
3521 }
3522
3523 /*
3524  * get driver private data; on error, returns original offset
3525 */
3526 static di_off_t
3527 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3528 {
3529 int off;
3530 struct di_priv_format *dpdp;
3531
3532 	dcmn_err2((CE_CONT, "di_getdpdata:\n"));
3533
3534 	/* find the driver data format */
3535 if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3536 off = *off_p;
3537 *off_p = 0; /* set driver data to none */
3538 return (off);
3539 }
3540
3541 return (di_getprvdata(dpdp, node,
3542 ddi_get_driver_private((dev_info_t *)node), off_p, st));
3543 }
3544
3545 /*
3546 * Copy hotplug data associated with a devinfo node into the snapshot.
3547 */
3548 static di_off_t
3549 di_gethpdata(ddi_hp_cn_handle_t *hp_hdl, di_off_t *off_p,
3550 struct di_state *st)
3551 {
3552 struct i_hp *hp;
3553 struct di_hp *me;
3554 size_t size;
3555 di_off_t off;
3556
3557 dcmn_err2((CE_CONT, "di_gethpdata:\n"));
3558
3559 /*
3560 * check memory first
3561 */
3562 off = di_checkmem(st, *off_p, sizeof (struct di_hp));
3563 *off_p = off;
3564
3565 do {
3566 me = DI_HP(di_mem_addr(st, off));
3567 me->self = off;
3568 me->hp_name = 0;
3569 me->hp_connection = (int)hp_hdl->cn_info.cn_num;
3570 me->hp_depends_on = (int)hp_hdl->cn_info.cn_num_dpd_on;
3571 (void) ddihp_cn_getstate(hp_hdl);
3572 me->hp_state = (int)hp_hdl->cn_info.cn_state;
3573 me->hp_type = (int)hp_hdl->cn_info.cn_type;
3574 me->hp_type_str = 0;
3575 me->hp_last_change = (uint32_t)hp_hdl->cn_info.cn_last_change;
3576 me->hp_child = 0;
3577
3578 /*
3579 * Child links are resolved later by di_hotplug_children().
3580 * Store a reference to this di_hp_t in the list used later
3581 * by di_hotplug_children().
3582 */
3583 hp = kmem_zalloc(sizeof (i_hp_t), KM_SLEEP);
3584 hp->hp_off = off;
3585 hp->hp_child = hp_hdl->cn_info.cn_child;
3586 list_insert_tail(&st->hp_list, hp);
3587
3588 off += sizeof (struct di_hp);
3589
3590 /* Add name of this di_hp_t to the snapshot */
3591 if (hp_hdl->cn_info.cn_name) {
3592 size = strlen(hp_hdl->cn_info.cn_name) + 1;
3593 me->hp_name = off = di_checkmem(st, off, size);
3594 (void) strcpy(di_mem_addr(st, off),
3595 hp_hdl->cn_info.cn_name);
3596 off += size;
3597 }
3598
3599 /* Add type description of this di_hp_t to the snapshot */
3600 if (hp_hdl->cn_info.cn_type_str) {
3601 size = strlen(hp_hdl->cn_info.cn_type_str) + 1;
3602 me->hp_type_str = off = di_checkmem(st, off, size);
3603 (void) strcpy(di_mem_addr(st, off),
3604 hp_hdl->cn_info.cn_type_str);
3605 off += size;
3606 }
3607
3608 /*
3609 * Set link to next in the chain of di_hp_t nodes,
3610 * or terminate the chain when processing the last node.
3611 */
3612 if (hp_hdl->next != NULL) {
3613 off = di_checkmem(st, off, sizeof (struct di_hp));
3614 me->next = off;
3615 } else {
3616 me->next = 0;
3617 }
3618
3619 /* Update pointer to next in the chain */
3620 hp_hdl = hp_hdl->next;
3621
3622 } while (hp_hdl);
3623
3624 return (off);
3625 }
3626
3627 /*
3628 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3629 * This function encapsulates the state machine:
3630 *
3631 * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3632 * | SNAPSHOT USRLD |
3633 * --------------------------------------------------
3634 *
3635 * Returns 0 on success and -1 on failure
3636 */
3637 static int
3638 di_setstate(struct di_state *st, int new_state)
3639 {
3640 int ret = 0;
3641
3642 mutex_enter(&di_lock);
3643 switch (new_state) {
3644 case IOC_IDLE:
3645 case IOC_DONE:
3646 break;
3647 case IOC_SNAP:
3648 if (st->di_iocstate != IOC_IDLE)
3649 ret = -1;
3650 break;
3651 case IOC_COPY:
3652 if (st->di_iocstate != IOC_DONE)
3653 ret = -1;
3654 break;
3655 default:
3656 ret = -1;
3657 }
3658
3659 if (ret == 0)
3660 st->di_iocstate = new_state;
3661 else
3662 cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3663 st->di_iocstate, new_state);
3664 mutex_exit(&di_lock);
3665 return (ret);
3666 }
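
/*
 * Usage sketch; the actual transitions are driven by the ioctl paths
 * elsewhere in this file. A snapshot request moves IDLE->SNAP while the
 * snapshot is built, then SNAP->DONE; a subsequent DINFOUSRLD moves
 * DONE->COPY around the copyout, and teardown returns to IOC_IDLE:
 *
 *	if (di_setstate(st, IOC_SNAP) == -1)
 *		return (EBUSY);		another snapshot in progress
 *	... build the snapshot ...
 *	(void) di_setstate(st, IOC_DONE);
 */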
3667
3668 /*
3669 * We cannot assume the presence of the entire
3670 * snapshot in this routine. All we are guaranteed
3671 * is the di_all struct + 1 byte (for root_path)
3672 */
3673 static int
3674 header_plus_one_ok(struct di_all *all)
3675 {
3676 /*
3677 * Refuse to read old versions
3678 */
3679 if (all->version != DI_SNAPSHOT_VERSION) {
3680 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
3681 return (0);
3682 }
3683
3684 if (all->cache_magic != DI_CACHE_MAGIC) {
3685 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
3686 return (0);
3687 }
3688
3689 if (all->snapshot_time == 0) {
3690 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time));
3691 return (0);
3692 }
3693
3694 if (all->top_devinfo == 0) {
3695 CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
3696 return (0);
3697 }
3698
3699 if (all->map_size < sizeof (*all) + 1) {
3700 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
3701 return (0);
3702 }
3703
3704 if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
3705 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
3706 all->root_path[0], all->root_path[1]));
3707 return (0);
3708 }
3709
3710 /*
3711 * We can't check checksum here as we just have the header
3712 */
3713
3714 return (1);
3715 }
3716
3717 static int
3718 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
3719 {
3720 rlim64_t rlimit;
3721 ssize_t resid;
3722 int error = 0;
3723
3724
3725 rlimit = RLIM64_INFINITY;
3726
3727 while (len) {
3728 resid = 0;
3729 error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
3730 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
3731
3732 if (error || resid < 0) {
3733 error = error ? error : EIO;
3734 CACHE_DEBUG((DI_ERR, "write error: %d", error));
3735 break;
3736 }
3737
3738 /*
3739 * Check if we are making progress
3740 */
3741 if (resid >= len) {
3742 error = ENOSPC;
3743 break;
3744 }
3745 buf += len - resid;
3746 off += len - resid;
3747 len = resid;
3748 }
3749
3750 return (error);
3751 }
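
/*
 * vn_rdwr() may complete only part of a request, returning the number
 * of unwritten bytes in resid, so chunk_write() advances buf/off by
 * (len - resid) and retries; a pass with no forward progress
 * (resid >= len) is treated as ENOSPC rather than spinning. Caller-side
 * sketch:
 *
 *	if ((error = chunk_write(vp, off, buf, chunk)) != 0)
 *		goto fail;	file may be partially written
 */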
3752
3753 static void
3754 di_cache_write(struct di_cache *cache)
3755 {
3756 struct di_all *all;
3757 struct vnode *vp;
3758 int oflags;
3759 size_t map_size;
3760 size_t chunk;
3761 offset_t off;
3762 int error;
3763 char *buf;
3764
3765 ASSERT(DI_CACHE_LOCKED(*cache));
3766 ASSERT(!servicing_interrupt());
3767
3768 if (cache->cache_size == 0) {
3769 ASSERT(cache->cache_data == NULL);
3770 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write"));
3771 return;
3772 }
3773
3774 ASSERT(cache->cache_size > 0);
3775 ASSERT(cache->cache_data);
3776
3777 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) {
3778 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write"));
3779 return;
3780 }
3781
3782 all = (struct di_all *)cache->cache_data;
3783
3784 if (!header_plus_one_ok(all)) {
3785 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write"));
3786 return;
3787 }
3788
3789 ASSERT(strcmp(all->root_path, "/") == 0);
3790
3791 /*
3792 * The cache_size is the total allocated memory for the cache.
3793 * The map_size is the actual size of valid data in the cache.
3794 * map_size may be smaller than cache_size but cannot exceed
3795 * cache_size.
3796 */
3797 if (all->map_size > cache->cache_size) {
3798 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)."
3799 " Skipping write", all->map_size, cache->cache_size));
3800 return;
3801 }
3802
3803 /*
3804 * First unlink the temp file
3805 */
3806 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE);
3807 if (error && error != ENOENT) {
3808 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d",
3809 DI_CACHE_TEMP, error));
3810 }
3811
3812 if (error == EROFS) {
3813 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write"));
3814 return;
3815 }
3816
3817 vp = NULL;
3818 oflags = (FCREAT|FWRITE);
3819 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags,
3820 DI_CACHE_PERMS, &vp, CRCREAT, 0)) {
3821 CACHE_DEBUG((DI_ERR, "%s: create failed: %d",
3822 DI_CACHE_TEMP, error));
3823 return;
3824 }
3825
3826 ASSERT(vp);
3827
3828 /*
3829 * Paranoid: Check if the file is on a read-only FS
3830 */
3831 if (vn_is_readonly(vp)) {
3832 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS"));
3833 goto fail;
3834 }
3835
3836 /*
3837 * Note that we only write map_size bytes to disk - this saves
3838 * space as the actual cache size may be larger than size of
3839 * valid data in the cache.
3840 * Another advantage is that it makes verification of size
3841 * easier when the file is read later.
3842 */
3843 map_size = all->map_size;
3844 off = 0;
3845 buf = cache->cache_data;
3846
3847 while (map_size) {
3848 ASSERT(map_size > 0);
3849 /*
3850 * Write in chunks so that VM system
3851 * is not overwhelmed
3852 */
3853 if (map_size > di_chunk * PAGESIZE)
3854 chunk = di_chunk * PAGESIZE;
3855 else
3856 chunk = map_size;
3857
3858 error = chunk_write(vp, off, buf, chunk);
3859 if (error) {
3860 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d",
3861 off, error));
3862 goto fail;
3863 }
3864
3865 off += chunk;
3866 buf += chunk;
3867 map_size -= chunk;
3868
3869 /* If low on memory, give pageout a chance to run */
3870 if (freemem < desfree)
3871 delay(1);
3872 }
3873
3874 /*
3875 * Now sync the file and close it
3876 */
3877 if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) {
3878 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error));
3879 }
3880
3881 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) {
3882 CACHE_DEBUG((DI_ERR, "close() failed: %d", error));
3883 VN_RELE(vp);
3884 return;
3885 }
3886
3887 VN_RELE(vp);
3888
3889 /*
3890 * Now do the rename
3891 */
3892 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) {
3893 CACHE_DEBUG((DI_ERR, "rename failed: %d", error));
3894 return;
3895 }
3896
3897 CACHE_DEBUG((DI_INFO, "Cache write successful."));
3898
3899 return;
3900
3901 fail:
3902 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL);
3903 VN_RELE(vp);
3904 }
3905
3906
3907 /*
3908 * Since we could be called early in boot,
3909 * use kobj_read_file()
3910 */
3911 static void
3912 di_cache_read(struct di_cache *cache)
3913 {
3914 struct _buf *file;
3915 struct di_all *all;
3916 int n;
3917 size_t map_size, sz, chunk;
3918 offset_t off;
3919 caddr_t buf;
3920 uint32_t saved_crc, crc;
3921
3922 ASSERT(modrootloaded);
3923 ASSERT(DI_CACHE_LOCKED(*cache));
3924 ASSERT(cache->cache_data == NULL);
3925 ASSERT(cache->cache_size == 0);
3926 ASSERT(!servicing_interrupt());
3927
3928 file = kobj_open_file(DI_CACHE_FILE);
3929 if (file == (struct _buf *)-1) {
3930 CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
3931 DI_CACHE_FILE, ENOENT));
3932 return;
3933 }
3934
	/*
	 * Read in the header+root_path first. The root_path must be "/".
	 */
	all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
	n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);

	if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
		kmem_free(all, sizeof (*all) + 1);
		kobj_close_file(file);
		CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
		return;
	}

	map_size = all->map_size;

	kmem_free(all, sizeof (*all) + 1);

	ASSERT(map_size >= sizeof (*all) + 1);

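	/*
	 * Note: the rest of this function operates on the global di_cache
	 * directly; the caller is expected to pass &di_cache (as
	 * di_cache_lookup() does).
	 */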
	buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
	sz = map_size;
	off = 0;
	while (sz) {
		/* Don't overload VM with large reads */
		chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
		n = kobj_read_file(file, buf, chunk, off);
		if (n != chunk) {
			CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
			    DI_CACHE_FILE, off));
			goto fail;
		}
		off += chunk;
		buf += chunk;
		sz -= chunk;
	}

	ASSERT(off == map_size);

	/*
	 * Read past expected EOF to verify size.
	 */
	if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
		CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
		goto fail;
	}

	all = (struct di_all *)di_cache.cache_data;
	if (!header_plus_one_ok(all)) {
		CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE));
		goto fail;
	}

	/*
	 * Compute CRC with checksum field in the cache data set to 0
	 */
	saved_crc = all->cache_checksum;
	all->cache_checksum = 0;
	CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
	all->cache_checksum = saved_crc;

	if (crc != all->cache_checksum) {
		CACHE_DEBUG((DI_ERR,
		    "%s: checksum error: expected=0x%x actual=0x%x",
		    DI_CACHE_FILE, all->cache_checksum, crc));
		goto fail;
	}

	if (all->map_size != map_size) {
		CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
		goto fail;
	}

	kobj_close_file(file);

	di_cache.cache_size = map_size;

	return;

fail:
	kmem_free(di_cache.cache_data, map_size);
	kobj_close_file(file);
	di_cache.cache_data = NULL;
	di_cache.cache_size = 0;
}


/*
 * Checks if arguments are valid for using the cache.
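 * Returns 1 if the cache may be used, or 0 with *error set to the reason.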
 */
static int
cache_args_valid(struct di_state *st, int *error)
{
	ASSERT(error);
	ASSERT(st->mem_size > 0);
	ASSERT(st->memlist != NULL);

	if (!modrootloaded || !i_ddi_io_initialized()) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: I/O subsystem not inited"));
		*error = ENOTACTIVE;
		return (0);
	}

	/*
	 * No other flags allowed with DINFOCACHE
	 */
	if (st->command != (DINFOCACHE & DIIOC_MASK)) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: bad flags: 0x%x",
		    st->command));
		*error = EINVAL;
		return (0);
	}

	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: bad root: %s",
		    DI_ALL_PTR(st)->root_path));
		*error = EINVAL;
		return (0);
	}

	CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));

	*error = 0;

	return (1);
}

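/*
 * Determine whether a snapshot request can be served from (and saved
 * into) the cache: the command flags must include the cacheable set
 * and the snapshot must be rooted at "/".
 */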
static int
snapshot_is_cacheable(struct di_state *st)
{
	ASSERT(st->mem_size > 0);
	ASSERT(st->memlist != NULL);

	if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
	    (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
		CACHE_DEBUG((DI_INFO,
		    "not cacheable: incompatible flags: 0x%x",
		    st->command));
		return (0);
	}

	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
		CACHE_DEBUG((DI_INFO,
		    "not cacheable: incompatible root path: %s",
		    DI_ALL_PTR(st)->root_path));
		return (0);
	}

	CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command));

	return (1);
}

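/*
 * Satisfy a DINFOCACHE request from the snapshot cache.  If the cache
 * is valid but not in memory, read it in from disk first.  If the cache
 * is invalid, fall back to taking a fresh snapshot via di_cache_update().
 * Returns the size of the valid snapshot data, or 0 on failure.
 */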
static int
di_cache_lookup(struct di_state *st)
{
	size_t rval;
	int cache_valid;

	ASSERT(cache_args_valid(st, &cache_valid));
	ASSERT(modrootloaded);

	DI_CACHE_LOCK(di_cache);

	/*
	 * The following assignment determines the validity
	 * of the cache as far as this snapshot is concerned.
	 */
	cache_valid = di_cache.cache_valid;

	if (cache_valid && di_cache.cache_data == NULL) {
		di_cache_read(&di_cache);
		/* check for read or file error */
		if (di_cache.cache_data == NULL)
			cache_valid = 0;
	}

	if (cache_valid) {
		/*
		 * Ok, the cache was valid as of this particular
		 * snapshot. Copy the cached snapshot. This is safe
		 * to do as the cache cannot be freed (we hold the
		 * cache lock). Free the memory allocated in di_state
		 * up until this point - we will simply copy everything
		 * in the cache.
		 */

		ASSERT(di_cache.cache_data != NULL);
		ASSERT(di_cache.cache_size > 0);

		di_freemem(st);

		rval = 0;
		if (di_cache2mem(&di_cache, st) > 0) {
			/*
			 * map_size is the size of the valid data in the
			 * cached snapshot and may be less than the size
			 * of the cache.
			 */
			ASSERT(DI_ALL_PTR(st));
			rval = DI_ALL_PTR(st)->map_size;

			ASSERT(rval >= sizeof (struct di_all));
			ASSERT(rval <= di_cache.cache_size);
		}
	} else {
		/*
		 * The cache isn't valid; we need to take a fresh
		 * snapshot.  Set the command flags appropriately.
		 */
		ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
		st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
		rval = di_cache_update(st);
		st->command = (DINFOCACHE & DIIOC_MASK);
	}

	DI_CACHE_UNLOCK(di_cache);

	/*
	 * For cached snapshots, the devinfo driver always returns
	 * a snapshot rooted at "/".
	 */
	ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);

	return ((int)rval);
}

/*
 * This is a forced update of the cache - the previous state of the cache
 * may be:
 *	- unpopulated
 *	- populated and invalid
 *	- populated and valid
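 *
 * The update sequence is: free any existing in-core cache and on-disk
 * file, mark the cache valid, take a new snapshot, copy it into the
 * cache, checksum it, and write it out to disk.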
 */
static int
di_cache_update(struct di_state *st)
{
	int rval;
	uint32_t crc;
	struct di_all *all;

	ASSERT(DI_CACHE_LOCKED(di_cache));
	ASSERT(snapshot_is_cacheable(st));

	/*
	 * Free the in-core cache and the on-disk file (if they exist)
	 */
	i_ddi_di_cache_free(&di_cache);

	/*
	 * Set the valid flag before taking the snapshot, so that an
	 * invalidation arriving during or after the snapshot is not
	 * undone by us: the invalidation clears cache_valid, and we
	 * never set it again after this point.
	 */
	atomic_or_32(&di_cache.cache_valid, 1);

	rval = di_snapshot_and_clean(st);

	if (rval == 0) {
		CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
		return (0);
	}

	DI_ALL_PTR(st)->map_size = rval;
	if (di_mem2cache(st, &di_cache) == 0) {
		CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
		return (0);
	}

	ASSERT(di_cache.cache_data);
	ASSERT(di_cache.cache_size > 0);

	/*
	 * Now that we have cached the snapshot, compute its checksum.
	 * The checksum is only computed over the valid data in the
	 * cache, not the entire cache.
	 * Also, set all the fields (except checksum) before computing
	 * checksum.
	 */
	all = (struct di_all *)di_cache.cache_data;
	all->cache_magic = DI_CACHE_MAGIC;
	all->map_size = rval;

	ASSERT(all->cache_checksum == 0);
	CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
	all->cache_checksum = crc;

	di_cache_write(&di_cache);

	return (rval);
}

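/*
 * Emit a devinfo cache debug message via cmn_err().  This is normally
 * invoked through the CACHE_DEBUG() macro, for example (hypothetical
 * message text):
 *
 *	CACHE_DEBUG((DI_INFO, "cache read: %d bytes", nbytes));
 *
 * Messages are filtered by the di_cache_debug level; DI_ERR maps to
 * CE_WARN and all other levels map to CE_NOTE.
 */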
static void
di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
{
	va_list ap;

	if (di_cache_debug <= DI_QUIET)
		return;

	if (di_cache_debug < msglevel)
		return;

	switch (msglevel) {
	case DI_ERR:
		msglevel = CE_WARN;
		break;
	case DI_INFO:
	case DI_TRACE:
	default:
		msglevel = CE_NOTE;
		break;
	}

	va_start(ap, fmt);
	vcmn_err(msglevel, fmt, ap);
	va_end(ap);
}

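/*
 * Walk the list of hotplug connections recorded while taking the
 * snapshot and, for each one whose child dip made it into the
 * snapshot, patch the child's snapshot offset into the corresponding
 * di_hp node.  The list nodes are freed and the list destroyed.
 */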
static void
di_hotplug_children(struct di_state *st)
{
	di_off_t off;
	struct di_hp *hp;
	struct i_hp *hp_list_node;

	while (hp_list_node = (struct i_hp *)list_remove_head(&st->hp_list)) {

		if ((hp_list_node->hp_child != NULL) &&
		    (di_dip_find(st, hp_list_node->hp_child, &off) == 0)) {
			hp = DI_HP(di_mem_addr(st, hp_list_node->hp_off));
			hp->hp_child = off;
		}

		kmem_free(hp_list_node, sizeof (i_hp_t));
	}

	list_destroy(&st->hp_list);
}
