/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Driver for accessing the kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/vmsystm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define dcmn_err(args) if (di_debug >= 1) cmn_err args
#define dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define dcmn_err(args) /* nothing */
#define dcmn_err2(args) /* nothing */
#define dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver. The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

static int di_prop_dyn = 1;	/* enable dynamic property support */

#define DI_FULL_PARENT		0
#define DI_READONLY_PARENT	1
#define DI_NODE_SPECIES		2
#define DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define IOC_IDLE	0	/* snapshot ioctl states */
#define IOC_SNAP	1	/* snapshot in progress */
#define IOC_DONE	2	/* snapshot done, but not copied out */
#define IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move the snapshot to different platforms.
 *
 * NOTE: Most callers should rely on the di_checkmem return value
 * being aligned, and reestablish *off_p with the aligned value, instead
 * of trying to align the size of their allocations: this approach
 * minimizes memory use.
 */
#define DI_ALIGN(addr)	((addr + 7l) & ~7l)
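
/*
 * Illustrative values (not in the original source): DI_ALIGN(0x00) == 0x00,
 * DI_ALIGN(0x01) == 0x08, DI_ALIGN(0x09) == 0x10. Every offset handed out
 * by di_checkmem() below is rounded up to an 8-byte boundary this way, so
 * snapshot structures stay aligned no matter which platform reads the map.
 */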
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
    struct di_mem	*next;		/* link to next chunk */
    char		*buf;		/* contiguous kernel memory */
    size_t		buf_size;	/* size of buf in bytes */
    devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};

/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
    void		*offset[MAX_TREE_DEPTH];
    struct dev_info	*dip[MAX_TREE_DEPTH];
    int			circ[MAX_TREE_DEPTH];
    int			depth;	/* depth of current node to be copied */
};

#define TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define EMPTY_STACK(stack)	((stack)->depth == 0)
#define POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define PUSH_STACK(stack, node, off_p)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(off_p); \
	((stack)->depth)++; \
}

#define DI_ALL_PTR(s)	DI_ALL(di_mem_addr((s), 0))
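
/*
 * A minimal sketch of the traversal idiom these macros support (it mirrors
 * di_copytree()/di_copynode() further down; variable names are illustrative):
 *
 *	struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);
 *	PUSH_STACK(dsp, root, off_p);	// ndi_devi_enter() the root
 *	while (!EMPTY_STACK(dsp))
 *		off = di_copynode(TOP_NODE(dsp), dsp, st);
 *	kmem_free(dsp, sizeof (struct di_stack));
 *
 * PUSH_STACK() holds each node busy on the way down and POP_STACK()
 * releases it on the way back up, so the subtree being copied cannot
 * detach mid-walk.
 */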
/*
 * With devfs, the device tree has no global locks. The device tree is
 * dynamic and dips may come and go if they are not locked locally. Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree. This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
    DI_DKEY = 1,
    DI_PKEY = 2
};

struct di_dkey {
    dev_info_t	*dk_dip;
    major_t	dk_major;
    int		dk_inst;
    pnode_t	dk_nodeid;
};

struct di_pkey {
    mdi_pathinfo_t	*pk_pip;
    char		*pk_path_addr;
    dev_info_t		*pk_client;
    dev_info_t		*pk_phci;
};

struct di_key {
    enum di_ktype	k_type;
    union {
	struct di_dkey dkey;
	struct di_pkey pkey;
    } k_u;
};
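
/*
 * A hypothetical sketch of how a devinfo key might be filled in before
 * insertion into reg_dip_hash (the real work is done by di_register_dip(),
 * which is only prototyped in this excerpt):
 *
 *	struct di_key *key = kmem_zalloc(sizeof (struct di_key), KM_SLEEP);
 *	key->k_type = DI_DKEY;
 *	key->k_u.dkey.dk_dip = dip;
 *	key->k_u.dkey.dk_major = DEVI(dip)->devi_major;
 *	key->k_u.dkey.dk_inst = DEVI(dip)->devi_instance;
 *	key->k_u.dkey.dk_nodeid = DEVI(dip)->devi_nodeid;
 *
 * Comparison uses the whole tuple (see di_dkey_cmp() below), so a recycled
 * dev_info pointer alone cannot alias a different device.
 */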

struct i_lnode;

typedef struct i_link {
    /*
     * If a di_link struct representing this i_link struct makes it
     * into the snapshot, then self will point to the offset of
     * the di_link struct in the snapshot
     */
    di_off_t	self;

    int			spec_type;	/* block or char access type */
    struct i_lnode	*src_lnode;	/* src i_lnode */
    struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
    struct i_link	*src_link_next;	/* next src i_link w/ same i_lnode */
    struct i_link	*tgt_link_next;	/* next tgt i_link w/ same i_lnode */
} i_link_t;

typedef struct i_lnode {
    /*
     * If a di_lnode struct representing this i_lnode struct makes it
     * into the snapshot, then self will point to the offset of
     * the di_lnode struct in the snapshot
     */
    di_off_t	self;

    /*
     * used for hashing and comparing i_lnodes
     */
    int		modid;

    /*
     * public information describing a link endpoint
     */
    struct di_node	*di_node;	/* di_node in snapshot */
    dev_t		devt;		/* devt */

    /*
     * i_link ptr to links coming into this i_lnode node
     * (this i_lnode is the target of these i_links)
     */
    i_link_t	*link_in;

    /*
     * i_link ptr to links going out of this i_lnode node
     * (this i_lnode is the source of these i_links)
     */
    i_link_t	*link_out;
} i_lnode_t;

/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
    di_off_t		mem_size;	/* total # bytes in memlist */
    struct di_mem	*memlist;	/* head of memlist */
    uint_t		command;	/* command from ioctl */
    int			di_iocstate;	/* snapshot ioctl state */
    mod_hash_t		*reg_dip_hash;
    mod_hash_t		*reg_pip_hash;
    int			lnode_count;
    int			link_count;

    mod_hash_t		*lnode_hash;
    mod_hash_t		*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
    DI_QUIET = 0,	/* DI_QUIET must always be 0 */
    DI_ERR,
    DI_INFO,
    DI_TRACE,
    DI_TRACE1,
    DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

/*
 * Check that the whole device tree is being configured as a pre-condition
 * for cleaning up /etc/devices files.
 */
#define DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

#define CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

typedef struct phci_walk_arg {
    di_off_t		off;
    struct di_state	*st;
} phci_walk_arg_t;

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct dev_info *, struct di_stack *,
    struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
    struct di_state *, struct dev_info *);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static void *di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
static int build_phci_list(dev_info_t *ph_devinfo, void *arg);

extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
    int (*)(dev_info_t *, void *), void *);


static struct cb_ops di_cb_ops = {
    di_open,		/* open */
    di_close,		/* close */
    nodev,		/* strategy */
    nodev,		/* print */
    nodev,		/* dump */
    nodev,		/* read */
    nodev,		/* write */
    di_ioctl,		/* ioctl */
    nodev,		/* devmap */
    nodev,		/* mmap */
    nodev,		/* segmap */
    nochpoll,		/* poll */
    ddi_prop_op,	/* prop_op */
    NULL,		/* streamtab */
    D_NEW | D_MP	/* Driver compatibility flag */
};

static struct dev_ops di_ops = {
    DEVO_REV,		/* devo_rev, */
    0,			/* refcnt */
    di_info,		/* info */
    nulldev,		/* identify */
    nulldev,		/* probe */
    di_attach,		/* attach */
    di_detach,		/* detach */
    nodev,		/* reset */
    &di_cb_ops,		/* driver operations */
    NULL		/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
    &mod_driverops,
    "DEVINFO Driver",
    &di_ops
};

static struct modlinkage modlinkage = {
    MODREV_1,
    &modldrv,
    NULL
};

int
_init(void)
{
    int	error;

    mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

    error = mod_install(&modlinkage);
    if (error != 0) {
	mutex_destroy(&di_lock);
	return (error);
    }

    return (0);
}

int
_info(struct modinfo *modinfop)
{
    return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
    int	error;

    error = mod_remove(&modlinkage);
    if (error != 0) {
	return (error);
    }

    mutex_destroy(&di_lock);
    return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
    int	error = DDI_FAILURE;

    switch (infocmd) {
    case DDI_INFO_DEVT2DEVINFO:
	*result = (void *)di_dip;
	error = DDI_SUCCESS;
	break;
    case DDI_INFO_DEVT2INSTANCE:
	/*
	 * All dev_t's map to the same, single instance.
	 */
	*result = (void *)0;
	error = DDI_SUCCESS;
	break;
    default:
	break;
    }

    return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
    int	error = DDI_FAILURE;

    switch (cmd) {
    case DDI_ATTACH:
	di_states = kmem_zalloc(
	    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

	if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
	    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
	    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
	    kmem_free(di_states,
		di_max_opens * sizeof (struct di_state *));
	    ddi_remove_minor_node(dip, NULL);
	    error = DDI_FAILURE;
	} else {
	    di_dip = dip;
	    ddi_report_dev(dip);

	    error = DDI_SUCCESS;
	}
	break;
    default:
	error = DDI_FAILURE;
	break;
    }

    return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
    int	error = DDI_FAILURE;

    switch (cmd) {
    case DDI_DETACH:
	ddi_remove_minor_node(dip, NULL);
	di_dip = NULL;
	kmem_free(di_states, di_max_opens * sizeof (struct di_state *));

	error = DDI_SUCCESS;
	break;
    default:
	error = DDI_FAILURE;
	break;
    }

    return (error);
}

/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device. Each minor gets a separate
 * entry in the di_states[] table. Based on the original minor number, we
 * discriminate opens of the full and read-only nodes. If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
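
/*
 * Example (illustrative, with di_max_opens == 32): an open of the full
 * minor (DI_FULL_PARENT == 0) is assigned the first free even slot
 * 0, 2, 4, ... 30, and an open of the read-only minor
 * (DI_READONLY_PARENT == 1) the first free odd slot 1, 3, ... 31.
 * The minor returned to the caller is slot + DI_NODE_SPECIES, so clone
 * minors never collide with the two parent minors.
 */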
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
    int		m;
    minor_t	minor_parent = getminor(*devp);

    if (minor_parent != DI_FULL_PARENT &&
	minor_parent != DI_READONLY_PARENT)
	return (ENXIO);

    mutex_enter(&di_lock);

    for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
	if (di_states[m] != NULL)
	    continue;

	di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
	break;	/* It's ours. */
    }

    if (m >= di_max_opens) {
	/*
	 * maximum open instance for device reached
	 */
	mutex_exit(&di_lock);
	dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
	return (EAGAIN);
    }
    mutex_exit(&di_lock);

    ASSERT(m < di_max_opens);
    *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

    dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
	(void *)curthread, m + DI_NODE_SPECIES));

    return (0);
}

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
    struct di_state	*st;
    int			m = (int)getminor(dev) - DI_NODE_SPECIES;

    if (m < 0) {
	cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
	    m + DI_NODE_SPECIES);
	return (ENXIO);
    }

    st = di_states[m];
    ASSERT(m < di_max_opens && st != NULL);

    di_freemem(st);
    kmem_free(st, sizeof (struct di_state));

    /*
     * empty slot in state table
     */
    mutex_enter(&di_lock);
    di_states[m] = NULL;
    dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
	(void *)curthread, m + DI_NODE_SPECIES));
    mutex_exit(&di_lock);

    return (0);
}


/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
    int			rv, error;
    di_off_t		off;
    struct di_all	*all;
    struct di_state	*st;
    int			m = (int)getminor(dev) - DI_NODE_SPECIES;
    major_t		i;
    char		*drv_name;
    size_t		map_size, size;
    struct di_mem	*dcp;
    int			ndi_flags;

    if (m < 0 || m >= di_max_opens) {
	return (ENXIO);
    }

    st = di_states[m];
    ASSERT(st != NULL);

    dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

    switch (cmd) {
    case DINFOIDENT:
	/*
	 * This is called from di_init to verify that the driver
	 * opened is indeed devinfo. The purpose is to guard against
	 * sending an ioctl to an unknown driver in case of an
	 * unresolved major number conflict during bfu.
	 */
	*rvalp = DI_MAGIC;
	return (0);

    case DINFOLODRV:
	/*
	 * Hold an installed driver and return the result
	 */
	if (DI_UNPRIVILEGED_NODE(m)) {
	    /*
	     * Only the fully enabled instances may issue
	     * DINFOLODRV.
	     */
	    return (EACCES);
	}

	drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
	if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
	    kmem_free(drv_name, MAXNAMELEN);
	    return (EFAULT);
	}

	/*
	 * Some 3rd-party drivers' _init() walks the device tree,
	 * so we load the driver module before configuring the driver.
	 */
	i = ddi_name_to_major(drv_name);
	if (ddi_hold_driver(i) == NULL) {
	    kmem_free(drv_name, MAXNAMELEN);
	    return (ENXIO);
	}

	ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

	/*
	 * i_ddi_load_drvconf() below will trigger a reprobe
	 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
	 * needed here.
	 */
	modunload_disable();
	(void) i_ddi_load_drvconf(i);
	(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
	kmem_free(drv_name, MAXNAMELEN);
	ddi_rele_driver(i);
	rv = i_ddi_devs_attached(i);
	modunload_enable();

	i_ddi_di_cache_invalidate(KM_SLEEP);

	return ((rv == DDI_SUCCESS)? 0 : ENXIO);
    case DINFOUSRLD:
	/*
	 * The case for copying the snapshot to userland
	 */
	if (di_setstate(st, IOC_COPY) == -1)
	    return (EBUSY);

	map_size = DI_ALL_PTR(st)->map_size;
	if (map_size == 0) {
	    (void) di_setstate(st, IOC_DONE);
	    return (EFAULT);
	}

	/*
	 * copyout the snapshot
	 */
	map_size = (map_size + PAGEOFFSET) & PAGEMASK;

	/*
	 * Return the map size, so the caller may do a sanity
	 * check against the return value of the snapshot ioctl()
	 */
	*rvalp = (int)map_size;

	/*
	 * Copy one chunk at a time
	 */
	off = 0;
	dcp = st->memlist;
	while (map_size) {
	    size = dcp->buf_size;
	    if (map_size <= size) {
		size = map_size;
	    }

	    if (ddi_copyout(di_mem_addr(st, off),
		(void *)(arg + off), size, mode) != 0) {
		(void) di_setstate(st, IOC_DONE);
		return (EFAULT);
	    }

	    map_size -= size;
	    off += size;
	    dcp = dcp->next;
	}

	di_freemem(st);
	(void) di_setstate(st, IOC_IDLE);
	return (0);

    default:
	if ((cmd & ~DIIOC_MASK) != DIIOC) {
	    /*
	     * Invalid ioctl command
	     */
	    return (ENOTTY);
	}
	/*
	 * take a snapshot
	 */
	st->command = cmd & DIIOC_MASK;
	/*FALLTHROUGH*/
    }

    /*
     * Obtain enough memory to hold header + rootpath. We prevent kernel
     * memory exhaustion by freeing any previously allocated snapshot and
     * refusing the operation; otherwise we would be allowing ioctl(),
     * ioctl(), ioctl(), ..., panic.
     */
    if (di_setstate(st, IOC_SNAP) == -1)
	return (EBUSY);

    /*
     * Initial memlist always holds di_all and the root_path - and
     * is at least a page in size.
     */
    size = sizeof (struct di_all) +
	sizeof (((struct dinfo_io *)(NULL))->root_path);
    if (size < PAGESIZE)
	size = PAGESIZE;
    off = di_checkmem(st, 0, size);
    all = DI_ALL_PTR(st);
    off += sizeof (struct di_all);	/* real length of di_all */

    all->devcnt = devcnt;
    all->command = st->command;
    all->version = DI_SNAPSHOT_VERSION;
    all->top_vhci_devinfo = 0;	/* filled by build_vhci_list. */

    /*
     * Note the endianness in case we need to transport the snapshot
     * over the network.
     */
#if defined(_LITTLE_ENDIAN)
    all->endianness = DI_LITTLE_ENDIAN;
#else
    all->endianness = DI_BIG_ENDIAN;
#endif

    /* Copyin ioctl args, store in the snapshot. */
    if (copyinstr((void *)arg, all->root_path,
	sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
	di_freemem(st);
	(void) di_setstate(st, IOC_IDLE);
	return (EFAULT);
    }
    off += size;	/* real length of root_path */

    if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
	di_freemem(st);
	(void) di_setstate(st, IOC_IDLE);
	return (EINVAL);
    }

    error = 0;
    if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
	di_freemem(st);
	(void) di_setstate(st, IOC_IDLE);
	return (error);
    }

    /*
     * Only the fully enabled version may force load drivers or read
     * the parent private data from a driver.
     */
    if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	DI_UNPRIVILEGED_NODE(m)) {
	di_freemem(st);
	(void) di_setstate(st, IOC_IDLE);
	return (EACCES);
    }

    /* Do we need private data? */
    if (st->command & DINFOPRIVDATA) {
	arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(mode & FMODELS)) {
	case DDI_MODEL_ILP32: {
	    /*
	     * Cannot copy private data from 64-bit kernel
	     * to 32-bit app
	     */
	    di_freemem(st);
	    (void) di_setstate(st, IOC_IDLE);
	    return (EINVAL);
	}
	case DDI_MODEL_NONE:
	    if ((off = di_copyformat(off, st, arg, mode)) == 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	    }
	    break;
	}
#else /* !_MULTI_DATAMODEL */
	if ((off = di_copyformat(off, st, arg, mode)) == 0) {
	    di_freemem(st);
	    (void) di_setstate(st, IOC_IDLE);
	    return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */
    }

    all->top_devinfo = DI_ALIGN(off);

    /*
     * For cache lookups we reallocate memory from scratch,
     * so the value of "all" is no longer valid.
     */
    all = NULL;

    if (st->command & DINFOCACHE) {
	*rvalp = di_cache_lookup(st);
    } else if (snapshot_is_cacheable(st)) {
	DI_CACHE_LOCK(di_cache);
	*rvalp = di_cache_update(st);
	DI_CACHE_UNLOCK(di_cache);
    } else
	*rvalp = di_snapshot_and_clean(st);

    if (*rvalp) {
	DI_ALL_PTR(st)->map_size = *rvalp;
	(void) di_setstate(st, IOC_DONE);
    } else {
	di_freemem(st);
	(void) di_setstate(st, IOC_IDLE);
    }

    return (0);
}
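
/*
 * A rough sketch, assuming a libdevinfo-style consumer (the real calling
 * sequence lives in di_init(3DEVINFO), not in this file), of the two-step
 * protocol di_ioctl() implements above:
 *
 *	fd = open("/devices/pseudo/devinfo@0:devinfo", O_RDONLY);
 *	if (ioctl(fd, DINFOIDENT, &dinfo_io) != DI_MAGIC)
 *		return (-1);
 *	// step 1: take the snapshot; root_path is copied in from the
 *	// argument and the page-rounded map size comes back as rval
 *	map_size = ioctl(fd, DINFOSUBTREE | DINFOPROP | DINFOMINOR,
 *	    &dinfo_io);
 *	// step 2: copy the snapshot out, one di_mem chunk at a time
 *	buf = malloc(map_size);
 *	(void) ioctl(fd, DINFOUSRLD, buf);
 */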
/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
    struct di_mem	*mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);

    /*
     * Round up size to nearest power of 2. If it is less
     * than st->mem_size, set it to st->mem_size (i.e.,
     * the mem_size is doubled every time) to reduce the
     * number of memory allocations.
     */
    size_t tmp = 1;
    while (tmp < size) {
	tmp <<= 1;
    }
    size = (tmp > st->mem_size) ? tmp : st->mem_size;

    mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
    mem->buf_size = size;

    dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

    if (st->mem_size == 0) {	/* first chunk */
	st->memlist = mem;
    } else {
	/*
	 * locate end of linked list and add a chunk at the end
	 */
	struct di_mem *dcp = st->memlist;
	while (dcp->next != NULL) {
	    dcp = dcp->next;
	}

	dcp->next = mem;
    }

    st->mem_size += size;
}
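
/*
 * Example of the growth policy above (illustrative, 4K pages): a first
 * request for one page allocates a 4K chunk (mem_size = 4K); the next
 * request, however small, allocates another 4K (mem_size = 8K), then
 * 8K (16K), 16K (32K), and so on -- each new chunk matches the total
 * already allocated, so mem_size doubles and the number of chunks stays
 * logarithmic in the snapshot size.
 */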
/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
    struct di_mem	*dcp;
    size_t		copysz;

    if (st->mem_size == 0) {
	ASSERT(st->memlist == NULL);
	return;
    }

    copysz = 0;
    for (dcp = st->memlist; dcp; dcp = dcp->next) {

	ASSERT(bufsiz > 0);

	if (bufsiz <= dcp->buf_size)
	    copysz = bufsiz;
	else
	    copysz = dcp->buf_size;

	bcopy(dcp->buf, buf, copysz);

	buf += copysz;
	bufsiz -= copysz;

	if (bufsiz == 0)
	    break;
    }
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
    struct di_mem	*dcp, *tmp;

    dcmn_err2((CE_CONT, "di_freemem\n"));

    if (st->mem_size) {
	dcp = st->memlist;
	while (dcp) {	/* traverse the linked list */
	    tmp = dcp;
	    dcp = dcp->next;
	    ddi_umem_free(tmp->cook);
	    kmem_free(tmp, sizeof (struct di_mem));
	}
	st->mem_size = 0;
	st->memlist = NULL;
    }

    ASSERT(st->mem_size == 0);
    ASSERT(st->memlist == NULL);
}
/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
    caddr_t	pa;

    ASSERT(st->mem_size == 0);
    ASSERT(st->memlist == NULL);
    ASSERT(!servicing_interrupt());
    ASSERT(DI_CACHE_LOCKED(*cache));

    if (cache->cache_size == 0) {
	ASSERT(cache->cache_data == NULL);
	CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
	return (0);
    }

    ASSERT(cache->cache_data);

    di_allocmem(st, cache->cache_size);

    pa = di_mem_addr(st, 0);

    ASSERT(pa);

    /*
     * Verify that di_allocmem() allocates contiguous memory,
     * so that it is safe to do a straight bcopy()
     */
    ASSERT(st->memlist != NULL);
    ASSERT(st->memlist->next == NULL);
    bcopy(cache->cache_data, pa, cache->cache_size);

    return (cache->cache_size);
}

/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *	- 0 on failure
 *	- size of copied data on success
 */
static size_t
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
    size_t	map_size;

    ASSERT(cache->cache_size == 0);
    ASSERT(cache->cache_data == NULL);
    ASSERT(!servicing_interrupt());
    ASSERT(DI_CACHE_LOCKED(*cache));

    if (st->mem_size == 0) {
	ASSERT(st->memlist == NULL);
	CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
	return (0);
    }

    ASSERT(st->memlist);

    /*
     * The size of the memory list may be much larger than the
     * size of valid data (map_size). Cache only the valid data.
     */
    map_size = DI_ALL_PTR(st)->map_size;
    if (map_size == 0 || map_size < sizeof (struct di_all) ||
	map_size > st->mem_size) {
	CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
	return (0);
    }

    cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
    cache->cache_size = map_size;
    di_copymem(st, cache->cache_data, cache->cache_size);

    return (map_size);
}

/*
 * Make sure there are at least "size" bytes of memory left before
 * going on. Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
    dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
	off, (int)size));

    /*
     * di_checkmem() shouldn't be called with a size of zero.
     * But in case it is, we want to make sure we return a valid
     * offset within the memlist and not an offset that points us
     * at the end of the memlist.
     */
    if (size == 0) {
	dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
	size = 1;
    }

    off = DI_ALIGN(off);
    if ((st->mem_size - off) < size) {
	off = st->mem_size;
	di_allocmem(st, size);
    }

    /* verify that the return value is aligned */
    ASSERT(off == DI_ALIGN(off));
    return (off);
}
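
/*
 * The canonical caller idiom, as used throughout this file: let
 * di_checkmem() align the offset and guarantee the space, store the
 * aligned result, then advance past the object just written:
 *
 *	size = strlen(name) + 1;
 *	me->node_name = off = di_checkmem(st, off, size);
 *	(void) strcpy(di_mem_addr(st, off), name);
 *	off += size;
 */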
/*
 * Copy the private data format from the ioctl arg.
 * On success, the ending offset is returned. On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
    di_off_t		size;
    struct di_priv_data	*priv;
    struct di_all	*all = DI_ALL_PTR(st);

    dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	off, (void *)arg, mode));

    /*
     * Copyin data and check version.
     * We only handle private data version 0.
     */
    priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
    if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
	kmem_free(priv, sizeof (struct di_priv_data));
	return (0);
    }

    /*
     * Save the di_priv_data copied from userland in the snapshot.
     */
    all->pd_version = priv->version;
    all->n_ppdata = priv->n_parent;
    all->n_dpdata = priv->n_driver;

    /*
     * copyin private data format, modify offset accordingly
     */
    if (all->n_ppdata) {	/* parent private data format */
	/*
	 * check memory
	 */
	size = all->n_ppdata * sizeof (struct di_priv_format);
	all->ppdata_format = off = di_checkmem(st, off, size);
	if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
	    mode) != 0) {
	    kmem_free(priv, sizeof (struct di_priv_data));
	    return (0);
	}

	off += size;
    }

    if (all->n_dpdata) {	/* driver private data format */
	/*
	 * check memory
	 */
	size = all->n_dpdata * sizeof (struct di_priv_format);
	all->dpdata_format = off = di_checkmem(st, off, size);
	if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
	    mode) != 0) {
	    kmem_free(priv, sizeof (struct di_priv_data));
	    return (0);
	}

	off += size;
    }

    kmem_free(priv, sizeof (struct di_priv_data));
    return (off);
}

/*
 * Return the real address based on the offset (off) within the snapshot
 */
static void *
di_mem_addr(struct di_state *st, di_off_t off)
{
    struct di_mem	*dcp = st->memlist;

    dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	(void *)dcp, off));

    ASSERT(off < st->mem_size);

    while (off >= dcp->buf_size) {
	off -= dcp->buf_size;
	dcp = dcp->next;
    }

    dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	off, (void *)(dcp->buf + off)));

    return (dcp->buf + off);
}
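
/*
 * Example (illustrative chunk sizes): with a memlist of 4K, 4K and 8K
 * chunks, an offset of 0x2800 walks past the first two chunks
 * (0x2800 - 0x1000 - 0x1000 = 0x800) and resolves to buf + 0x800 in the
 * third chunk.
 */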
/*
 * Ideally we would use the whole key to derive the hash
 * value. However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
    struct di_key	*dik = key;
    size_t		rshift;
    void		*ptr;

    ASSERT(arg == NULL);

    switch (dik->k_type) {
    case DI_DKEY:
	ptr = dik->k_u.dkey.dk_dip;
	rshift = highbit(sizeof (struct dev_info));
	break;
    case DI_PKEY:
	ptr = dik->k_u.pkey.pk_pip;
	rshift = highbit(sizeof (struct mdi_pathinfo));
	break;
    default:
	panic("devinfo: unknown key type");
	/*NOTREACHED*/
    }
    return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
    char		*path_addr;
    struct di_key	*dik = key;

    switch (dik->k_type) {
    case DI_DKEY:
	break;
    case DI_PKEY:
	path_addr = dik->k_u.pkey.pk_path_addr;
	if (path_addr)
	    kmem_free(path_addr, strlen(path_addr) + 1);
	break;
    default:
	panic("devinfo: unknown key type");
	/*NOTREACHED*/
    }

    kmem_free(dik, sizeof (struct di_key));
}

static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
    if (dk1->dk_dip != dk2->dk_dip)
	return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

    if (dk1->dk_major != DDI_MAJOR_T_NONE &&
	dk2->dk_major != DDI_MAJOR_T_NONE) {
	if (dk1->dk_major != dk2->dk_major)
	    return (dk1->dk_major > dk2->dk_major ? 1 : -1);

	if (dk1->dk_inst != dk2->dk_inst)
	    return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
    }

    if (dk1->dk_nodeid != dk2->dk_nodeid)
	return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

    return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
    char	*p1, *p2;
    int		rv;

    if (pk1->pk_pip != pk2->pk_pip)
	return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

    p1 = pk1->pk_path_addr;
    p2 = pk2->pk_path_addr;

    p1 = p1 ? p1 : "";
    p2 = p2 ? p2 : "";

    rv = strcmp(p1, p2);
    if (rv)
	return (rv > 0 ? 1 : -1);

    if (pk1->pk_client != pk2->pk_client)
	return (pk1->pk_client > pk2->pk_client ? 1 : -1);

    if (pk1->pk_phci != pk2->pk_phci)
	return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

    return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
    struct di_key	*dik1, *dik2;

    dik1 = key1;
    dik2 = key2;

    if (dik1->k_type != dik2->k_type) {
	panic("devinfo: mismatched keys");
	/*NOTREACHED*/
    }

    switch (dik1->k_type) {
    case DI_DKEY:
	return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
    case DI_PKEY:
	return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
    default:
	panic("devinfo: unknown key type");
	/*NOTREACHED*/
    }
}
/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
    di_off_t		off;
    struct di_all	*all;
    dev_info_t		*rootnode;
    char		buf[80];
    int			plen;
    char		*path;
    vnode_t		*vp;

    all = DI_ALL_PTR(st);
    dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

    /*
     * Verify the path before entrusting it to e_ddi_hold_devi_by_path
     * because some platforms have OBP bugs where executing the
     * NDI_PROMNAME code path against an invalid path results in panic.
     * The lookupnameat is done relative to rootdir without a leading '/'
     * on "devices/" to force the lookup to occur in the global zone.
     */
    plen = strlen("devices/") + strlen(all->root_path) + 1;
    path = kmem_alloc(plen, KM_SLEEP);
    (void) snprintf(path, plen, "devices/%s", all->root_path);
    if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
	dcmn_err((CE_CONT, "Devinfo node %s not found\n",
	    all->root_path));
	kmem_free(path, plen);
	return (0);
    }
    kmem_free(path, plen);
    VN_RELE(vp);

    /*
     * Hold the devinfo node referred to by the path.
     */
    rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
    if (rootnode == NULL) {
	dcmn_err((CE_CONT, "Devinfo node %s not found\n",
	    all->root_path));
	return (0);
    }

    (void) snprintf(buf, sizeof (buf),
	"devinfo registered dips (statep=%p)", (void *)st);

    st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	NULL, di_key_cmp, KM_SLEEP);


    (void) snprintf(buf, sizeof (buf),
	"devinfo registered pips (statep=%p)", (void *)st);

    st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	NULL, di_key_cmp, KM_SLEEP);

    /*
     * copy the device tree
     */
    off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

    if (DINFOPATH & st->command) {
	mdi_walk_vhcis(build_vhci_list, st);
    }

    ddi_release_devi(rootnode);

    /*
     * copy the devnames array
     */
    all->devnames = off;
    off = di_copydevnm(&all->devnames, st);


    /* initialize the hash tables */
    st->lnode_count = 0;
    st->link_count = 0;

    if (DINFOLYR & st->command) {
	off = di_getlink_data(off, st);
    }

    /*
     * Free up hash tables
     */
    mod_hash_destroy_hash(st->reg_dip_hash);
    mod_hash_destroy_hash(st->reg_pip_hash);

    /*
     * Record the timestamp now that we are done with the snapshot.
     *
     * We compute the checksum later, and then only if we cache
     * the snapshot, since checksumming adds some overhead.
     * The checksum is checked later if we read the cache file
     * from disk.
     *
     * Set the checksum field to 0, as the CRC is calculated with
     * that field set to 0.
     */
    all->snapshot_time = ddi_get_time();
    all->cache_checksum = 0;

    ASSERT(all->snapshot_time != 0);

    return (off);
}

/*
 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
 */
static di_off_t
di_snapshot_and_clean(struct di_state *st)
{
    di_off_t	off;

    modunload_disable();
    off = di_snapshot(st);
    if (off != 0 && (st->command & DINFOCLEANUP)) {
	ASSERT(DEVICES_FILES_CLEANABLE(st));
	/*
	 * Cleanup /etc/devices files:
	 * In order to accurately account for the system configuration
	 * in /etc/devices files, the appropriate drivers must be
	 * fully configured before the cleanup starts.
	 * So enable modunload only after the cleanup.
	 */
	i_ddi_clean_devices_files();
	/*
	 * Remove backing store nodes for unused devices,
	 * which retain past permissions customizations
	 * and may be undesired for newly configured devices.
	 */
	dev_devices_cleanup();
    }
    modunload_enable();

    return (off);
}
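
/*
 * Example of a cleanup request (illustrative): a snapshot taken with
 * command == (DINFOSUBTREE | DINFOFORCE | DINFOCLEANUP) and root_path "/"
 * satisfies DEVICES_FILES_CLEANABLE() above; a DINFOCLEANUP request rooted
 * anywhere else is rejected with EINVAL back in di_ioctl().
 */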
/*
 * construct vhci linkage in the snapshot.
 */
static int
build_vhci_list(dev_info_t *vh_devinfo, void *arg)
{
    struct di_all	*all;
    struct di_node	*me;
    struct di_state	*st;
    di_off_t		off;
    phci_walk_arg_t	pwa;

    dcmn_err3((CE_CONT, "build_vhci list\n"));

    dcmn_err3((CE_CONT, "vhci node %s%d\n",
	ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));

    st = (struct di_state *)arg;
    if (di_dip_find(st, vh_devinfo, &off) != 0) {
	dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
	return (DDI_WALK_TERMINATE);
    }

    dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
	st->mem_size, off));

    all = DI_ALL_PTR(st);
    if (all->top_vhci_devinfo == 0) {
	all->top_vhci_devinfo = off;
    } else {
	me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));

	while (me->next_vhci != 0) {
	    me = DI_NODE(di_mem_addr(st, me->next_vhci));
	}

	me->next_vhci = off;
    }

    pwa.off = off;
    pwa.st = st;
    mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);

    return (DDI_WALK_CONTINUE);
}

/*
 * construct phci linkage for the given vhci in the snapshot.
 */
static int
build_phci_list(dev_info_t *ph_devinfo, void *arg)
{
    struct di_node	*vh_di_node;
    struct di_node	*me;
    phci_walk_arg_t	*pwa;
    di_off_t		off;

    pwa = (phci_walk_arg_t *)arg;

    dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
	pwa->off));

    vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
    if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
	dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
	return (DDI_WALK_TERMINATE);
    }

    dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
	ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));

    if (vh_di_node->top_phci == 0) {
	vh_di_node->top_phci = off;
	return (DDI_WALK_CONTINUE);
    }

    me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));

    while (me->next_phci != 0) {
	me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
    }
    me->next_phci = off;

    return (DDI_WALK_CONTINUE);
}
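
/*
 * The linkage built above, sketched for two vHCIs with two pHCIs each
 * (arrows are snapshot offsets; names are illustrative):
 *
 *	all->top_vhci_devinfo -> vhci_A --next_vhci--> vhci_B --> 0
 *	  vhci_A->top_phci -> phci_A0 --next_phci--> phci_A1 --> 0
 *	  vhci_B->top_phci -> phci_B0 --next_phci--> phci_B1 --> 0
 */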
/*
 * Assumes all devinfo nodes in the device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
{
    struct dev_info	*node;
    struct di_node	*me;
    di_off_t		off;

    ASSERT(mutex_owned(&dnp->dn_lock));

    node = DEVI(dnp->dn_head);
    for (; node; node = node->devi_next) {
	if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
	    continue;

	ASSERT(off > 0);
	me = DI_NODE(di_mem_addr(st, off));
	ASSERT(me->next == 0 || me->next == -1);
	/*
	 * Only nodes which were BOUND when they were
	 * snapshotted will be added to the per-driver list.
	 */
	if (me->next != -1)
	    continue;

	*off_p = off;
	off_p = &me->next;
    }

    *off_p = 0;
}

/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * Also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
    int			i;
    di_off_t		off;
    size_t		size;
    struct di_devnm	*dnp;

    dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

    /*
     * make sure there is some allocated memory
     */
    size = devcnt * sizeof (struct di_devnm);
    *off_p = off = di_checkmem(st, *off_p, size);
    dnp = DI_DEVNM(di_mem_addr(st, off));
    off += size;

    dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	devcnt, off));

    for (i = 0; i < devcnt; i++) {
	if (devnamesp[i].dn_name == NULL) {
	    continue;
	}

	/*
	 * dn_name is not freed during driver unload or removal.
	 *
	 * There is a race condition when make_devname() changes
	 * dn_name during our strcpy. This should be rare since
	 * only add_drv does this. At any rate, we never had a
	 * problem with ddi_name_to_major(), which should have
	 * the same problem.
	 */
	dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
	    devnamesp[i].dn_name, devnamesp[i].dn_instance, off));

	size = strlen(devnamesp[i].dn_name) + 1;
	dnp[i].name = off = di_checkmem(st, off, size);
	(void) strcpy((char *)di_mem_addr(st, off),
	    devnamesp[i].dn_name);
	off += size;

	mutex_enter(&devnamesp[i].dn_lock);

	/*
	 * Snapshot per-driver node list
	 */
	snap_driver_list(st, &devnamesp[i], &dnp[i].head);

	/*
	 * This is not used by libdevinfo, leave it for now
	 */
	dnp[i].flags = devnamesp[i].dn_flags;
	dnp[i].instance = devnamesp[i].dn_instance;

	/*
	 * get global properties
	 */
	if ((DINFOPROP & st->command) &&
	    devnamesp[i].dn_global_prop_ptr) {
	    dnp[i].global_prop = off;
	    off = di_getprop(DI_PROP_GLB_LIST,
		&devnamesp[i].dn_global_prop_ptr->prop_list,
		&dnp[i].global_prop, st, NULL);
	}

	/*
	 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
	 */
	if (CB_DRV_INSTALLED(devopsp[i])) {
	    if (devopsp[i]->devo_cb_ops) {
		dnp[i].ops |= DI_CB_OPS;
		if (devopsp[i]->devo_cb_ops->cb_str)
		    dnp[i].ops |= DI_STREAM_OPS;
	    }
	    if (NEXUS_DRV(devopsp[i])) {
		dnp[i].ops |= DI_BUS_OPS;
	    }
	}

	mutex_exit(&devnamesp[i].dn_lock);
    }

    dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

    return (off);
}
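
/*
 * Note on the -1 sentinel consumed by snap_driver_list() above: when
 * di_copynode() (below) snapshots a node that is bound to a driver
 * (drv_major != -1), it sets me->next = -1; snap_driver_list() then
 * strings exactly those nodes onto the per-driver head and terminates
 * the list with 0. An unbound node keeps next == 0 and is skipped.
 */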
/*
 * Copy the kernel devinfo tree. The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
    di_off_t		off;
    struct dev_info	*node;
    struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

    dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	(void *)root, *off_p));

    /* force attach drivers */
    if (i_ddi_devi_attached((dev_info_t *)root) &&
	(st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
	(void) ndi_devi_config((dev_info_t *)root,
	    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
	    NDI_DRV_CONF_REPROBE);
    }

    /*
     * Push top_devinfo onto a stack
     *
     * The stack is necessary to avoid recursion, which can overrun
     * the kernel stack.
     */
    PUSH_STACK(dsp, root, off_p);

    /*
     * As long as there is a node on the stack, copy the node.
     * di_copynode() is responsible for pushing and popping
     * child and sibling nodes on the stack.
     */
    while (!EMPTY_STACK(dsp)) {
	node = TOP_NODE(dsp);
	off = di_copynode(node, dsp, st);
    }

    /*
     * Free the stack structure
     */
    kmem_free(dsp, sizeof (struct di_stack));

    return (off);
}
/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot. The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
{
    di_off_t		off;
    struct di_node	*me;
    size_t		size;
    struct dev_info	*n;

    dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
    ASSERT((node != NULL) && (node == TOP_NODE(dsp)));

    /*
     * check memory usage, and fix offsets accordingly.
     */
    size = sizeof (struct di_node);
    *(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
    me = DI_NODE(di_mem_addr(st, off));
    me->self = off;
    off += size;

    dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	node->devi_node_name, node->devi_instance, off));

    /*
     * Node parameters:
     * self		-- offset of current node within snapshot
     * nodeid		-- pointer to PROM node (tri-valued)
     * state		-- hot plugging device state
     * node_state	-- devinfo node state
     */
    me->instance = node->devi_instance;
    me->nodeid = node->devi_nodeid;
    me->node_class = node->devi_node_class;
    me->attributes = node->devi_node_attributes;
    me->state = node->devi_state;
    me->flags = node->devi_flags;
    me->node_state = node->devi_node_state;
    me->next_vhci = 0;		/* Filled in by build_vhci_list. */
    me->top_phci = 0;		/* Filled in by build_phci_list. */
    me->next_phci = 0;		/* Filled in by build_phci_list. */
    me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
    me->user_private_data = NULL;

    /*
     * Get the parent's offset in the snapshot from the stack
     * and store it in the current node
     */
    if (dsp->depth > 1) {
	me->parent = *(PARENT_OFFSET(dsp));
    }

    /*
     * Save the offset of this di_node in a hash table.
     * This is used later to resolve references to this
     * dip from other parts of the tree (per-driver list,
     * multipathing linkages, layered usage linkages).
     * The key used for the hash table is derived from
     * information in the dip.
     */
    di_register_dip(st, (dev_info_t *)node, me->self);

#ifdef DEVID_COMPATIBILITY
    /* check for devid as property marker */
    if (node->devi_devid_str) {
	ddi_devid_t	devid;

	/*
	 * The devid is now represented as a property. For
	 * compatibility with the di_devid() interface in libdevinfo
	 * we must return it as a binary structure in the snapshot.
	 * When (if) di_devid() is removed from libdevinfo then the
	 * code related to DEVID_COMPATIBILITY can be removed.
	 */
	if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
	    DDI_SUCCESS) {
	    size = ddi_devid_sizeof(devid);
	    off = di_checkmem(st, off, size);
	    me->devid = off;
	    bcopy(devid, di_mem_addr(st, off), size);
	    off += size;
	    ddi_devid_free(devid);
	}
    }
#endif	/* DEVID_COMPATIBILITY */

    if (node->devi_node_name) {
	size = strlen(node->devi_node_name) + 1;
	me->node_name = off = di_checkmem(st, off, size);
	(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
	off += size;
    }

    if (node->devi_compat_names && (node->devi_compat_length > 1)) {
	size = node->devi_compat_length;
	me->compat_names = off = di_checkmem(st, off, size);
	me->compat_length = (int)size;
	bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
	off += size;
    }

    if (node->devi_addr) {
	size = strlen(node->devi_addr) + 1;
	me->address = off = di_checkmem(st, off, size);
	(void) strcpy(di_mem_addr(st, off), node->devi_addr);
	off += size;
    }

    if (node->devi_binding_name) {
	size = strlen(node->devi_binding_name) + 1;
	me->bind_name = off = di_checkmem(st, off, size);
	(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
	off += size;
    }

    me->drv_major = node->devi_major;

    /*
     * If the dip is BOUND, set the next pointer of the
     * per-instance list to -1, indicating that it is yet to be resolved.
     * This will be resolved later in snap_driver_list().
     */
    if (me->drv_major != -1) {
	me->next = -1;
    } else {
	me->next = 0;
    }

    /*
     * An optimization to skip mutex_enter when not needed.
     */
    if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
	goto priv_data;
    }

    /*
     * LOCKING: We already have an active ndi_devi_enter to gather the
     * minor data, and we will take devi_lock to gather properties as
     * needed off di_getprop.
     */
    if (!(DINFOMINOR & st->command)) {
	goto path;
    }

    ASSERT(DEVI_BUSY_OWNED(node));
    if (node->devi_minor) {	/* minor data */
	me->minor_data = off;
	off = di_getmdata(node->devi_minor, &me->minor_data,
	    me->self, st);
    }

path:
    if (!(DINFOPATH & st->command)) {
	goto property;
    }

    if (MDI_VHCI(node)) {
	me->multipath_component = MULTIPATH_COMPONENT_VHCI;
    }

    if (MDI_CLIENT(node)) {
	me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
	me->multipath_client = off;
	off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
	    me->self, st, 1);
	dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
	    "component type = %d.  off=%d",
	    me->multipath_client,
	    (void *)node, node->devi_mdi_component, off));
    }
off=%d", 1870 me->multipath_phci, 1871 (void *)node, node->devi_mdi_component, off)); 1872 } 1873 1874 property: 1875 if (!(DINFOPROP & st->command)) { 1876 goto priv_data; 1877 } 1878 1879 if (node->devi_drv_prop_ptr) { /* driver property list */ 1880 me->drv_prop = off; 1881 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr, 1882 &me->drv_prop, st, node); 1883 } 1884 1885 if (node->devi_sys_prop_ptr) { /* system property list */ 1886 me->sys_prop = off; 1887 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr, 1888 &me->sys_prop, st, node); 1889 } 1890 1891 if (node->devi_hw_prop_ptr) { /* hardware property list */ 1892 me->hw_prop = off; 1893 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr, 1894 &me->hw_prop, st, node); 1895 } 1896 1897 if (node->devi_global_prop_list == NULL) { 1898 me->glob_prop = (di_off_t)-1; /* not global property */ 1899 } else { 1900 /* 1901 * Make copy of global property list if this devinfo refers 1902 * global properties different from what's on the devnames 1903 * array. It can happen if there has been a forced 1904 * driver.conf update. See mod_drv(1M). 1905 */ 1906 ASSERT(me->drv_major != -1); 1907 if (node->devi_global_prop_list != 1908 devnamesp[me->drv_major].dn_global_prop_ptr) { 1909 me->glob_prop = off; 1910 off = di_getprop(DI_PROP_GLB_LIST, 1911 &node->devi_global_prop_list->prop_list, 1912 &me->glob_prop, st, node); 1913 } 1914 } 1915 1916 priv_data: 1917 if (!(DINFOPRIVDATA & st->command)) { 1918 goto pm_info; 1919 } 1920 1921 if (ddi_get_parent_data((dev_info_t *)node) != NULL) { 1922 me->parent_data = off; 1923 off = di_getppdata(node, &me->parent_data, st); 1924 } 1925 1926 if (ddi_get_driver_private((dev_info_t *)node) != NULL) { 1927 me->driver_data = off; 1928 off = di_getdpdata(node, &me->driver_data, st); 1929 } 1930 1931 pm_info: /* NOT implemented */ 1932 1933 subtree: 1934 /* keep the stack aligned */ 1935 off = DI_ALIGN(off); 1936 1937 if (!(DINFOSUBTREE & st->command)) { 1938 POP_STACK(dsp); 1939 return (off); 1940 } 1941 1942 child: 1943 /* 1944 * If there is a visible child--push child onto stack. 1945 * Hold the parent (me) busy while doing so. 1946 */ 1947 if ((n = node->devi_child) != NULL) { 1948 /* skip hidden nodes */ 1949 while (n && ndi_dev_is_hidden_node((dev_info_t *)n)) 1950 n = n->devi_sibling; 1951 if (n) { 1952 me->child = off; 1953 PUSH_STACK(dsp, n, &me->child); 1954 return (me->child); 1955 } 1956 } 1957 1958 sibling: 1959 /* 1960 * Done with any child nodes, unroll the stack till a visible 1961 * sibling of a parent node is found or root node is reached. 
static i_lnode_t *
i_lnode_alloc(int modid)
{
    i_lnode_t	*i_lnode;

    i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);

    ASSERT(modid != -1);
    i_lnode->modid = modid;

    return (i_lnode);
}

static void
i_lnode_free(i_lnode_t *i_lnode)
{
    kmem_free(i_lnode, sizeof (i_lnode_t));
}

static void
i_lnode_check_free(i_lnode_t *i_lnode)
{
    /* This lnode and its dip must have been snapshotted */
    ASSERT(i_lnode->self > 0);
    ASSERT(i_lnode->di_node->self > 0);

    /* at least 1 link (in or out) must exist for this lnode */
    ASSERT(i_lnode->link_in || i_lnode->link_out);

    i_lnode_free(i_lnode);
}

static i_link_t *
i_link_alloc(int spec_type)
{
    i_link_t	*i_link;

    i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
    i_link->spec_type = spec_type;

    return (i_link);
}

static void
i_link_check_free(i_link_t *i_link)
{
    /* This link must have been snapshotted */
    ASSERT(i_link->self > 0);

    /* Both endpoint lnodes must exist for this link */
    ASSERT(i_link->src_lnode);
    ASSERT(i_link->tgt_lnode);

    kmem_free(i_link, sizeof (i_link_t));
}

/*ARGSUSED*/
static uint_t
i_lnode_hashfunc(void *arg, mod_hash_key_t key)
{
    i_lnode_t		*i_lnode = (i_lnode_t *)key;
    struct di_node	*ptr;
    dev_t		dev;

    dev = i_lnode->devt;
    if (dev != DDI_DEV_T_NONE)
	return (i_lnode->modid + getminor(dev) + getmajor(dev));

    ptr = i_lnode->di_node;
    if (ptr) {	/* check before dereferencing in the ASSERT below */
	uintptr_t k = (uintptr_t)ptr;

	ASSERT(ptr->self > 0);
	k >>= (int)highbit(sizeof (struct di_node));
	return ((uint_t)k);
    }

    return (i_lnode->modid);
}

static int
i_lnode_cmp(void *arg1, void *arg2)
{
    i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
    i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;

    if (i_lnode1->modid != i_lnode2->modid) {
	return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
    }

    if (i_lnode1->di_node != i_lnode2->di_node)
	return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);

    if (i_lnode1->devt != i_lnode2->devt)
	return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);

    return (0);
}
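
/*
 * i_lnode_hashfunc() and i_lnode_cmp() agree on the same identity, the
 * (modid, di_node, devt) tuple, so di_ldi_callback() below can dedup
 * endpoints: a {dip, dev_t} pair reported by many ldi_usage records is
 * allocated once and shared by all of its i_links.
 */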
2089 */ 2090 static int 2091 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg) 2092 { 2093 struct di_state *st = (struct di_state *)arg; 2094 i_lnode_t *src_lnode, *tgt_lnode, *i_lnode; 2095 i_link_t **i_link_next, *i_link; 2096 di_off_t soff, toff; 2097 mod_hash_val_t nodep = NULL; 2098 int res; 2099 2100 /* 2101 * If the source or target of this device usage information doesn't 2102 * correspond to a device node, then we don't report it via 2103 * libdevinfo, so return. 2104 */ 2105 if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL)) 2106 return (LDI_USAGE_CONTINUE); 2107 2108 ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip)); 2109 ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip)); 2110 2111 /* 2112 * Skip the ldi_usage if either src or tgt dip is not in the 2113 * snapshot. This saves us from pruning bad lnodes/links later. 2114 */ 2115 if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0) 2116 return (LDI_USAGE_CONTINUE); 2117 if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0) 2118 return (LDI_USAGE_CONTINUE); 2119 2120 ASSERT(soff > 0); 2121 ASSERT(toff > 0); 2122 2123 /* 2124 * allocate an i_lnode and add it to the lnode hash 2125 * if it is not already present. For this particular 2126 * link, the lnode is a source, but it may 2127 * participate as tgt or src in any number of layered 2128 * operations - so it may already be in the hash. 2129 */ 2130 i_lnode = i_lnode_alloc(ldi_usage->src_modid); 2131 i_lnode->di_node = DI_NODE(di_mem_addr(st, soff)); 2132 i_lnode->devt = ldi_usage->src_devt; 2133 2134 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2135 if (res == MH_ERR_NOTFOUND) { 2136 /* 2137 * new i_lnode 2138 * add it to the hash and increment the lnode count 2139 */ 2140 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2141 ASSERT(res == 0); 2142 st->lnode_count++; 2143 src_lnode = i_lnode; 2144 } else { 2145 /* this i_lnode already exists in the lnode_hash */ 2146 i_lnode_free(i_lnode); 2147 src_lnode = (i_lnode_t *)nodep; 2148 } 2149 2150 /* 2151 * allocate a tgt i_lnode and add it to the lnode hash 2152 */ 2153 i_lnode = i_lnode_alloc(ldi_usage->tgt_modid); 2154 i_lnode->di_node = DI_NODE(di_mem_addr(st, toff)); 2155 i_lnode->devt = ldi_usage->tgt_devt; 2156 2157 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2158 if (res == MH_ERR_NOTFOUND) { 2159 /* 2160 * new i_lnode 2161 * add it to the hash and increment the lnode count 2162 */ 2163 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2164 ASSERT(res == 0); 2165 st->lnode_count++; 2166 tgt_lnode = i_lnode; 2167 } else { 2168 /* this i_lnode already exists in the lnode_hash */ 2169 i_lnode_free(i_lnode); 2170 tgt_lnode = (i_lnode_t *)nodep; 2171 } 2172 2173 /* 2174 * allocate an i_link 2175 */ 2176 i_link = i_link_alloc(ldi_usage->tgt_spec_type); 2177 i_link->src_lnode = src_lnode; 2178 i_link->tgt_lnode = tgt_lnode; 2179 2180 /* 2181 * add this link onto the src i_lnode's outbound i_link list 2182 */ 2183 i_link_next = &(src_lnode->link_out); 2184 while (*i_link_next != NULL) { 2185 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) && 2186 (i_link->spec_type == (*i_link_next)->spec_type)) { 2187 /* this link already exists */ 2188 kmem_free(i_link, sizeof (i_link_t)); 2189 return (LDI_USAGE_CONTINUE); 2190 } 2191 i_link_next = &((*i_link_next)->src_link_next); 2192 } 2193 *i_link_next = i_link; 2194 2195 /* 2196 * add this link onto the tgt i_lnode's inbound i_link list 2197 */ 2198 i_link_next = &(tgt_lnode->link_in); 2199 while (*i_link_next != NULL) { 2200
ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); 2201 i_link_next = &((*i_link_next)->tgt_link_next); 2202 } 2203 *i_link_next = i_link; 2204 2205 /* 2206 * add this i_link to the link hash 2207 */ 2208 res = mod_hash_insert(st->link_hash, i_link, i_link); 2209 ASSERT(res == 0); 2210 st->link_count++; 2211 2212 return (LDI_USAGE_CONTINUE); 2213 } 2214 2215 struct i_layer_data { 2216 struct di_state *st; 2217 int lnode_count; 2218 int link_count; 2219 di_off_t lnode_off; 2220 di_off_t link_off; 2221 }; 2222 2223 /*ARGSUSED*/ 2224 static uint_t 2225 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2226 { 2227 i_link_t *i_link = (i_link_t *)key; 2228 struct i_layer_data *data = arg; 2229 struct di_link *me; 2230 struct di_lnode *melnode; 2231 struct di_node *medinode; 2232 2233 ASSERT(i_link->self == 0); 2234 2235 i_link->self = data->link_off + 2236 (data->link_count * sizeof (struct di_link)); 2237 data->link_count++; 2238 2239 ASSERT(data->link_off > 0 && data->link_count > 0); 2240 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ 2241 ASSERT(data->link_count <= data->st->link_count); 2242 2243 /* fill in fields for the di_link snapshot */ 2244 me = DI_LINK(di_mem_addr(data->st, i_link->self)); 2245 me->self = i_link->self; 2246 me->spec_type = i_link->spec_type; 2247 2248 /* 2249 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t 2250 * are created during the LDI table walk. Since we are 2251 * walking the link hash, the lnode hash has already been 2252 * walked and the lnodes have been snapshotted. Save lnode 2253 * offsets. 2254 */ 2255 me->src_lnode = i_link->src_lnode->self; 2256 me->tgt_lnode = i_link->tgt_lnode->self; 2257 2258 /* 2259 * Save this link's offset in the src_lnode snapshot's link_out 2260 * field 2261 */ 2262 melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode)); 2263 me->src_link_next = melnode->link_out; 2264 melnode->link_out = me->self; 2265 2266 /* 2267 * Put this link on the tgt_lnode's link_in field 2268 */ 2269 melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode)); 2270 me->tgt_link_next = melnode->link_in; 2271 melnode->link_in = me->self; 2272 2273 /* 2274 * An i_lnode_t is only created if the corresponding dip exists 2275 * in the snapshot. A pointer to the di_node is saved in the 2276 * i_lnode_t when it is allocated. For this link, get the di_node 2277 * for the source lnode. Then put the link on the di_node's list 2278 * of src links 2279 */ 2280 medinode = i_link->src_lnode->di_node; 2281 me->src_node_next = medinode->src_links; 2282 medinode->src_links = me->self; 2283 2284 /* 2285 * Put this link on the tgt_links list of the target 2286 * dip. 
2287 */ 2288 medinode = i_link->tgt_lnode->di_node; 2289 me->tgt_node_next = medinode->tgt_links; 2290 medinode->tgt_links = me->self; 2291 2292 return (MH_WALK_CONTINUE); 2293 } 2294 2295 /*ARGSUSED*/ 2296 static uint_t 2297 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2298 { 2299 i_lnode_t *i_lnode = (i_lnode_t *)key; 2300 struct i_layer_data *data = arg; 2301 struct di_lnode *me; 2302 struct di_node *medinode; 2303 2304 ASSERT(i_lnode->self == 0); 2305 2306 i_lnode->self = data->lnode_off + 2307 (data->lnode_count * sizeof (struct di_lnode)); 2308 data->lnode_count++; 2309 2310 ASSERT(data->lnode_off > 0 && data->lnode_count > 0); 2311 ASSERT(data->link_count == 0); /* links not done yet */ 2312 ASSERT(data->lnode_count <= data->st->lnode_count); 2313 2314 /* fill in fields for the di_lnode snapshot */ 2315 me = DI_LNODE(di_mem_addr(data->st, i_lnode->self)); 2316 me->self = i_lnode->self; 2317 2318 if (i_lnode->devt == DDI_DEV_T_NONE) { 2319 me->dev_major = DDI_MAJOR_T_NONE; 2320 me->dev_minor = DDI_MAJOR_T_NONE; 2321 } else { 2322 me->dev_major = getmajor(i_lnode->devt); 2323 me->dev_minor = getminor(i_lnode->devt); 2324 } 2325 2326 /* 2327 * The dip corresponding to this lnode must exist in 2328 * the snapshot or we wouldn't have created the i_lnode_t 2329 * during LDI walk. Save the offset of the dip. 2330 */ 2331 ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0); 2332 me->node = i_lnode->di_node->self; 2333 2334 /* 2335 * There must be at least one link in or out of this lnode 2336 * or we wouldn't have created it. These fields will be set 2337 * during the link hash walk. 2338 */ 2339 ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL)); 2340 2341 /* 2342 * Set the offset of the devinfo node associated with this 2343 * lnode. Also update the node_next pointer; this pointer 2344 * is set if there are multiple lnodes associated with the same 2345 * devinfo node (as can occur when multiple minor nodes 2346 * are open for one device, etc.).
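 *
 * Editor's sketch (consumer view, reusing the names in scope here):
 * all lnodes of one devinfo node can later be recovered from the
 * snapshot by chasing these offsets, e.g.
 *
 *	di_off_t loff = medinode->lnodes;
 *	while (loff != 0) {
 *		struct di_lnode *dl =
 *		    DI_LNODE(di_mem_addr(data->st, loff));
 *		... use dl->dev_major, dl->dev_minor here ...
 *		loff = dl->node_next;
 *	}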
2347 */ 2348 medinode = i_lnode->di_node; 2349 me->node_next = medinode->lnodes; 2350 medinode->lnodes = me->self; 2351 2352 return (MH_WALK_CONTINUE); 2353 } 2354 2355 static di_off_t 2356 di_getlink_data(di_off_t off, struct di_state *st) 2357 { 2358 struct i_layer_data data = {0}; 2359 size_t size; 2360 2361 dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); 2362 2363 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, 2364 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, 2365 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); 2366 2367 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, 2368 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); 2369 2370 /* get driver layering information */ 2371 (void) ldi_usage_walker(st, di_ldi_callback); 2372 2373 /* check if there is any link data to include in the snapshot */ 2374 if (st->lnode_count == 0) { 2375 ASSERT(st->link_count == 0); 2376 goto out; 2377 } 2378 2379 ASSERT(st->link_count != 0); 2380 2381 /* get a pointer to snapshot memory for all the di_lnodes */ 2382 size = sizeof (struct di_lnode) * st->lnode_count; 2383 data.lnode_off = off = di_checkmem(st, off, size); 2384 off += size; 2385 2386 /* get a pointer to snapshot memory for all the di_links */ 2387 size = sizeof (struct di_link) * st->link_count; 2388 data.link_off = off = di_checkmem(st, off, size); 2389 off += size; 2390 2391 data.lnode_count = data.link_count = 0; 2392 data.st = st; 2393 2394 /* 2395 * We have lnodes and links that will go into the 2396 * snapshot, so let's walk the respective hashes 2397 * and snapshot them. The various linkages are 2398 * also set up during the walk. 2399 */ 2400 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); 2401 ASSERT(data.lnode_count == st->lnode_count); 2402 2403 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); 2404 ASSERT(data.link_count == st->link_count); 2405 2406 out: 2407 /* free up the i_lnodes and i_links used to create the snapshot */ 2408 mod_hash_destroy_hash(st->lnode_hash); 2409 mod_hash_destroy_hash(st->link_hash); 2410 st->lnode_count = 0; 2411 st->link_count = 0; 2412 2413 return (off); 2414 } 2415 2416 2417 /* 2418 * Copy all minor data nodes attached to a devinfo node into the snapshot. 2419 * It is called from di_copynode with active ndi_devi_enter to protect 2420 * the list of minor nodes. 
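 *
 * Editor's sketch (consumer view; *off_p is where the caller stores
 * the head offset): the snapshotted minors form a singly linked
 * list of offsets terminated by 0:
 *
 *	di_off_t moff = *off_p;
 *	while (moff != 0) {
 *		struct di_minor *dm = DI_MINOR(di_mem_addr(st, moff));
 *		... use dm->dev_major, dm->dev_minor, dm->spec_type ...
 *		moff = dm->next;
 *	}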
2421 */ 2422 static di_off_t 2423 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, 2424 struct di_state *st) 2425 { 2426 di_off_t off; 2427 struct di_minor *me; 2428 size_t size; 2429 2430 dcmn_err2((CE_CONT, "di_getmdata:\n")); 2431 2432 /* 2433 * check memory first 2434 */ 2435 off = di_checkmem(st, *off_p, sizeof (struct di_minor)); 2436 *off_p = off; 2437 2438 do { 2439 me = DI_MINOR(di_mem_addr(st, off)); 2440 me->self = off; 2441 me->type = mnode->type; 2442 me->node = node; 2443 me->user_private_data = NULL; 2444 2445 off += sizeof (struct di_minor); 2446 2447 /* 2448 * Split dev_t into major/minor, so it works for 2449 * both the ILP32 and LP64 models 2450 */ 2451 me->dev_major = getmajor(mnode->ddm_dev); 2452 me->dev_minor = getminor(mnode->ddm_dev); 2453 me->spec_type = mnode->ddm_spec_type; 2454 2455 if (mnode->ddm_name) { 2456 size = strlen(mnode->ddm_name) + 1; 2457 me->name = off = di_checkmem(st, off, size); 2458 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name); 2459 off += size; 2460 } 2461 2462 if (mnode->ddm_node_type) { 2463 size = strlen(mnode->ddm_node_type) + 1; 2464 me->node_type = off = di_checkmem(st, off, size); 2465 (void) strcpy(di_mem_addr(st, off), 2466 mnode->ddm_node_type); 2467 off += size; 2468 } 2469 2470 off = di_checkmem(st, off, sizeof (struct di_minor)); 2471 me->next = off; 2472 mnode = mnode->next; 2473 } while (mnode); 2474 2475 me->next = 0; 2476 2477 return (off); 2478 } 2479 2480 /* 2481 * di_register_dip(), di_dip_find(): The dip must be protected 2482 * from deallocation when using these routines - this can either 2483 * be a reference count, a busy hold or a per-driver lock. 2484 */ 2485 2486 static void 2487 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) 2488 { 2489 struct dev_info *node = DEVI(dip); 2490 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2491 struct di_dkey *dk; 2492 2493 ASSERT(dip); 2494 ASSERT(off > 0); 2495 2496 key->k_type = DI_DKEY; 2497 dk = &(key->k_u.dkey); 2498 2499 dk->dk_dip = dip; 2500 dk->dk_major = node->devi_major; 2501 dk->dk_inst = node->devi_instance; 2502 dk->dk_nodeid = node->devi_nodeid; 2503 2504 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, 2505 (mod_hash_val_t)(uintptr_t)off) != 0) { 2506 panic( 2507 "duplicate devinfo (%p) registered during device " 2508 "tree walk", (void *)dip); 2509 } 2510 } 2511 2512 2513 static int 2514 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) 2515 { 2516 /* 2517 * uintptr_t must be used because it matches the size of void *; 2518 * mod_hash expects clients to place results into pointer-size 2519 * containers; since di_off_t is always a 32-bit offset, alignment 2520 * would otherwise be broken on 64-bit kernels. 2521 */ 2522 uintptr_t offset; 2523 struct di_key key = {0}; 2524 struct di_dkey *dk; 2525 2526 ASSERT(st->reg_dip_hash); 2527 ASSERT(dip); 2528 ASSERT(off_p); 2529 2530 2531 key.k_type = DI_DKEY; 2532 dk = &(key.k_u.dkey); 2533 2534 dk->dk_dip = dip; 2535 dk->dk_major = DEVI(dip)->devi_major; 2536 dk->dk_inst = DEVI(dip)->devi_instance; 2537 dk->dk_nodeid = DEVI(dip)->devi_nodeid; 2538 2539 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, 2540 (mod_hash_val_t *)&offset) == 0) { 2541 *off_p = (di_off_t)offset; 2542 return (0); 2543 } else { 2544 return (-1); 2545 } 2546 } 2547 2548 /* 2549 * di_register_pip(), di_pip_find(): The pip must be protected from deallocation 2550 * when using these routines.
The caller must do this by protecting the 2551 * client(or phci)<->pip linkage while traversing the list and then holding the 2552 * pip when it is found in the list. 2553 */ 2554 2555 static void 2556 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) 2557 { 2558 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2559 char *path_addr; 2560 struct di_pkey *pk; 2561 2562 ASSERT(pip); 2563 ASSERT(off > 0); 2564 2565 key->k_type = DI_PKEY; 2566 pk = &(key->k_u.pkey); 2567 2568 pk->pk_pip = pip; 2569 path_addr = mdi_pi_get_addr(pip); 2570 if (path_addr) 2571 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); 2572 pk->pk_client = mdi_pi_get_client(pip); 2573 pk->pk_phci = mdi_pi_get_phci(pip); 2574 2575 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, 2576 (mod_hash_val_t)(uintptr_t)off) != 0) { 2577 panic( 2578 "duplicate pathinfo (%p) registered during device " 2579 "tree walk", (void *)pip); 2580 } 2581 } 2582 2583 /* 2584 * As with di_register_pip, the caller must hold or lock the pip 2585 */ 2586 static int 2587 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) 2588 { 2589 /* 2590 * uintptr_t must be used because it matches the size of void *; 2591 * mod_hash expects clients to place results into pointer-size 2592 * containers; since di_off_t is always a 32-bit offset, alignment 2593 * would otherwise be broken on 64-bit kernels. 2594 */ 2595 uintptr_t offset; 2596 struct di_key key = {0}; 2597 struct di_pkey *pk; 2598 2599 ASSERT(st->reg_pip_hash); 2600 ASSERT(off_p); 2601 2602 if (pip == NULL) { 2603 *off_p = 0; 2604 return (0); 2605 } 2606 2607 key.k_type = DI_PKEY; 2608 pk = &(key.k_u.pkey); 2609 2610 pk->pk_pip = pip; 2611 pk->pk_path_addr = mdi_pi_get_addr(pip); 2612 pk->pk_client = mdi_pi_get_client(pip); 2613 pk->pk_phci = mdi_pi_get_phci(pip); 2614 2615 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, 2616 (mod_hash_val_t *)&offset) == 0) { 2617 *off_p = (di_off_t)offset; 2618 return (0); 2619 } else { 2620 return (-1); 2621 } 2622 } 2623 2624 static di_path_state_t 2625 path_state_convert(mdi_pathinfo_state_t st) 2626 { 2627 switch (st) { 2628 case MDI_PATHINFO_STATE_ONLINE: 2629 return (DI_PATH_STATE_ONLINE); 2630 case MDI_PATHINFO_STATE_STANDBY: 2631 return (DI_PATH_STATE_STANDBY); 2632 case MDI_PATHINFO_STATE_OFFLINE: 2633 return (DI_PATH_STATE_OFFLINE); 2634 case MDI_PATHINFO_STATE_FAULT: 2635 return (DI_PATH_STATE_FAULT); 2636 default: 2637 return (DI_PATH_STATE_UNKNOWN); 2638 } 2639 } 2640 2641 2642 static di_off_t 2643 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p, 2644 struct di_state *st) 2645 { 2646 nvpair_t *prop = NULL; 2647 struct di_path_prop *me; 2648 int off; 2649 size_t size; 2650 char *str; 2651 uchar_t *buf; 2652 uint_t nelems; 2653 2654 off = *off_p; 2655 if (mdi_pi_get_next_prop(pip, NULL) == NULL) { 2656 *off_p = 0; 2657 return (off); 2658 } 2659 2660 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2661 *off_p = off; 2662 2663 while (prop = mdi_pi_get_next_prop(pip, prop)) { 2664 me = DI_PATHPROP(di_mem_addr(st, off)); 2665 me->self = off; 2666 off += sizeof (struct di_path_prop); 2667 2668 /* 2669 * property name 2670 */ 2671 size = strlen(nvpair_name(prop)) + 1; 2672 me->prop_name = off = di_checkmem(st, off, size); 2673 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); 2674 off += size; 2675 2676 switch (nvpair_type(prop)) { 2677 case DATA_TYPE_BYTE: 2678 case DATA_TYPE_INT16: 2679 case DATA_TYPE_UINT16: 2680 case DATA_TYPE_INT32: 2681 case DATA_TYPE_UINT32: 
2682 me->prop_type = DDI_PROP_TYPE_INT; 2683 size = sizeof (int32_t); 2684 off = di_checkmem(st, off, size); 2685 (void) nvpair_value_int32(prop, 2686 (int32_t *)di_mem_addr(st, off)); 2687 break; 2688 2689 case DATA_TYPE_INT64: 2690 case DATA_TYPE_UINT64: 2691 me->prop_type = DDI_PROP_TYPE_INT64; 2692 size = sizeof (int64_t); 2693 off = di_checkmem(st, off, size); 2694 (void) nvpair_value_int64(prop, 2695 (int64_t *)di_mem_addr(st, off)); 2696 break; 2697 2698 case DATA_TYPE_STRING: 2699 me->prop_type = DDI_PROP_TYPE_STRING; 2700 (void) nvpair_value_string(prop, &str); 2701 size = strlen(str) + 1; 2702 off = di_checkmem(st, off, size); 2703 (void) strcpy(di_mem_addr(st, off), str); 2704 break; 2705 2706 case DATA_TYPE_BYTE_ARRAY: 2707 case DATA_TYPE_INT16_ARRAY: 2708 case DATA_TYPE_UINT16_ARRAY: 2709 case DATA_TYPE_INT32_ARRAY: 2710 case DATA_TYPE_UINT32_ARRAY: 2711 case DATA_TYPE_INT64_ARRAY: 2712 case DATA_TYPE_UINT64_ARRAY: 2713 me->prop_type = DDI_PROP_TYPE_BYTE; 2714 (void) nvpair_value_byte_array(prop, &buf, &nelems); 2715 size = nelems; 2716 if (nelems != 0) { 2717 off = di_checkmem(st, off, size); 2718 bcopy(buf, di_mem_addr(st, off), size); 2719 } 2720 break; 2721 2722 default: /* Unknown or unhandled type; skip it */ 2723 size = 0; 2724 break; 2725 } 2726 2727 if (size > 0) { 2728 me->prop_data = off; 2729 } 2730 2731 me->prop_len = (int)size; 2732 off += size; 2733 2734 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2735 me->prop_next = off; 2736 } 2737 2738 me->prop_next = 0; 2739 return (off); 2740 } 2741 2742 2743 static void 2744 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp, 2745 int get_client) 2746 { 2747 if (get_client) { 2748 ASSERT(me->path_client == 0); 2749 me->path_client = noff; 2750 ASSERT(me->path_c_link == 0); 2751 *off_pp = &me->path_c_link; 2752 me->path_snap_state &= 2753 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK); 2754 } else { 2755 ASSERT(me->path_phci == 0); 2756 me->path_phci = noff; 2757 ASSERT(me->path_p_link == 0); 2758 *off_pp = &me->path_p_link; 2759 me->path_snap_state &= 2760 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK); 2761 } 2762 } 2763 2764 /* 2765 * off_p: pointer to the linkage field. This links pips along the client|phci 2766 * linkage list. 2767 * noff : Offset for the endpoint dip snapshot. 2768 */ 2769 static di_off_t 2770 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff, 2771 struct di_state *st, int get_client) 2772 { 2773 di_off_t off; 2774 mdi_pathinfo_t *pip; 2775 struct di_path *me; 2776 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *); 2777 size_t size; 2778 2779 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client)); 2780 2781 /* 2782 * The naming of the following mdi_xyz() is unfortunately 2783 * non-intuitive. mdi_get_next_phci_path() follows the 2784 * client_link i.e. the list of pip's belonging to the 2785 * given client dip. 2786 */ 2787 if (get_client) 2788 next_pip = &mdi_get_next_phci_path; 2789 else 2790 next_pip = &mdi_get_next_client_path; 2791 2792 off = *off_p; 2793 2794 pip = NULL; 2795 while (pip = (*next_pip)(dip, pip)) { 2796 mdi_pathinfo_state_t state; 2797 di_off_t stored_offset; 2798 2799 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip)); 2800 2801 mdi_pi_lock(pip); 2802 2803 if (di_pip_find(st, pip, &stored_offset) != -1) { 2804 /* 2805 * We've already seen this pathinfo node so we need to 2806 * take care not to snap it again; However, one endpoint 2807 * and linkage will be set here. 
The other endpoint 2808 * and linkage has already been set when the pip was 2809 * first snapshotted i.e. when the other endpoint dip 2810 * was snapshotted. 2811 */ 2812 me = DI_PATH(di_mem_addr(st, stored_offset)); 2813 *off_p = stored_offset; 2814 2815 di_path_one_endpoint(me, noff, &off_p, get_client); 2816 2817 /* 2818 * The other endpoint and linkage were set when this 2819 * pip was snapshotted. So we are done with both 2820 * endpoints and linkages. 2821 */ 2822 ASSERT(!(me->path_snap_state & 2823 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI))); 2824 ASSERT(!(me->path_snap_state & 2825 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK))); 2826 2827 mdi_pi_unlock(pip); 2828 continue; 2829 } 2830 2831 /* 2832 * Now that we need to snapshot this pip, check memory 2833 */ 2834 size = sizeof (struct di_path); 2835 *off_p = off = di_checkmem(st, off, size); 2836 me = DI_PATH(di_mem_addr(st, off)); 2837 me->self = off; 2838 off += size; 2839 2840 me->path_snap_state = 2841 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK; 2842 me->path_snap_state |= 2843 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI; 2844 2845 /* 2846 * Zero out fields as di_checkmem() doesn't guarantee 2847 * zero-filled memory 2848 */ 2849 me->path_client = me->path_phci = 0; 2850 me->path_c_link = me->path_p_link = 0; 2851 2852 di_path_one_endpoint(me, noff, &off_p, get_client); 2853 2854 /* 2855 * Note the existence of this pathinfo 2856 */ 2857 di_register_pip(st, pip, me->self); 2858 2859 state = mdi_pi_get_state(pip); 2860 me->path_state = path_state_convert(state); 2861 2862 me->path_instance = mdi_pi_get_path_instance(pip); 2863 2864 /* 2865 * Get intermediate addressing info. 2866 */ 2867 size = strlen(mdi_pi_get_addr(pip)) + 1; 2868 me->path_addr = off = di_checkmem(st, off, size); 2869 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip)); 2870 off += size; 2871 2872 /* 2873 * Get path properties if props are to be included in the 2874 * snapshot 2875 */ 2876 if (DINFOPROP & st->command) { 2877 me->path_prop = off; 2878 off = di_path_getprop(pip, &me->path_prop, st); 2879 } else { 2880 me->path_prop = 0; 2881 } 2882 2883 mdi_pi_unlock(pip); 2884 } 2885 2886 *off_p = 0; 2887 return (off); 2888 } 2889 2890 /* 2891 * Return driver prop_op entry point for the specified devinfo node. 2892 * 2893 * To return a non-NULL value: 2894 * - driver must be attached and held: 2895 * If driver is not attached we ignore the driver property list. 2896 * No one should rely on such properties. 2897 * - driver "cb_prop_op != ddi_prop_op": 2898 * If "cb_prop_op == ddi_prop_op", framework does not need to call driver. 2899 * XXX or parent's bus_prop_op != ddi_bus_prop_op 2900 */ 2901 static int 2902 (*di_getprop_prop_op(struct dev_info *dip)) 2903 (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *) 2904 { 2905 struct dev_ops *ops; 2906 2907 /* If driver is not attached we ignore the driver property list. */ 2908 if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip)) 2909 return (NULL); 2910 2911 /* 2912 * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev, 2913 * or even NULL. 
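 *
 * Editor's sketch (hypothetical driver "xx", illustration only): a
 * driver whose dynamic properties should be picked up here installs
 * its own prop_op(9E) routine in the cb_prop_op member of its
 * cb_ops(9S), e.g.
 *
 *	static int
 *	xx_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t op,
 *	    int flags, char *name, caddr_t valuep, int *lengthp)
 *	{
 *		... answer dynamic properties here, else fall back ...
 *		return (ddi_prop_op(dev, dip, op, flags, name,
 *		    valuep, lengthp));
 *	}
 *
 * Leaving cb_prop_op as ddi_prop_op (or nodev, nulldev, NULL) tells
 * this code the driver has nothing to add beyond its static lists.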
2914 */ 2915 ops = dip->devi_ops; 2916 if (ops && ops->devo_cb_ops && 2917 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) && 2918 (ops->devo_cb_ops->cb_prop_op != nodev) && 2919 (ops->devo_cb_ops->cb_prop_op != nulldev) && 2920 (ops->devo_cb_ops->cb_prop_op != NULL)) 2921 return (ops->devo_cb_ops->cb_prop_op); 2922 return (NULL); 2923 } 2924 2925 static di_off_t 2926 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip, 2927 int (*prop_op)(), 2928 char *name, dev_t devt, int aflags, int alen, caddr_t aval, 2929 di_off_t off, di_off_t **off_pp) 2930 { 2931 int need_free = 0; 2932 dev_t pdevt; 2933 int pflags; 2934 int rv; 2935 caddr_t val; 2936 int len; 2937 size_t size; 2938 struct di_prop *pp; 2939 2940 /* If we have a prop_op function, ask the driver for the latest value */ 2941 if (prop_op) { 2942 ASSERT(dip); 2943 2944 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */ 2945 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt; 2946 2947 /* 2948 * We have type information in flags, but are invoking an 2949 * old non-typed prop_op(9E) interface. Since not all types are 2950 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64), 2951 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to 2952 * expand type bits beyond DDI_PROP_TYPE_ANY. This allows us 2953 * to use the legacy prop_op(9E) interface to obtain updates of 2954 * non-DDI_PROP_TYPE_ANY dynamic properties. 2955 */ 2956 pflags = aflags & ~DDI_PROP_TYPE_MASK; 2957 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM | 2958 DDI_PROP_CONSUMER_TYPED; 2959 2960 /* 2961 * Hold and exit across prop_op(9E) to avoid lock order 2962 * issues between 2963 * [ndi_devi_enter() ..prop_op(9E).. driver-lock] 2964 * .vs. 2965 * [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F).. 2966 * ndi_devi_enter()] 2967 * ordering. 2968 */ 2969 ndi_hold_devi((dev_info_t *)dip); 2970 ndi_devi_exit((dev_info_t *)dip, dip->devi_circular); 2971 rv = (*prop_op)(pdevt, (dev_info_t *)dip, 2972 PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len); 2973 ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular); 2974 ndi_rele_devi((dev_info_t *)dip); 2975 2976 if (rv == DDI_PROP_SUCCESS) { 2977 need_free = 1; /* dynamic prop obtained */ 2978 } else if (dyn) { 2979 /* 2980 * A dynamic property must succeed prop_op(9E) to show 2981 * up in the snapshot - that is the only source of its 2982 * value. 2983 */ 2984 return (off); /* dynamic prop not supported */ 2985 } else { 2986 /* 2987 * In case calling the driver's prop_op(9E) caused an 2988 * update of a non-dynamic property (code leading 2989 * to ddi_prop_change), we defer picking up val and 2990 * len information until after prop_op(9E) to ensure 2991 * that we snapshot the latest value. 2992 */ 2993 val = aval; 2994 len = alen; 2995 2996 } 2997 } else { 2998 val = aval; 2999 len = alen; 3000 } 3001 3002 dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n", 3003 list, name ?
name : "NULL", len, (void *)val)); 3004 3005 size = sizeof (struct di_prop); 3006 **off_pp = off = di_checkmem(st, off, size); 3007 pp = DI_PROP(di_mem_addr(st, off)); 3008 pp->self = off; 3009 off += size; 3010 3011 pp->dev_major = getmajor(devt); 3012 pp->dev_minor = getminor(devt); 3013 pp->prop_flags = aflags; 3014 pp->prop_list = list; 3015 3016 /* property name */ 3017 if (name) { 3018 size = strlen(name) + 1; 3019 pp->prop_name = off = di_checkmem(st, off, size); 3020 (void) strcpy(di_mem_addr(st, off), name); 3021 off += size; 3022 } else { 3023 pp->prop_name = -1; 3024 } 3025 3026 pp->prop_len = len; 3027 if (val == NULL) { 3028 pp->prop_data = -1; 3029 } else if (len != 0) { 3030 size = len; 3031 pp->prop_data = off = di_checkmem(st, off, size); 3032 bcopy(val, di_mem_addr(st, off), size); 3033 off += size; 3034 } 3035 3036 pp->next = 0; /* assume tail for now */ 3037 *off_pp = &pp->next; /* return pointer to our next */ 3038 3039 if (need_free) /* free PROP_LEN_AND_VAL_ALLOC alloc */ 3040 kmem_free(val, len); 3041 return (off); 3042 } 3043 3044 3045 /* 3046 * Copy a list of properties attached to a devinfo node. Called from 3047 * di_copynode with active ndi_devi_enter. The major number is passed in case 3048 * we need to call driver's prop_op entry. The value of list indicates 3049 * which list we are copying. Possible values are: 3050 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST 3051 */ 3052 static di_off_t 3053 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p, 3054 struct di_state *st, struct dev_info *dip) 3055 { 3056 struct ddi_prop *prop; 3057 int (*prop_op)(); 3058 int off; 3059 struct ddi_minor_data *mn; 3060 i_ddi_prop_dyn_t *dp; 3061 struct plist { 3062 struct plist *pl_next; 3063 char *pl_name; 3064 int pl_flags; 3065 dev_t pl_dev; 3066 int pl_len; 3067 caddr_t pl_val; 3068 } *pl, *pl0, **plp; 3069 3070 ASSERT(st != NULL); 3071 3072 off = *off_p; 3073 *off_p = 0; 3074 dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n", 3075 list, (void *)*pprop)); 3076 3077 /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */ 3078 prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL; 3079 3080 /* 3081 * Form private list of properties, holding devi_lock for properties 3082 * that hang off the dip. 3083 */ 3084 if (dip) 3085 mutex_enter(&(dip->devi_lock)); 3086 for (pl0 = NULL, plp = &pl0, prop = *pprop; 3087 prop; plp = &pl->pl_next, prop = prop->prop_next) { 3088 pl = kmem_alloc(sizeof (*pl), KM_SLEEP); 3089 *plp = pl; 3090 pl->pl_next = NULL; 3091 if (prop->prop_name) 3092 pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP); 3093 else 3094 pl->pl_name = NULL; 3095 pl->pl_flags = prop->prop_flags; 3096 pl->pl_dev = prop->prop_dev; 3097 if (prop->prop_len) { 3098 pl->pl_len = prop->prop_len; 3099 pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP); 3100 bcopy(prop->prop_val, pl->pl_val, pl->pl_len); 3101 } else { 3102 pl->pl_len = 0; 3103 pl->pl_val = NULL; 3104 } 3105 } 3106 if (dip) 3107 mutex_exit(&(dip->devi_lock)); 3108 3109 /* 3110 * Now that we have dropped devi_lock, perform a second-pass to 3111 * add properties to the snapshot. We do this as a second pass 3112 * because we may need to call prop_op(9E) and we can't hold 3113 * devi_lock across that call. 
3114 */ 3115 for (pl = pl0; pl; pl = pl0) { 3116 pl0 = pl->pl_next; 3117 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name, 3118 pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val, 3119 off, &off_p); 3120 if (pl->pl_val) 3121 kmem_free(pl->pl_val, pl->pl_len); 3122 if (pl->pl_name) 3123 kmem_free(pl->pl_name, strlen(pl->pl_name) + 1); 3124 kmem_free(pl, sizeof (*pl)); 3125 } 3126 3127 /* 3128 * If there is no prop_op or dynamic property support has been 3129 * disabled, we are done. 3130 */ 3131 if ((prop_op == NULL) || (di_prop_dyn == 0)) { 3132 *off_p = 0; 3133 return (off); 3134 } 3135 3136 /* Add dynamic driver properties to snapshot */ 3137 for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip); 3138 dp && dp->dp_name; dp++) { 3139 if (dp->dp_spec_type) { 3140 /* if spec_type, property of matching minor */ 3141 ASSERT(DEVI_BUSY_OWNED(dip)); 3142 for (mn = dip->devi_minor; mn; mn = mn->next) { 3143 if (mn->ddm_spec_type != dp->dp_spec_type) 3144 continue; 3145 off = di_getprop_add(list, 1, st, dip, prop_op, 3146 dp->dp_name, mn->ddm_dev, dp->dp_type, 3147 0, NULL, off, &off_p); 3148 } 3149 } else { 3150 /* property of devinfo node */ 3151 off = di_getprop_add(list, 1, st, dip, prop_op, 3152 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type, 3153 0, NULL, off, &off_p); 3154 } 3155 } 3156 3157 /* Add dynamic parent properties to snapshot */ 3158 for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip); 3159 dp && dp->dp_name; dp++) { 3160 if (dp->dp_spec_type) { 3161 /* if spec_type, property of matching minor */ 3162 ASSERT(DEVI_BUSY_OWNED(dip)); 3163 for (mn = dip->devi_minor; mn; mn = mn->next) { 3164 if (mn->ddm_spec_type != dp->dp_spec_type) 3165 continue; 3166 off = di_getprop_add(list, 1, st, dip, prop_op, 3167 dp->dp_name, mn->ddm_dev, dp->dp_type, 3168 0, NULL, off, &off_p); 3169 } 3170 } else { 3171 /* property of devinfo node */ 3172 off = di_getprop_add(list, 1, st, dip, prop_op, 3173 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type, 3174 0, NULL, off, &off_p); 3175 } 3176 } 3177 3178 *off_p = 0; 3179 return (off); 3180 } 3181 3182 /* 3183 * find private data format attached to a dip 3184 * parent = 1 to match driver name of parent dip (for parent private data) 3185 * 0 to match driver name of current dip (for driver private data) 3186 */ 3187 #define DI_MATCH_DRIVER 0 3188 #define DI_MATCH_PARENT 1 3189 3190 struct di_priv_format * 3191 di_match_drv_name(struct dev_info *node, struct di_state *st, int match) 3192 { 3193 int i, count, len; 3194 char *drv_name; 3195 major_t major; 3196 struct di_all *all; 3197 struct di_priv_format *form; 3198 3199 dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n", 3200 node->devi_node_name, match)); 3201 3202 if (match == DI_MATCH_PARENT) { 3203 node = DEVI(node->devi_parent); 3204 } 3205 3206 if (node == NULL) { 3207 return (NULL); 3208 } 3209 3210 major = node->devi_major; 3211 if (major == (major_t)(-1)) { 3212 return (NULL); 3213 } 3214 3215 /* 3216 * Match the driver name. 
3217 */ 3218 drv_name = ddi_major_to_name(major); 3219 if ((drv_name == NULL) || *drv_name == '\0') { 3220 return (NULL); 3221 } 3222 3223 /* Now get the di_priv_format array */ 3224 all = DI_ALL_PTR(st); 3225 if (match == DI_MATCH_PARENT) { 3226 count = all->n_ppdata; 3227 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format)); 3228 } else { 3229 count = all->n_dpdata; 3230 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format)); 3231 } 3232 3233 len = strlen(drv_name); 3234 for (i = 0; i < count; i++) { 3235 char *tmp; 3236 3237 tmp = form[i].drv_name; 3238 while (tmp && (*tmp != '\0')) { 3239 if (strncmp(drv_name, tmp, len) == 0) { 3240 return (&form[i]); 3241 } 3242 /* 3243 * Move to the next driver name, skipping the white space 3244 */ 3245 if (tmp = strchr(tmp, ' ')) { 3246 tmp++; 3247 } 3248 } 3249 } 3250 3251 return (NULL); 3252 } 3253 3254 /* 3255 * The following functions copy data as specified by the format passed in. 3256 * To prevent an invalid format from panicking the system, we call on_fault(). 3257 * A return value of 0 indicates an error. Otherwise, the total offset 3258 * is returned. 3259 */ 3260 #define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */ 3261 3262 static di_off_t 3263 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node, 3264 void *data, di_off_t *off_p, struct di_state *st) 3265 { 3266 caddr_t pa; 3267 void *ptr; 3268 int i, size, repeat; 3269 di_off_t off, off0, *tmp; 3270 char *path; 3271 label_t ljb; 3272 3273 dcmn_err2((CE_CONT, "di_getprvdata:\n")); 3274 3275 /* 3276 * check memory availability. Private data size is 3277 * limited to DI_MAX_PRIVDATA. 3278 */ 3279 off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA); 3280 *off_p = off; 3281 3282 if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) { 3283 goto failure; 3284 } 3285 3286 if (!on_fault(&ljb)) { 3287 /* copy the struct */ 3288 bcopy(data, di_mem_addr(st, off), pdp->bytes); 3289 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */ 3290 3291 /* dereferencing pointers */ 3292 for (i = 0; i < MAX_PTR_IN_PRV; i++) { 3293 3294 if (pdp->ptr[i].size == 0) { 3295 goto success; /* no more ptrs */ 3296 } 3297 3298 /* 3299 * first, get the pointer content 3300 */ 3301 if ((pdp->ptr[i].offset < 0) || 3302 (pdp->ptr[i].offset > pdp->bytes - sizeof (char *))) 3303 goto failure; /* wrong offset */ 3304 3305 pa = di_mem_addr(st, off + pdp->ptr[i].offset); 3306 3307 /* save a tmp ptr to store off_t later */ 3308 tmp = (di_off_t *)(intptr_t)pa; 3309 3310 /* get pointer value, if NULL continue */ 3311 ptr = *((void **) (intptr_t)pa); 3312 if (ptr == NULL) { 3313 continue; 3314 } 3315 3316 /* 3317 * next, find the repeat count (array dimension) 3318 */ 3319 repeat = pdp->ptr[i].len_offset; 3320 3321 /* 3322 * A value >= 0 denotes a variable sized array: it is the 3323 * offset of an int member of the structure that holds 3324 * the element count. 3325 * 3326 * A negative value denotes a fixed sized array whose 3327 * element count is the absolute value itself. 3328 */ 3329 if (repeat > pdp->bytes - sizeof (int)) { 3330 goto failure; /* wrong offset */ 3331 } 3332 3333 if (repeat >= 0) { 3334 repeat = *((int *) 3335 (intptr_t)((caddr_t)data + repeat)); 3336 } else { 3337 repeat = -repeat; 3338 } 3339 3340 /* 3341 * next, get the size of the object to be copied 3342 */ 3343 size = pdp->ptr[i].size * repeat; 3344 3345 /* 3346 * Arbitrarily limit the total size of object to be 3347 * copied (1 byte up to DI_MAX_PRIVDATA, half a page).
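 *
 * Editor's worked example (hypothetical structures, illustration
 * only): under the conventions above, a private record such as
 *
 *	struct xx_priv { int xx_nregs; struct regspec *xx_regs; };
 *
 * would be described by a di_priv_format entry along the lines of
 *
 *	bytes = sizeof (struct xx_priv);
 *	ptr[0].offset = offsetof(struct xx_priv, xx_regs);
 *	ptr[0].size = sizeof (struct regspec);
 *	ptr[0].len_offset = offsetof(struct xx_priv, xx_nregs);
 *
 * so the element count is read from xx_nregs at copy time; a fixed
 * four-element array would use len_offset = -4 instead.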
3348 */ 3349 if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) { 3350 goto failure; /* wrong size or too big */ 3351 } 3352 3353 /* 3354 * Now copy the data 3355 */ 3356 *tmp = off0; 3357 bcopy(ptr, di_mem_addr(st, off + off0), size); 3358 off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */ 3359 } 3360 } else { 3361 goto failure; 3362 } 3363 3364 success: 3365 /* 3366 * success if reached here 3367 */ 3368 no_fault(); 3369 return (off + off0); 3370 /*NOTREACHED*/ 3371 3372 failure: 3373 /* 3374 * fault occurred 3375 */ 3376 no_fault(); 3377 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 3378 cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p", 3379 ddi_pathname((dev_info_t *)node, path), data); 3380 kmem_free(path, MAXPATHLEN); 3381 *off_p = -1; /* set private data to indicate error */ 3382 3383 return (off); 3384 } 3385 3386 /* 3387 * get parent private data; on error, returns original offset 3388 */ 3389 static di_off_t 3390 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st) 3391 { 3392 int off; 3393 struct di_priv_format *ppdp; 3394 3395 dcmn_err2((CE_CONT, "di_getppdata:\n")); 3396 3397 /* find the parent data format */ 3398 if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) { 3399 off = *off_p; 3400 *off_p = 0; /* set parent data to none */ 3401 return (off); 3402 } 3403 3404 return (di_getprvdata(ppdp, node, 3405 ddi_get_parent_data((dev_info_t *)node), off_p, st)); 3406 } 3407 3408 /* 3409 * get driver private data; on error, returns original offset 3410 */ 3411 static di_off_t 3412 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st) 3413 { 3414 int off; 3415 struct di_priv_format *dpdp; 3416 3417 dcmn_err2((CE_CONT, "di_getdpdata:")); 3418 3419 /* find the driver data format */ 3420 if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) { 3421 off = *off_p; 3422 *off_p = 0; /* set driver data to none */ 3423 return (off); 3424 } 3425 3426 return (di_getprvdata(dpdp, node, 3427 ddi_get_driver_private((dev_info_t *)node), off_p, st)); 3428 } 3429 3430 /* 3431 * The driver is stateful across DINFOCPYALL and DINFOUSRLD. 3432 * This function encapsulates the state machine: 3433 * 3434 * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY -> 3435 * | SNAPSHOT USRLD | 3436 * -------------------------------------------------- 3437 * 3438 * Returns 0 on success and -1 on failure 3439 */ 3440 static int 3441 di_setstate(struct di_state *st, int new_state) 3442 { 3443 int ret = 0; 3444 3445 mutex_enter(&di_lock); 3446 switch (new_state) { 3447 case IOC_IDLE: 3448 case IOC_DONE: 3449 break; 3450 case IOC_SNAP: 3451 if (st->di_iocstate != IOC_IDLE) 3452 ret = -1; 3453 break; 3454 case IOC_COPY: 3455 if (st->di_iocstate != IOC_DONE) 3456 ret = -1; 3457 break; 3458 default: 3459 ret = -1; 3460 } 3461 3462 if (ret == 0) 3463 st->di_iocstate = new_state; 3464 else 3465 cmn_err(CE_NOTE, "incorrect state transition from %d to %d", 3466 st->di_iocstate, new_state); 3467 mutex_exit(&di_lock); 3468 return (ret); 3469 } 3470 3471 /* 3472 * We cannot assume the presence of the entire 3473 * snapshot in this routine.
All we are guaranteed 3474 * is the di_all struct + 1 byte (for root_path) 3475 */ 3476 static int 3477 header_plus_one_ok(struct di_all *all) 3478 { 3479 /* 3480 * Refuse to read old versions 3481 */ 3482 if (all->version != DI_SNAPSHOT_VERSION) { 3483 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); 3484 return (0); 3485 } 3486 3487 if (all->cache_magic != DI_CACHE_MAGIC) { 3488 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); 3489 return (0); 3490 } 3491 3492 if (all->snapshot_time == 0) { 3493 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); 3494 return (0); 3495 } 3496 3497 if (all->top_devinfo == 0) { 3498 CACHE_DEBUG((DI_ERR, "NULL top devinfo")); 3499 return (0); 3500 } 3501 3502 if (all->map_size < sizeof (*all) + 1) { 3503 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); 3504 return (0); 3505 } 3506 3507 if (all->root_path[0] != '/' || all->root_path[1] != '\0') { 3508 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", 3509 all->root_path[0], all->root_path[1])); 3510 return (0); 3511 } 3512 3513 /* 3514 * We can't check checksum here as we just have the header 3515 */ 3516 3517 return (1); 3518 } 3519 3520 static int 3521 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) 3522 { 3523 rlim64_t rlimit; 3524 ssize_t resid; 3525 int error = 0; 3526 3527 3528 rlimit = RLIM64_INFINITY; 3529 3530 while (len) { 3531 resid = 0; 3532 error = vn_rdwr(UIO_WRITE, vp, buf, len, off, 3533 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 3534 3535 if (error || resid < 0) { 3536 error = error ? error : EIO; 3537 CACHE_DEBUG((DI_ERR, "write error: %d", error)); 3538 break; 3539 } 3540 3541 /* 3542 * Check if we are making progress 3543 */ 3544 if (resid >= len) { 3545 error = ENOSPC; 3546 break; 3547 } 3548 buf += len - resid; 3549 off += len - resid; 3550 len = resid; 3551 } 3552 3553 return (error); 3554 } 3555 3556 static void 3557 di_cache_write(struct di_cache *cache) 3558 { 3559 struct di_all *all; 3560 struct vnode *vp; 3561 int oflags; 3562 size_t map_size; 3563 size_t chunk; 3564 offset_t off; 3565 int error; 3566 char *buf; 3567 3568 ASSERT(DI_CACHE_LOCKED(*cache)); 3569 ASSERT(!servicing_interrupt()); 3570 3571 if (cache->cache_size == 0) { 3572 ASSERT(cache->cache_data == NULL); 3573 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); 3574 return; 3575 } 3576 3577 ASSERT(cache->cache_size > 0); 3578 ASSERT(cache->cache_data); 3579 3580 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { 3581 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); 3582 return; 3583 } 3584 3585 all = (struct di_all *)cache->cache_data; 3586 3587 if (!header_plus_one_ok(all)) { 3588 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); 3589 return; 3590 } 3591 3592 ASSERT(strcmp(all->root_path, "/") == 0); 3593 3594 /* 3595 * The cache_size is the total allocated memory for the cache. 3596 * The map_size is the actual size of valid data in the cache. 3597 * map_size may be smaller than cache_size but cannot exceed 3598 * cache_size. 3599 */ 3600 if (all->map_size > cache->cache_size) { 3601 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." 
3602 " Skipping write", all->map_size, cache->cache_size)); 3603 return; 3604 } 3605 3606 /* 3607 * First unlink the temp file 3608 */ 3609 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3610 if (error && error != ENOENT) { 3611 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3612 DI_CACHE_TEMP, error)); 3613 } 3614 3615 if (error == EROFS) { 3616 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3617 return; 3618 } 3619 3620 vp = NULL; 3621 oflags = (FCREAT|FWRITE); 3622 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3623 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3624 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3625 DI_CACHE_TEMP, error)); 3626 return; 3627 } 3628 3629 ASSERT(vp); 3630 3631 /* 3632 * Paranoid: Check if the file is on a read-only FS 3633 */ 3634 if (vn_is_readonly(vp)) { 3635 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3636 goto fail; 3637 } 3638 3639 /* 3640 * Note that we only write map_size bytes to disk - this saves 3641 * space as the actual cache size may be larger than size of 3642 * valid data in the cache. 3643 * Another advantage is that it makes verification of size 3644 * easier when the file is read later. 3645 */ 3646 map_size = all->map_size; 3647 off = 0; 3648 buf = cache->cache_data; 3649 3650 while (map_size) { 3651 ASSERT(map_size > 0); 3652 /* 3653 * Write in chunks so that VM system 3654 * is not overwhelmed 3655 */ 3656 if (map_size > di_chunk * PAGESIZE) 3657 chunk = di_chunk * PAGESIZE; 3658 else 3659 chunk = map_size; 3660 3661 error = chunk_write(vp, off, buf, chunk); 3662 if (error) { 3663 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3664 off, error)); 3665 goto fail; 3666 } 3667 3668 off += chunk; 3669 buf += chunk; 3670 map_size -= chunk; 3671 3672 /* If low on memory, give pageout a chance to run */ 3673 if (freemem < desfree) 3674 delay(1); 3675 } 3676 3677 /* 3678 * Now sync the file and close it 3679 */ 3680 if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) { 3681 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3682 } 3683 3684 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) { 3685 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3686 VN_RELE(vp); 3687 return; 3688 } 3689 3690 VN_RELE(vp); 3691 3692 /* 3693 * Now do the rename 3694 */ 3695 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3696 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3697 return; 3698 } 3699 3700 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3701 3702 return; 3703 3704 fail: 3705 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL); 3706 VN_RELE(vp); 3707 } 3708 3709 3710 /* 3711 * Since we could be called early in boot, 3712 * use kobj_read_file() 3713 */ 3714 static void 3715 di_cache_read(struct di_cache *cache) 3716 { 3717 struct _buf *file; 3718 struct di_all *all; 3719 int n; 3720 size_t map_size, sz, chunk; 3721 offset_t off; 3722 caddr_t buf; 3723 uint32_t saved_crc, crc; 3724 3725 ASSERT(modrootloaded); 3726 ASSERT(DI_CACHE_LOCKED(*cache)); 3727 ASSERT(cache->cache_data == NULL); 3728 ASSERT(cache->cache_size == 0); 3729 ASSERT(!servicing_interrupt()); 3730 3731 file = kobj_open_file(DI_CACHE_FILE); 3732 if (file == (struct _buf *)-1) { 3733 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3734 DI_CACHE_FILE, ENOENT)); 3735 return; 3736 } 3737 3738 /* 3739 * Read in the header+root_path first. 
The root_path must be "/" 3740 */ 3741 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); 3742 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); 3743 3744 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { 3745 kmem_free(all, sizeof (*all) + 1); 3746 kobj_close_file(file); 3747 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); 3748 return; 3749 } 3750 3751 map_size = all->map_size; 3752 3753 kmem_free(all, sizeof (*all) + 1); 3754 3755 ASSERT(map_size >= sizeof (*all) + 1); 3756 3757 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); 3758 sz = map_size; 3759 off = 0; 3760 while (sz) { 3761 /* Don't overload VM with large reads */ 3762 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; 3763 n = kobj_read_file(file, buf, chunk, off); 3764 if (n != chunk) { 3765 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", 3766 DI_CACHE_FILE, off)); 3767 goto fail; 3768 } 3769 off += chunk; 3770 buf += chunk; 3771 sz -= chunk; 3772 } 3773 3774 ASSERT(off == map_size); 3775 3776 /* 3777 * Read past expected EOF to verify size. 3778 */ 3779 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { 3780 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); 3781 goto fail; 3782 } 3783 3784 all = (struct di_all *)di_cache.cache_data; 3785 if (!header_plus_one_ok(all)) { 3786 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); 3787 goto fail; 3788 } 3789 3790 /* 3791 * Compute CRC with checksum field in the cache data set to 0 3792 */ 3793 saved_crc = all->cache_checksum; 3794 all->cache_checksum = 0; 3795 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); 3796 all->cache_checksum = saved_crc; 3797 3798 if (crc != all->cache_checksum) { 3799 CACHE_DEBUG((DI_ERR, 3800 "%s: checksum error: expected=0x%x actual=0x%x", 3801 DI_CACHE_FILE, all->cache_checksum, crc)); 3802 goto fail; 3803 } 3804 3805 if (all->map_size != map_size) { 3806 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); 3807 goto fail; 3808 } 3809 3810 kobj_close_file(file); 3811 3812 di_cache.cache_size = map_size; 3813 3814 return; 3815 3816 fail: 3817 kmem_free(di_cache.cache_data, map_size); 3818 kobj_close_file(file); 3819 di_cache.cache_data = NULL; 3820 di_cache.cache_size = 0; 3821 } 3822 3823 3824 /* 3825 * Checks if arguments are valid for using the cache. 
3826 */ 3827 static int 3828 cache_args_valid(struct di_state *st, int *error) 3829 { 3830 ASSERT(error); 3831 ASSERT(st->mem_size > 0); 3832 ASSERT(st->memlist != NULL); 3833 3834 if (!modrootloaded || !i_ddi_io_initialized()) { 3835 CACHE_DEBUG((DI_ERR, 3836 "cache lookup failure: I/O subsystem not inited")); 3837 *error = ENOTACTIVE; 3838 return (0); 3839 } 3840 3841 /* 3842 * No other flags allowed with DINFOCACHE 3843 */ 3844 if (st->command != (DINFOCACHE & DIIOC_MASK)) { 3845 CACHE_DEBUG((DI_ERR, 3846 "cache lookup failure: bad flags: 0x%x", 3847 st->command)); 3848 *error = EINVAL; 3849 return (0); 3850 } 3851 3852 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3853 CACHE_DEBUG((DI_ERR, 3854 "cache lookup failure: bad root: %s", 3855 DI_ALL_PTR(st)->root_path)); 3856 *error = EINVAL; 3857 return (0); 3858 } 3859 3860 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); 3861 3862 *error = 0; 3863 3864 return (1); 3865 } 3866 3867 static int 3868 snapshot_is_cacheable(struct di_state *st) 3869 { 3870 ASSERT(st->mem_size > 0); 3871 ASSERT(st->memlist != NULL); 3872 3873 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) != 3874 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { 3875 CACHE_DEBUG((DI_INFO, 3876 "not cacheable: incompatible flags: 0x%x", 3877 st->command)); 3878 return (0); 3879 } 3880 3881 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3882 CACHE_DEBUG((DI_INFO, 3883 "not cacheable: incompatible root path: %s", 3884 DI_ALL_PTR(st)->root_path)); 3885 return (0); 3886 } 3887 3888 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); 3889 3890 return (1); 3891 } 3892 3893 static int 3894 di_cache_lookup(struct di_state *st) 3895 { 3896 size_t rval; 3897 int cache_valid; 3898 3899 ASSERT(cache_args_valid(st, &cache_valid)); 3900 ASSERT(modrootloaded); 3901 3902 DI_CACHE_LOCK(di_cache); 3903 3904 /* 3905 * The following assignment determines the validity 3906 * of the cache as far as this snapshot is concerned. 3907 */ 3908 cache_valid = di_cache.cache_valid; 3909 3910 if (cache_valid && di_cache.cache_data == NULL) { 3911 di_cache_read(&di_cache); 3912 /* check for read or file error */ 3913 if (di_cache.cache_data == NULL) 3914 cache_valid = 0; 3915 } 3916 3917 if (cache_valid) { 3918 /* 3919 * Ok, the cache was valid as of this particular 3920 * snapshot. Copy the cached snapshot. This is safe 3921 * to do as the cache cannot be freed (we hold the 3922 * cache lock). Free the memory allocated in di_state 3923 * up until this point - we will simply copy everything 3924 * in the cache. 3925 */ 3926 3927 ASSERT(di_cache.cache_data != NULL); 3928 ASSERT(di_cache.cache_size > 0); 3929 3930 di_freemem(st); 3931 3932 rval = 0; 3933 if (di_cache2mem(&di_cache, st) > 0) { 3934 /* 3935 * map_size is size of valid data in the 3936 * cached snapshot and may be less than 3937 * size of the cache. 3938 */ 3939 ASSERT(DI_ALL_PTR(st)); 3940 rval = DI_ALL_PTR(st)->map_size; 3941 3942 ASSERT(rval >= sizeof (struct di_all)); 3943 ASSERT(rval <= di_cache.cache_size); 3944 } 3945 } else { 3946 /* 3947 * The cache isn't valid, we need to take a snapshot. 3948 * Set the command flags appropriately 3949 */ 3950 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); 3951 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); 3952 rval = di_cache_update(st); 3953 st->command = (DINFOCACHE & DIIOC_MASK); 3954 } 3955 3956 DI_CACHE_UNLOCK(di_cache); 3957 3958 /* 3959 * For cached snapshots, the devinfo driver always returns 3960 * a snapshot rooted at "/". 
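 *
 * Editor's note (sketch of the contract implemented above): the
 * value returned is the map_size of the snapshot now held in st,
 * or 0 if neither the cache nor a fresh snapshot could supply one,
 * so the ioctl path can simply do
 *
 *	if ((rval = di_cache_lookup(st)) == 0)
 *		... fail the request ...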
3961 */ 3962 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); 3963 3964 return ((int)rval); 3965 } 3966 3967 /* 3968 * This is a forced update of the cache - the previous state of the cache 3969 * may be: 3970 * - unpopulated 3971 * - populated and invalid 3972 * - populated and valid 3973 */ 3974 static int 3975 di_cache_update(struct di_state *st) 3976 { 3977 int rval; 3978 uint32_t crc; 3979 struct di_all *all; 3980 3981 ASSERT(DI_CACHE_LOCKED(di_cache)); 3982 ASSERT(snapshot_is_cacheable(st)); 3983 3984 /* 3985 * Free the in-core cache and the on-disk file (if they exist) 3986 */ 3987 i_ddi_di_cache_free(&di_cache); 3988 3989 /* 3990 * Set valid flag before taking the snapshot, 3991 * so that any invalidations that arrive 3992 * during or after the snapshot are not 3993 * removed by us. 3994 */ 3995 atomic_or_32(&di_cache.cache_valid, 1); 3996 3997 rval = di_snapshot_and_clean(st); 3998 3999 if (rval == 0) { 4000 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); 4001 return (0); 4002 } 4003 4004 DI_ALL_PTR(st)->map_size = rval; 4005 if (di_mem2cache(st, &di_cache) == 0) { 4006 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); 4007 return (0); 4008 } 4009 4010 ASSERT(di_cache.cache_data); 4011 ASSERT(di_cache.cache_size > 0); 4012 4013 /* 4014 * Now that we have cached the snapshot, compute its checksum. 4015 * The checksum is only computed over the valid data in the 4016 * cache, not the entire cache. 4017 * Also, set all the fields (except checksum) before computing 4018 * checksum. 4019 */ 4020 all = (struct di_all *)di_cache.cache_data; 4021 all->cache_magic = DI_CACHE_MAGIC; 4022 all->map_size = rval; 4023 4024 ASSERT(all->cache_checksum == 0); 4025 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); 4026 all->cache_checksum = crc; 4027 4028 di_cache_write(&di_cache); 4029 4030 return (rval); 4031 } 4032 4033 static void 4034 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 4035 { 4036 va_list ap; 4037 4038 if (di_cache_debug <= DI_QUIET) 4039 return; 4040 4041 if (di_cache_debug < msglevel) 4042 return; 4043 4044 switch (msglevel) { 4045 case DI_ERR: 4046 msglevel = CE_WARN; 4047 break; 4048 case DI_INFO: 4049 case DI_TRACE: 4050 default: 4051 msglevel = CE_NOTE; 4052 break; 4053 } 4054 4055 va_start(ap, fmt); 4056 vcmn_err(msglevel, fmt, ap); 4057 va_end(ap); 4058 } 4059
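/*
 * Editor's closing sketch (userland consumer, illustration only): the
 * snapshot machinery in this file is normally driven through
 * libdevinfo(3LIB) rather than by issuing the ioctls directly, e.g.
 *
 *	#include <stdio.h>
 *	#include <libdevinfo.h>
 *
 *	static int
 *	print_node(di_node_t node, void *arg)
 *	{
 *		(void) printf("%s\n", di_node_name(node));
 *		return (DI_WALK_CONTINUE);
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		di_node_t root = di_init("/", DINFOCPYALL);
 *		if (root == DI_NODE_NIL)
 *			return (1);
 *		(void) di_walk_node(root, DI_WALK_CLDFIRST, NULL,
 *		    print_node);
 *		di_fini(root);
 *		return (0);
 *	}
 *
 * di_init(3DEVINFO) takes the snapshot (DINFOCPYALL is the
 * DINFOSUBTREE|DINFOPROP|DINFOMINOR combination handled above) and
 * di_fini() releases it.
 */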