/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * driver for accessing kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/vmsystm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args) if (di_debug >= 1) cmn_err args
#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

static int di_prop_dyn = 1;	/* enable dynamic property support */

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move the snapshot to different platforms.
 *
 * NOTE: Most callers should rely on the di_checkmem return value
 * being aligned, and reestablish *off_p with the aligned value, instead
 * of trying to align the size of their allocations: this approach
 * minimizes memory use.
 */
#define	DI_ALIGN(addr)	((addr + 7l) & ~7l)
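/*
 * Worked example (illustrative note, not part of the original source):
 * DI_ALIGN rounds an offset up to the next 8-byte boundary, so
 *
 *	DI_ALIGN(0x00) == 0x00
 *	DI_ALIGN(0x01) == 0x08
 *	DI_ALIGN(0x08) == 0x08
 *	DI_ALIGN(0x09) == 0x10
 *
 * Every offset handed back by di_checkmem() below satisfies
 * off == DI_ALIGN(off), which is what keeps snapshot structures
 * safely aligned when the snapshot is moved to another platform.
 */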
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};

/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info	*dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
	    (stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, off_p)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(off_p); \
	((stack)->depth)++; \
}

#define	DI_ALL_PTR(s)	DI_ALL(di_mem_addr((s), 0))
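/*
 * Illustrative sketch (not part of the original source): the stack
 * macros above are always used in matched pairs by di_copytree() and
 * di_copynode() to implement an iterative depth-first walk:
 *
 *	PUSH_STACK(dsp, root, off_p);		enter root, depth = 1
 *	while (!EMPTY_STACK(dsp))
 *		off = di_copynode(TOP_NODE(dsp), dsp, st);
 *
 * PUSH_STACK() also takes the per-node ndi_devi_enter() lock and
 * POP_STACK() releases it, so the stack doubles as the record of
 * which nodes are currently held busy.
 */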
/*
 * With devfs, the device tree has no global locks. The device tree is
 * dynamic and dips may come and go if they are not locked locally. Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree. This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};


struct i_lnode;

typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link /w same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link /w same i_lnode */
} i_link_t;

typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;

/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))
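/*
 * Illustrative note (not part of the original source): a di_dkey is
 * built from more than the dip pointer precisely because pointers can
 * be recycled during concurrent DR. Two successive nodes occupying the
 * same kmem address will still differ in at least one of dk_major,
 * dk_inst or dk_nodeid, so a hash lookup against a stale dip cannot
 * silently match a different device.
 */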
/*
 * Check that the whole device tree is being configured as a pre-condition
 * for cleaning up /etc/devices files.
 */
#define	DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

typedef struct phci_walk_arg {
	di_off_t	off;
	struct di_state	*st;
} phci_walk_arg_t;

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct dev_info *, struct di_stack *,
    struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
    struct di_state *, struct dev_info *);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static void *di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
static int build_phci_list(dev_info_t *ph_devinfo, void *arg);

extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
    int (*)(dev_info_t *, void *), void *);


static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int	error;

	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error != 0) {
		mutex_destroy(&di_lock);
		return (error);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	error;

	error = mod_remove(&modlinkage);
	if (error != 0) {
		return (error);
	}

	mutex_destroy(&di_lock);
	return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int	error = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)di_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * All dev_t's map to the same, single instance.
		 */
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		break;
	}

	return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_ATTACH:
		di_states = kmem_zalloc(
		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
			kmem_free(di_states,
			    di_max_opens * sizeof (struct di_state *));
			ddi_remove_minor_node(dip, NULL);
			error = DDI_FAILURE;
		} else {
			di_dip = dip;
			ddi_report_dev(dip);

			error = DDI_SUCCESS;
		}
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_DETACH:
		ddi_remove_minor_node(dip, NULL);
		di_dip = NULL;
		kmem_free(di_states,
		    di_max_opens * sizeof (struct di_state *));

		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}
/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device.  Each minor gets a separate
 * entry in the di_states[] table.  Based on the original minor number, we
 * discriminate opens of the full and read-only nodes.  If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int	m;
	minor_t	minor_parent = getminor(*devp);

	if (minor_parent != DI_FULL_PARENT &&
	    minor_parent != DI_READONLY_PARENT)
		return (ENXIO);

	mutex_enter(&di_lock);

	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
		if (di_states[m] != NULL)
			continue;

		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
		break;	/* It's ours. */
	}

	if (m >= di_max_opens) {
		/*
		 * maximum open instances for the device reached
		 */
		mutex_exit(&di_lock);
		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
		return (EAGAIN);
	}
	mutex_exit(&di_lock);

	ASSERT(m < di_max_opens);
	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));

	return (0);
}

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	if (m < 0) {
		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
		    m + DI_NODE_SPECIES);
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(m < di_max_opens && st != NULL);

	di_freemem(st);
	kmem_free(st, sizeof (struct di_state));

	/*
	 * empty slot in state table
	 */
	mutex_enter(&di_lock);
	di_states[m] = NULL;
	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));
	mutex_exit(&di_lock);

	return (0);
}
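/*
 * Worked example of the minor-number multiplexing above (illustrative
 * note, not part of the original source): with DI_NODE_SPECIES == 2,
 * opens of the full node (minor DI_FULL_PARENT == 0) are assigned even
 * clone minors 2, 4, 6, ... while opens of the read-only node (minor
 * DI_READONLY_PARENT == 1) are assigned odd minors 3, 5, 7, ...
 * Slot m in di_states[] is always minor - DI_NODE_SPECIES, so
 * DI_UNPRIVILEGED_NODE(m) recovers the species from the slot's parity.
 */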

/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int		rv, error;
	di_off_t	off;
	struct di_all	*all;
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;
	major_t		i;
	char		*drv_name;
	size_t		map_size, size;
	struct di_mem	*dcp;
	int		ndi_flags;

	if (m < 0 || m >= di_max_opens) {
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(st != NULL);

	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

	switch (cmd) {
	case DINFOIDENT:
		/*
		 * This is called from di_init to verify that the driver
		 * opened is indeed devinfo. The purpose is to guard against
		 * sending an ioctl to an unknown driver in case of an
		 * unresolved major number conflict during bfu.
		 */
		*rvalp = DI_MAGIC;
		return (0);

	case DINFOLODRV:
		/*
		 * Hold an installed driver and return the result
		 */
		if (DI_UNPRIVILEGED_NODE(m)) {
			/*
			 * Only the fully enabled instances may issue
			 * DINFOLODRV.
			 */
			return (EACCES);
		}

		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
			kmem_free(drv_name, MAXNAMELEN);
			return (EFAULT);
		}

		/*
		 * Some third-party drivers' _init() routines walk the device
		 * tree, so we load the driver module before configuring the
		 * driver.
		 */
		i = ddi_name_to_major(drv_name);
		if (ddi_hold_driver(i) == NULL) {
			kmem_free(drv_name, MAXNAMELEN);
			return (ENXIO);
		}

		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

		/*
		 * i_ddi_load_drvconf() below will trigger a reprobe
		 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
		 * needed here.
		 */
		modunload_disable();
		(void) i_ddi_load_drvconf(i);
		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
		kmem_free(drv_name, MAXNAMELEN);
		ddi_rele_driver(i);
		rv = i_ddi_devs_attached(i);
		modunload_enable();

		i_ddi_di_cache_invalidate();

		return ((rv == DDI_SUCCESS)? 0 : ENXIO);

	case DINFOUSRLD:
		/*
		 * The case for copying the snapshot to userland
		 */
		if (di_setstate(st, IOC_COPY) == -1)
			return (EBUSY);

		map_size = DI_ALL_PTR(st)->map_size;
		if (map_size == 0) {
			(void) di_setstate(st, IOC_DONE);
			return (EFAULT);
		}

		/*
		 * copyout the snapshot
		 */
		map_size = (map_size + PAGEOFFSET) & PAGEMASK;

		/*
		 * Return the map size, so the caller may do a sanity
		 * check against the return value of the snapshot ioctl()
		 */
		*rvalp = (int)map_size;

		/*
		 * Copy one chunk at a time
		 */
		off = 0;
		dcp = st->memlist;
		while (map_size) {
			size = dcp->buf_size;
			if (map_size <= size) {
				size = map_size;
			}

			if (ddi_copyout(di_mem_addr(st, off),
			    (void *)(arg + off), size, mode) != 0) {
				(void) di_setstate(st, IOC_DONE);
				return (EFAULT);
			}

			map_size -= size;
			off += size;
			dcp = dcp->next;
		}

		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (0);

	default:
		if ((cmd & ~DIIOC_MASK) != DIIOC) {
			/*
			 * Invalid ioctl command
			 */
			return (ENOTTY);
		}
		/*
		 * take a snapshot
		 */
		st->command = cmd & DIIOC_MASK;
		/*FALLTHROUGH*/
	}

	/*
	 * Obtain enough memory to hold header + rootpath.  We prevent kernel
	 * memory exhaustion by freeing any previously allocated snapshot and
	 * refusing the operation; otherwise we would be allowing ioctl(),
	 * ioctl(), ioctl(), ..., panic.
	 */
	if (di_setstate(st, IOC_SNAP) == -1)
		return (EBUSY);

	/*
	 * The initial memlist always holds di_all and the root_path, and
	 * is at least a page in size.
	 */
	size = sizeof (struct di_all) +
	    sizeof (((struct dinfo_io *)(NULL))->root_path);
	if (size < PAGESIZE)
		size = PAGESIZE;
	off = di_checkmem(st, 0, size);
	all = DI_ALL_PTR(st);
	off += sizeof (struct di_all);		/* real length of di_all */

	all->devcnt = devcnt;
	all->command = st->command;
	all->version = DI_SNAPSHOT_VERSION;
	all->top_vhci_devinfo = 0;	/* filled by build_vhci_list. */

	/*
	 * Note the endianness in case we need to transport the snapshot
	 * over the network.
	 */
#if defined(_LITTLE_ENDIAN)
	all->endianness = DI_LITTLE_ENDIAN;
#else
	all->endianness = DI_BIG_ENDIAN;
#endif

	/* Copyin ioctl args, store in the snapshot. */
	if (copyinstr((void *)arg, all->root_path,
	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	}
	off += size;	/* real length of root_path */

	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EINVAL);
	}

	error = 0;
	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (error);
	}
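	/*
	 * Note (illustrative, not part of the original source): the
	 * di_setstate() calls in this function drive the snapshot state
	 * machine declared with the IOC_* constants at the top of the file:
	 *
	 *	IOC_IDLE -> IOC_SNAP	snapshot ioctl accepted (above)
	 *	IOC_SNAP -> IOC_DONE	snapshot built (end of this function)
	 *	IOC_DONE -> IOC_COPY	DINFOUSRLD copyout in progress
	 *	IOC_COPY -> IOC_IDLE	copyout finished, memory freed
	 *
	 * Every failure path below returns the state to IOC_IDLE after
	 * freeing the partially built snapshot.
	 */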
	/*
	 * Only the fully enabled version may force load drivers or read
	 * the parent private data from a driver.
	 */
	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	    DI_UNPRIVILEGED_NODE(m)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EACCES);
	}

	/* Do we need private data? */
	if (st->command & DINFOPRIVDATA) {
		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			/*
			 * Cannot copy private data from 64-bit kernel
			 * to 32-bit app
			 */
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EINVAL);
		}
		case DDI_MODEL_NONE:
			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
				di_freemem(st);
				(void) di_setstate(st, IOC_IDLE);
				return (EFAULT);
			}
			break;
		}
#else /* !_MULTI_DATAMODEL */
		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EFAULT);
		}
#endif /* _MULTI_DATAMODEL */
	}

	all->top_devinfo = DI_ALIGN(off);

	/*
	 * For cache lookups we reallocate memory from scratch,
	 * so the value of "all" is no longer valid.
	 */
	all = NULL;

	if (st->command & DINFOCACHE) {
		*rvalp = di_cache_lookup(st);
	} else if (snapshot_is_cacheable(st)) {
		DI_CACHE_LOCK(di_cache);
		*rvalp = di_cache_update(st);
		DI_CACHE_UNLOCK(di_cache);
	} else
		*rvalp = di_snapshot_and_clean(st);

	if (*rvalp) {
		DI_ALL_PTR(st)->map_size = *rvalp;
		(void) di_setstate(st, IOC_DONE);
	} else {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
	}

	return (0);
}

/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
	struct di_mem	*mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);

	/*
	 * Round up size to the nearest power of 2. If it is less
	 * than st->mem_size, set it to st->mem_size (i.e., the
	 * mem_size is doubled every time) to reduce the number
	 * of memory allocations.
	 */
	size_t tmp = 1;
	while (tmp < size) {
		tmp <<= 1;
	}
	size = (tmp > st->mem_size) ? tmp : st->mem_size;

	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
	mem->buf_size = size;

	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

	if (st->mem_size == 0) {	/* first chunk */
		st->memlist = mem;
	} else {
		/*
		 * locate end of linked list and add a chunk at the end
		 */
		struct di_mem *dcp = st->memlist;
		while (dcp->next != NULL) {
			dcp = dcp->next;
		}

		dcp->next = mem;
	}

	st->mem_size += size;
}
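/*
 * Worked example of the doubling policy above (illustrative note, not
 * part of the original source): each new chunk is
 * max(roundup_pow2(size), st->mem_size), so mem_size at least doubles
 * with every allocation. A snapshot that keeps requesting small
 * amounts grows through chunk sizes such as 8K, 16K, 32K, ..., and the
 * number of chunks (and thus the length of the list walked by
 * di_mem_addr()) stays logarithmic in the total snapshot size.
 */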
/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
	struct di_mem	*dcp;
	size_t		copysz;

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		return;
	}

	copysz = 0;
	for (dcp = st->memlist; dcp; dcp = dcp->next) {

		ASSERT(bufsiz > 0);

		if (bufsiz <= dcp->buf_size)
			copysz = bufsiz;
		else
			copysz = dcp->buf_size;

		bcopy(dcp->buf, buf, copysz);

		buf += copysz;
		bufsiz -= copysz;

		if (bufsiz == 0)
			break;
	}
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
	struct di_mem	*dcp, *tmp;

	dcmn_err2((CE_CONT, "di_freemem\n"));

	if (st->mem_size) {
		dcp = st->memlist;
		while (dcp) {	/* traverse the linked list */
			tmp = dcp;
			dcp = dcp->next;
			ddi_umem_free(tmp->cook);
			kmem_free(tmp, sizeof (struct di_mem));
		}
		st->mem_size = 0;
		st->memlist = NULL;
	}

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
}

/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
	caddr_t	pa;

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
		return (0);
	}

	ASSERT(cache->cache_data);

	di_allocmem(st, cache->cache_size);

	pa = di_mem_addr(st, 0);

	ASSERT(pa);

	/*
	 * Verify that di_allocmem() allocates contiguous memory,
	 * so that it is safe to do a straight bcopy()
	 */
	ASSERT(st->memlist != NULL);
	ASSERT(st->memlist->next == NULL);
	bcopy(cache->cache_data, pa, cache->cache_size);

	return (cache->cache_size);
}
/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *	- 0 on failure
 *	- size of copied data on success
 */
static size_t
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
	size_t	map_size;

	ASSERT(cache->cache_size == 0);
	ASSERT(cache->cache_data == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
		return (0);
	}

	ASSERT(st->memlist);

	/*
	 * The size of the memory list may be much larger than the
	 * size of valid data (map_size). Cache only the valid data.
	 */
	map_size = DI_ALL_PTR(st)->map_size;
	if (map_size == 0 || map_size < sizeof (struct di_all) ||
	    map_size > st->mem_size) {
		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
		return (0);
	}

	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
	cache->cache_size = map_size;
	di_copymem(st, cache->cache_data, cache->cache_size);

	return (map_size);
}

/*
 * Make sure there are at least "size" bytes of memory left before
 * going on. Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
	    off, (int)size));

	/*
	 * di_checkmem() shouldn't be called with a size of zero.
	 * But in case it is, we want to make sure we return a valid
	 * offset within the memlist and not an offset that points us
	 * at the end of the memlist.
	 */
	if (size == 0) {
		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
		size = 1;
	}

	off = DI_ALIGN(off);
	if ((st->mem_size - off) < size) {
		off = st->mem_size;
		di_allocmem(st, size);
	}

	/* verify that the return value is aligned */
	ASSERT(off == DI_ALIGN(off));
	return (off);
}
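/*
 * Canonical usage pattern (illustrative note, not part of the original
 * source): nearly every copy routine in this file extends the snapshot
 * with
 *
 *	*off_p = off = di_checkmem(st, off, size);
 *	bcopy(src, di_mem_addr(st, off), size);
 *	off += size;
 *
 * i.e. the caller stores the aligned offset that di_checkmem() returns
 * (rather than aligning sizes itself, per the DI_ALIGN() note at the
 * top of the file) and then advances past the bytes it consumed.
 */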
/*
 * Copy the private data format from the ioctl arg.
 * On success, the ending offset is returned. On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
	di_off_t		size;
	struct di_priv_data	*priv;
	struct di_all		*all = DI_ALL_PTR(st);

	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	    off, (void *)arg, mode));

	/*
	 * Copyin data and check version.
	 * We only handle private data version 0.
	 */
	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
		kmem_free(priv, sizeof (struct di_priv_data));
		return (0);
	}

	/*
	 * Save the di_priv_data copied from userland in the snapshot.
	 */
	all->pd_version = priv->version;
	all->n_ppdata = priv->n_parent;
	all->n_dpdata = priv->n_driver;

	/*
	 * copyin private data format, modify offset accordingly
	 */
	if (all->n_ppdata) {	/* parent private data format */
		/*
		 * check memory
		 */
		size = all->n_ppdata * sizeof (struct di_priv_format);
		all->ppdata_format = off = di_checkmem(st, off, size);
		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	if (all->n_dpdata) {	/* driver private data format */
		/*
		 * check memory
		 */
		size = all->n_dpdata * sizeof (struct di_priv_format);
		all->dpdata_format = off = di_checkmem(st, off, size);
		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	kmem_free(priv, sizeof (struct di_priv_data));
	return (off);
}

/*
 * Return the real address based on the offset (off) within the snapshot
 */
static void *
di_mem_addr(struct di_state *st, di_off_t off)
{
	struct di_mem	*dcp = st->memlist;

	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	    (void *)dcp, off));

	ASSERT(off < st->mem_size);

	while (off >= dcp->buf_size) {
		off -= dcp->buf_size;
		dcp = dcp->next;
	}

	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	    off, (void *)(dcp->buf + off)));

	return (dcp->buf + off);
}

/*
 * Ideally we would use the whole key to derive the hash
 * value. However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
	struct di_key	*dik = key;
	size_t		rshift;
	void		*ptr;

	ASSERT(arg == NULL);

	switch (dik->k_type) {
	case DI_DKEY:
		ptr = dik->k_u.dkey.dk_dip;
		rshift = highbit(sizeof (struct dev_info));
		break;
	case DI_PKEY:
		ptr = dik->k_u.pkey.pk_pip;
		rshift = highbit(sizeof (struct mdi_pathinfo));
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
	return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
	char		*path_addr;
	struct di_key	*dik = key;

	switch (dik->k_type) {
	case DI_DKEY:
		break;
	case DI_PKEY:
		path_addr = dik->k_u.pkey.pk_path_addr;
		if (path_addr)
			kmem_free(path_addr, strlen(path_addr) + 1);
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}

	kmem_free(dik, sizeof (struct di_key));
}
static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
	if (dk1->dk_dip != dk2->dk_dip)
		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

	if (dk1->dk_major != DDI_MAJOR_T_NONE &&
	    dk2->dk_major != DDI_MAJOR_T_NONE) {
		if (dk1->dk_major != dk2->dk_major)
			return (dk1->dk_major > dk2->dk_major ? 1 : -1);

		if (dk1->dk_inst != dk2->dk_inst)
			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
	}

	if (dk1->dk_nodeid != dk2->dk_nodeid)
		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

	return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
	char	*p1, *p2;
	int	rv;

	if (pk1->pk_pip != pk2->pk_pip)
		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

	p1 = pk1->pk_path_addr;
	p2 = pk2->pk_path_addr;

	p1 = p1 ? p1 : "";
	p2 = p2 ? p2 : "";

	rv = strcmp(p1, p2);
	if (rv)
		return (rv > 0 ? 1 : -1);

	if (pk1->pk_client != pk2->pk_client)
		return (pk1->pk_client > pk2->pk_client ? 1 : -1);

	if (pk1->pk_phci != pk2->pk_phci)
		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

	return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	struct di_key	*dik1, *dik2;

	dik1 = key1;
	dik2 = key2;

	if (dik1->k_type != dik2->k_type) {
		panic("devinfo: mismatched keys");
		/*NOTREACHED*/
	}

	switch (dik1->k_type) {
	case DI_DKEY:
		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
	case DI_PKEY:
		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
}

/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
	di_off_t	off;
	struct di_all	*all;
	dev_info_t	*rootnode;
	char		buf[80];
	int		plen;
	char		*path;
	vnode_t		*vp;

	all = DI_ALL_PTR(st);
	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

	/*
	 * Verify the path before entrusting it to e_ddi_hold_devi_by_path
	 * because some platforms have OBP bugs where executing the
	 * NDI_PROMNAME code path against an invalid path results in panic.
	 * The lookupnameat is done relative to rootdir without a leading '/'
	 * on "devices/" to force the lookup to occur in the global zone.
	 */
	plen = strlen("devices/") + strlen(all->root_path) + 1;
	path = kmem_alloc(plen, KM_SLEEP);
	(void) snprintf(path, plen, "devices/%s", all->root_path);
	if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		kmem_free(path, plen);
		return (0);
	}
	kmem_free(path, plen);
	VN_RELE(vp);

	/*
	 * Hold the devinfo node referred to by the path.
	 */
	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
	if (rootnode == NULL) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		return (0);
	}

	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered dips (statep=%p)", (void *)st);

	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);


	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered pips (statep=%p)", (void *)st);

	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);

	/*
	 * copy the device tree
	 */
	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

	if (DINFOPATH & st->command) {
		mdi_walk_vhcis(build_vhci_list, st);
	}

	ddi_release_devi(rootnode);

	/*
	 * copy the devnames array
	 */
	all->devnames = off;
	off = di_copydevnm(&all->devnames, st);


	/* initialize the hash tables */
	st->lnode_count = 0;
	st->link_count = 0;

	if (DINFOLYR & st->command) {
		off = di_getlink_data(off, st);
	}

	/*
	 * Free up hash tables
	 */
	mod_hash_destroy_hash(st->reg_dip_hash);
	mod_hash_destroy_hash(st->reg_pip_hash);

	/*
	 * Record the timestamp now that we are done with the snapshot.
	 *
	 * We compute the checksum later, and only if we cache
	 * the snapshot, since checksumming adds some overhead.
	 * The checksum is verified when the cache file is read
	 * back from disk.
	 *
	 * Set the checksum field to 0, as the CRC is calculated with
	 * that field set to 0.
	 */
	all->snapshot_time = ddi_get_time();
	all->cache_checksum = 0;

	ASSERT(all->snapshot_time != 0);

	return (off);
}

/*
 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
 */
static di_off_t
di_snapshot_and_clean(struct di_state *st)
{
	di_off_t	off;

	modunload_disable();
	off = di_snapshot(st);
	if (off != 0 && (st->command & DINFOCLEANUP)) {
		ASSERT(DEVICES_FILES_CLEANABLE(st));
		/*
		 * Cleanup /etc/devices files:
		 * In order to accurately account for the system configuration
		 * in /etc/devices files, the appropriate drivers must be
		 * fully configured before the cleanup starts.
		 * So enable modunload only after the cleanup.
		 */
		i_ddi_clean_devices_files();
		/*
		 * Remove backing store nodes for unused devices,
		 * which retain past permissions customizations
		 * and may be undesired for newly configured devices.
		 */
		dev_devices_cleanup();
	}
	modunload_enable();

	return (off);
}
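/*
 * Snapshot layout note (illustrative, not part of the original source):
 * after di_snapshot() completes, the memlist contains, in order, the
 * struct di_all header, the root_path string, the copied device tree
 * (di_copytree, plus the vhci/phci linkage when DINFOPATH was
 * requested), the devnames array (di_copydevnm) and, when DINFOLYR was
 * requested, the layering lnodes and links (di_getlink_data). All
 * cross-references between these pieces are di_off_t offsets from the
 * start of the snapshot, never pointers.
 */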
/*
 * construct vhci linkage in the snapshot.
 */
static int
build_vhci_list(dev_info_t *vh_devinfo, void *arg)
{
	struct di_all	*all;
	struct di_node	*me;
	struct di_state	*st;
	di_off_t	off;
	phci_walk_arg_t	pwa;

	dcmn_err3((CE_CONT, "build_vhci list\n"));

	dcmn_err3((CE_CONT, "vhci node %s%d\n",
	    ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));

	st = (struct di_state *)arg;
	if (di_dip_find(st, vh_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
	    st->mem_size, off));

	all = DI_ALL_PTR(st);
	if (all->top_vhci_devinfo == 0) {
		all->top_vhci_devinfo = off;
	} else {
		me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));

		while (me->next_vhci != 0) {
			me = DI_NODE(di_mem_addr(st, me->next_vhci));
		}

		me->next_vhci = off;
	}

	pwa.off = off;
	pwa.st = st;
	mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);

	return (DDI_WALK_CONTINUE);
}

/*
 * construct phci linkage for the given vhci in the snapshot.
 */
static int
build_phci_list(dev_info_t *ph_devinfo, void *arg)
{
	struct di_node	*vh_di_node;
	struct di_node	*me;
	phci_walk_arg_t	*pwa;
	di_off_t	off;

	pwa = (phci_walk_arg_t *)arg;

	dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
	    pwa->off));

	vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
	if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
	    ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));

	if (vh_di_node->top_phci == 0) {
		vh_di_node->top_phci = off;
		return (DDI_WALK_CONTINUE);
	}

	me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));

	while (me->next_phci != 0) {
		me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
	}
	me->next_phci = off;

	return (DDI_WALK_CONTINUE);
}

/*
 * Assumes all devinfo nodes in the device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
{
	struct dev_info	*node;
	struct di_node	*me;
	di_off_t	off;

	ASSERT(mutex_owned(&dnp->dn_lock));

	node = DEVI(dnp->dn_head);
	for (; node; node = node->devi_next) {
		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
			continue;

		ASSERT(off > 0);
		me = DI_NODE(di_mem_addr(st, off));
		ASSERT(me->next == 0 || me->next == -1);
		/*
		 * Only nodes which were BOUND when they were
		 * snapshotted will be added to the per-driver list.
		 */
		if (me->next != -1)
			continue;

		*off_p = off;
		off_p = &me->next;
	}

	*off_p = 0;
}

/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * It also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
	int		i;
	di_off_t	off;
	size_t		size;
	struct di_devnm	*dnp;

	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

	/*
	 * make sure there is some allocated memory
	 */
	size = devcnt * sizeof (struct di_devnm);
	*off_p = off = di_checkmem(st, *off_p, size);
	dnp = DI_DEVNM(di_mem_addr(st, off));
	off += size;

	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	    devcnt, off));

	for (i = 0; i < devcnt; i++) {
		if (devnamesp[i].dn_name == NULL) {
			continue;
		}

		/*
		 * dn_name is not freed during driver unload or removal.
		 *
		 * There is a race condition when make_devname() changes
		 * dn_name during our strcpy. This should be rare since
		 * only add_drv does this. At any rate, we never had a
		 * problem with ddi_name_to_major(), which would have
		 * the same problem.
		 */
		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
		    devnamesp[i].dn_name, devnamesp[i].dn_instance, off));

		size = strlen(devnamesp[i].dn_name) + 1;
		dnp[i].name = off = di_checkmem(st, off, size);
		(void) strcpy((char *)di_mem_addr(st, off),
		    devnamesp[i].dn_name);
		off += size;

		mutex_enter(&devnamesp[i].dn_lock);

		/*
		 * Snapshot per-driver node list
		 */
		snap_driver_list(st, &devnamesp[i], &dnp[i].head);

		/*
		 * This is not used by libdevinfo, leave it for now
		 */
		dnp[i].flags = devnamesp[i].dn_flags;
		dnp[i].instance = devnamesp[i].dn_instance;

		/*
		 * get global properties
		 */
		if ((DINFOPROP & st->command) &&
		    devnamesp[i].dn_global_prop_ptr) {
			dnp[i].global_prop = off;
			off = di_getprop(DI_PROP_GLB_LIST,
			    &devnamesp[i].dn_global_prop_ptr->prop_list,
			    &dnp[i].global_prop, st, NULL);
		}

		/*
		 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
		 */
		if (CB_DRV_INSTALLED(devopsp[i])) {
			if (devopsp[i]->devo_cb_ops) {
				dnp[i].ops |= DI_CB_OPS;
				if (devopsp[i]->devo_cb_ops->cb_str)
					dnp[i].ops |= DI_STREAM_OPS;
			}
			if (NEXUS_DRV(devopsp[i])) {
				dnp[i].ops |= DI_BUS_OPS;
			}
		}

		mutex_exit(&devnamesp[i].dn_lock);
	}

	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

	return (off);
}

/*
 * Copy the kernel devinfo tree. The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t	off;
	struct dev_info	*node;
	struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/* force attach drivers */
	if (i_ddi_devi_attached((dev_info_t *)root) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.
	 */
	while (!EMPTY_STACK(dsp)) {
		node = TOP_NODE(dsp);
		off = di_copynode(node, dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}

/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot. The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
{
	di_off_t	off;
	struct di_node	*me;
	size_t		size;
	struct dev_info	*n;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
	ASSERT((node != NULL) && (node == TOP_NODE(dsp)));

	/*
	 * check memory usage, and fix offsets accordingly.
	 */
	size = sizeof (struct di_node);
	*(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
	me = DI_NODE(di_mem_addr(st, off));
	me->self = off;
	off += size;

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state
	 */
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->flags = node->devi_flags;
	me->node_state = node->devi_node_state;
	me->next_vhci = 0;	/* Filled up by build_vhci_list. */
	me->top_phci = 0;	/* Filled up by build_phci_list. */
	me->next_phci = 0;	/* Filled up by build_phci_list. */
	me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
	me->user_private_data = NULL;

	/*
	 * Get the parent's offset in the snapshot from the stack
	 * and store it in the current node
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);

#ifdef DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid_str) {
		ddi_devid_t	devid;

		/*
		 * The devid is now represented as a property. For
		 * compatibility with the di_devid() interface in libdevinfo
		 * we must return it as a binary structure in the snapshot.
		 * When (if) di_devid() is removed from libdevinfo then the
		 * code related to DEVID_COMPATIBILITY can be removed.
		 */
		if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
		    DDI_SUCCESS) {
			size = ddi_devid_sizeof(devid);
			off = di_checkmem(st, off, size);
			me->devid = off;
			bcopy(devid, di_mem_addr(st, off), size);
			off += size;
			ddi_devid_free(devid);
		}
	}
#endif	/* DEVID_COMPATIBILITY */

	if (node->devi_node_name) {
		size = strlen(node->devi_node_name) + 1;
		me->node_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += size;
	}

	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		size = node->devi_compat_length;
		me->compat_names = off = di_checkmem(st, off, size);
		me->compat_length = (int)size;
		bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
		off += size;
	}

	if (node->devi_addr) {
		size = strlen(node->devi_addr) + 1;
		me->address = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += size;
	}

	if (node->devi_binding_name) {
		size = strlen(node->devi_binding_name) + 1;
		me->bind_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += size;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-instance list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
		goto priv_data;
	}

	/*
	 * LOCKING: We already have an active ndi_devi_enter to gather the
	 * minor data, and we will take devi_lock to gather properties as
	 * needed off di_getprop.
	 */
	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	ASSERT(DEVI_BUSY_OWNED(node));
	if (node->devi_minor) {		/* minor data */
		me->minor_data = off;
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_VHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_VHCI;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
		me->multipath_client = off;
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_client, me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}

	if (MDI_PHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_PHCI;
		me->multipath_phci = off;
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_phci, me->self, st, 0);
		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_phci,
		    (void *)node, node->devi_mdi_component, off));
	}
off=%d", 1870 me->multipath_phci, 1871 (void *)node, node->devi_mdi_component, off)); 1872 } 1873 1874 property: 1875 if (!(DINFOPROP & st->command)) { 1876 goto priv_data; 1877 } 1878 1879 if (node->devi_drv_prop_ptr) { /* driver property list */ 1880 me->drv_prop = off; 1881 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr, 1882 &me->drv_prop, st, node); 1883 } 1884 1885 if (node->devi_sys_prop_ptr) { /* system property list */ 1886 me->sys_prop = off; 1887 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr, 1888 &me->sys_prop, st, node); 1889 } 1890 1891 if (node->devi_hw_prop_ptr) { /* hardware property list */ 1892 me->hw_prop = off; 1893 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr, 1894 &me->hw_prop, st, node); 1895 } 1896 1897 if (node->devi_global_prop_list == NULL) { 1898 me->glob_prop = (di_off_t)-1; /* not global property */ 1899 } else { 1900 /* 1901 * Make copy of global property list if this devinfo refers 1902 * global properties different from what's on the devnames 1903 * array. It can happen if there has been a forced 1904 * driver.conf update. See mod_drv(1M). 1905 */ 1906 ASSERT(me->drv_major != -1); 1907 if (node->devi_global_prop_list != 1908 devnamesp[me->drv_major].dn_global_prop_ptr) { 1909 me->glob_prop = off; 1910 off = di_getprop(DI_PROP_GLB_LIST, 1911 &node->devi_global_prop_list->prop_list, 1912 &me->glob_prop, st, node); 1913 } 1914 } 1915 1916 priv_data: 1917 if (!(DINFOPRIVDATA & st->command)) { 1918 goto pm_info; 1919 } 1920 1921 if (ddi_get_parent_data((dev_info_t *)node) != NULL) { 1922 me->parent_data = off; 1923 off = di_getppdata(node, &me->parent_data, st); 1924 } 1925 1926 if (ddi_get_driver_private((dev_info_t *)node) != NULL) { 1927 me->driver_data = off; 1928 off = di_getdpdata(node, &me->driver_data, st); 1929 } 1930 1931 pm_info: /* NOT implemented */ 1932 1933 subtree: 1934 /* keep the stack aligned */ 1935 off = DI_ALIGN(off); 1936 1937 if (!(DINFOSUBTREE & st->command)) { 1938 POP_STACK(dsp); 1939 return (off); 1940 } 1941 1942 child: 1943 /* 1944 * If there is a visible child--push child onto stack. 1945 * Hold the parent (me) busy while doing so. 1946 */ 1947 if ((n = node->devi_child) != NULL) { 1948 /* skip hidden nodes */ 1949 while (n && ndi_dev_is_hidden_node((dev_info_t *)n)) 1950 n = n->devi_sibling; 1951 if (n) { 1952 me->child = off; 1953 PUSH_STACK(dsp, n, &me->child); 1954 return (me->child); 1955 } 1956 } 1957 1958 sibling: 1959 /* 1960 * Done with any child nodes, unroll the stack till a visible 1961 * sibling of a parent node is found or root node is reached. 

sibling:
	/*
	 * Done with any child nodes, unroll the stack till a visible
	 * sibling of a parent node is found or the root node is reached.
	 */
	POP_STACK(dsp);
	while (!EMPTY_STACK(dsp)) {
		if ((n = node->devi_sibling) != NULL) {
			/* skip hidden nodes */
			while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
				n = n->devi_sibling;
			if (n) {
				me->sibling = DI_ALIGN(off);
				PUSH_STACK(dsp, n, &me->sibling);
				return (me->sibling);
			}
		}
		node = TOP_NODE(dsp);
		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
		POP_STACK(dsp);
	}

	/*
	 * DONE with all nodes
	 */
	return (off);
}

static i_lnode_t *
i_lnode_alloc(int modid)
{
	i_lnode_t	*i_lnode;

	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);

	ASSERT(modid != -1);
	i_lnode->modid = modid;

	return (i_lnode);
}

static void
i_lnode_free(i_lnode_t *i_lnode)
{
	kmem_free(i_lnode, sizeof (i_lnode_t));
}

static void
i_lnode_check_free(i_lnode_t *i_lnode)
{
	/* This lnode and its dip must have been snapshotted */
	ASSERT(i_lnode->self > 0);
	ASSERT(i_lnode->di_node->self > 0);

	/* at least 1 link (in or out) must exist for this lnode */
	ASSERT(i_lnode->link_in || i_lnode->link_out);

	i_lnode_free(i_lnode);
}

static i_link_t *
i_link_alloc(int spec_type)
{
	i_link_t	*i_link;

	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
	i_link->spec_type = spec_type;

	return (i_link);
}

static void
i_link_check_free(i_link_t *i_link)
{
	/* This link must have been snapshotted */
	ASSERT(i_link->self > 0);

	/* Both endpoint lnodes must exist for this link */
	ASSERT(i_link->src_lnode);
	ASSERT(i_link->tgt_lnode);

	kmem_free(i_link, sizeof (i_link_t));
}

/*ARGSUSED*/
static uint_t
i_lnode_hashfunc(void *arg, mod_hash_key_t key)
{
	i_lnode_t	*i_lnode = (i_lnode_t *)key;
	struct di_node	*ptr;
	dev_t		dev;

	dev = i_lnode->devt;
	if (dev != DDI_DEV_T_NONE)
		return (i_lnode->modid + getminor(dev) + getmajor(dev));

	ptr = i_lnode->di_node;
	if (ptr) {
		uintptr_t k = (uintptr_t)ptr;

		ASSERT(ptr->self > 0);
		k >>= (int)highbit(sizeof (struct di_node));
		return ((uint_t)k);
	}

	return (i_lnode->modid);
}

static int
i_lnode_cmp(void *arg1, void *arg2)
{
	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;

	if (i_lnode1->modid != i_lnode2->modid) {
		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
	}

	if (i_lnode1->di_node != i_lnode2->di_node)
		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);

	if (i_lnode1->devt != i_lnode2->devt)
		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);

	return (0);
}
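/*
 * Example (illustrative, not part of the original source): if a volume
 * manager holds an LDI-layered open of a disk slice, the LDI usage walk
 * reports one link whose source lnode is {volume-manager dip, its
 * dev_t} and whose target lnode is {disk dip, slice dev_t}, with
 * spec_type recording whether the block or character node was opened.
 * Identical usages reported more than once collapse onto the same
 * lnodes and link via the hashes used below.
 */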
2089 */ 2090 static int 2091 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg) 2092 { 2093 struct di_state *st = (struct di_state *)arg; 2094 i_lnode_t *src_lnode, *tgt_lnode, *i_lnode; 2095 i_link_t **i_link_next, *i_link; 2096 di_off_t soff, toff; 2097 mod_hash_val_t nodep = NULL; 2098 int res; 2099 2100 /* 2101 * if the source or target of this device usage information doesn't 2102 * correspond to a device node then we don't report it via 2103 * libdevinfo so return. 2104 */ 2105 if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL)) 2106 return (LDI_USAGE_CONTINUE); 2107 2108 ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip)); 2109 ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip)); 2110 2111 /* 2112 * Skip the ldi_usage if either src or tgt dip is not in the 2113 * snapshot. This saves us from pruning bad lnodes/links later. 2114 */ 2115 if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0) 2116 return (LDI_USAGE_CONTINUE); 2117 if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0) 2118 return (LDI_USAGE_CONTINUE); 2119 2120 ASSERT(soff > 0); 2121 ASSERT(toff > 0); 2122 2123 /* 2124 * allocate an i_lnode and add it to the lnode hash 2125 * if it is not already present. For this particular 2126 * link the lnode is a source, but it may 2127 * participate as tgt or src in any number of layered 2128 * operations - so it may already be in the hash. 2129 */ 2130 i_lnode = i_lnode_alloc(ldi_usage->src_modid); 2131 i_lnode->di_node = DI_NODE(di_mem_addr(st, soff)); 2132 i_lnode->devt = ldi_usage->src_devt; 2133 2134 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2135 if (res == MH_ERR_NOTFOUND) { 2136 /* 2137 * new i_lnode 2138 * add it to the hash and increment the lnode count 2139 */ 2140 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2141 ASSERT(res == 0); 2142 st->lnode_count++; 2143 src_lnode = i_lnode; 2144 } else { 2145 /* this i_lnode already exists in the lnode_hash */ 2146 i_lnode_free(i_lnode); 2147 src_lnode = (i_lnode_t *)nodep; 2148 } 2149 2150 /* 2151 * allocate a tgt i_lnode and add it to the lnode hash 2152 */ 2153 i_lnode = i_lnode_alloc(ldi_usage->tgt_modid); 2154 i_lnode->di_node = DI_NODE(di_mem_addr(st, toff)); 2155 i_lnode->devt = ldi_usage->tgt_devt; 2156 2157 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2158 if (res == MH_ERR_NOTFOUND) { 2159 /* 2160 * new i_lnode 2161 * add it to the hash and increment the lnode count 2162 */ 2163 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2164 ASSERT(res == 0); 2165 st->lnode_count++; 2166 tgt_lnode = i_lnode; 2167 } else { 2168 /* this i_lnode already exists in the lnode_hash */ 2169 i_lnode_free(i_lnode); 2170 tgt_lnode = (i_lnode_t *)nodep; 2171 } 2172 2173 /* 2174 * allocate a i_link 2175 */ 2176 i_link = i_link_alloc(ldi_usage->tgt_spec_type); 2177 i_link->src_lnode = src_lnode; 2178 i_link->tgt_lnode = tgt_lnode; 2179 2180 /* 2181 * add this link onto the src i_lnodes outbound i_link list 2182 */ 2183 i_link_next = &(src_lnode->link_out); 2184 while (*i_link_next != NULL) { 2185 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) && 2186 (i_link->spec_type == (*i_link_next)->spec_type)) { 2187 /* this link already exists */ 2188 kmem_free(i_link, sizeof (i_link_t)); 2189 return (LDI_USAGE_CONTINUE); 2190 } 2191 i_link_next = &((*i_link_next)->src_link_next); 2192 } 2193 *i_link_next = i_link; 2194 2195 /* 2196 * add this link onto the tgt i_lnodes inbound i_link list 2197 */ 2198 i_link_next = &(tgt_lnode->link_in); 2199 while (*i_link_next != NULL) { 2200 
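/*
 * Walk to the tail of the inbound list. A duplicate of this
 * link would already have been caught on the source lnode's
 * outbound list above, so each entry here is expected to name
 * a different source lnode (see the ASSERT below).
 */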
ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); 2201 i_link_next = &((*i_link_next)->tgt_link_next); 2202 } 2203 *i_link_next = i_link; 2204 2205 /* 2206 * add this i_link to the link hash 2207 */ 2208 res = mod_hash_insert(st->link_hash, i_link, i_link); 2209 ASSERT(res == 0); 2210 st->link_count++; 2211 2212 return (LDI_USAGE_CONTINUE); 2213 } 2214 2215 struct i_layer_data { 2216 struct di_state *st; 2217 int lnode_count; 2218 int link_count; 2219 di_off_t lnode_off; 2220 di_off_t link_off; 2221 }; 2222 2223 /*ARGSUSED*/ 2224 static uint_t 2225 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2226 { 2227 i_link_t *i_link = (i_link_t *)key; 2228 struct i_layer_data *data = arg; 2229 struct di_link *me; 2230 struct di_lnode *melnode; 2231 struct di_node *medinode; 2232 2233 ASSERT(i_link->self == 0); 2234 2235 i_link->self = data->link_off + 2236 (data->link_count * sizeof (struct di_link)); 2237 data->link_count++; 2238 2239 ASSERT(data->link_off > 0 && data->link_count > 0); 2240 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ 2241 ASSERT(data->link_count <= data->st->link_count); 2242 2243 /* fill in fields for the di_link snapshot */ 2244 me = DI_LINK(di_mem_addr(data->st, i_link->self)); 2245 me->self = i_link->self; 2246 me->spec_type = i_link->spec_type; 2247 2248 /* 2249 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t 2250 * are created during the LDI table walk. Since we are 2251 * walking the link hash, the lnode hash has already been 2252 * walked and the lnodes have been snapshotted. Save lnode 2253 * offsets. 2254 */ 2255 me->src_lnode = i_link->src_lnode->self; 2256 me->tgt_lnode = i_link->tgt_lnode->self; 2257 2258 /* 2259 * Save this link's offset in the src_lnode snapshot's link_out 2260 * field 2261 */ 2262 melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode)); 2263 me->src_link_next = melnode->link_out; 2264 melnode->link_out = me->self; 2265 2266 /* 2267 * Put this link on the tgt_lnode's link_in field 2268 */ 2269 melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode)); 2270 me->tgt_link_next = melnode->link_in; 2271 melnode->link_in = me->self; 2272 2273 /* 2274 * An i_lnode_t is only created if the corresponding dip exists 2275 * in the snapshot. A pointer to the di_node is saved in the 2276 * i_lnode_t when it is allocated. For this link, get the di_node 2277 * for the source lnode. Then put the link on the di_node's list 2278 * of src links 2279 */ 2280 medinode = i_link->src_lnode->di_node; 2281 me->src_node_next = medinode->src_links; 2282 medinode->src_links = me->self; 2283 2284 /* 2285 * Put this link on the tgt_links list of the target 2286 * dip. 
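 * As with the lnode chains, insertion is at the head: the previous
 * list head offset is preserved in tgt_node_next.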
2287 */
2288 medinode = i_link->tgt_lnode->di_node;
2289 me->tgt_node_next = medinode->tgt_links;
2290 medinode->tgt_links = me->self;
2291
2292 return (MH_WALK_CONTINUE);
2293 }
2294
2295 /*ARGSUSED*/
2296 static uint_t
2297 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2298 {
2299 i_lnode_t *i_lnode = (i_lnode_t *)key;
2300 struct i_layer_data *data = arg;
2301 struct di_lnode *me;
2302 struct di_node *medinode;
2303
2304 ASSERT(i_lnode->self == 0);
2305
2306 i_lnode->self = data->lnode_off +
2307 (data->lnode_count * sizeof (struct di_lnode));
2308 data->lnode_count++;
2309
2310 ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2311 ASSERT(data->link_count == 0); /* links not done yet */
2312 ASSERT(data->lnode_count <= data->st->lnode_count);
2313
2314 /* fill in fields for the di_lnode snapshot */
2315 me = DI_LNODE(di_mem_addr(data->st, i_lnode->self));
2316 me->self = i_lnode->self;
2317
2318 if (i_lnode->devt == DDI_DEV_T_NONE) {
2319 me->dev_major = DDI_MAJOR_T_NONE;
2320 me->dev_minor = DDI_MAJOR_T_NONE;
2321 } else {
2322 me->dev_major = getmajor(i_lnode->devt);
2323 me->dev_minor = getminor(i_lnode->devt);
2324 }
2325
2326 /*
2327 * The dip corresponding to this lnode must exist in
2328 * the snapshot or we wouldn't have created the i_lnode_t
2329 * during LDI walk. Save the offset of the dip.
2330 */
2331 ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2332 me->node = i_lnode->di_node->self;
2333
2334 /*
2335 * There must be at least one link in or out of this lnode
2336 * or we wouldn't have created it. These fields will be set
2337 * during the link hash walk.
2338 */
2339 ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2340
2341 /*
2342 * Set the offset of the devinfo node associated with this
2343 * lnode. Also update the node_next pointer. This pointer
2344 * is set if there are multiple lnodes associated with the same
2345 * devinfo node. (This could occur when multiple minor nodes
2346 * are open for one device, etc.)
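 * The chain is threaded by snapshot offset: node_next takes the
 * previous head of the di_node's lnodes list (head insertion, as with
 * the link chains above).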
2347 */ 2348 medinode = i_lnode->di_node; 2349 me->node_next = medinode->lnodes; 2350 medinode->lnodes = me->self; 2351 2352 return (MH_WALK_CONTINUE); 2353 } 2354 2355 static di_off_t 2356 di_getlink_data(di_off_t off, struct di_state *st) 2357 { 2358 struct i_layer_data data = {0}; 2359 size_t size; 2360 2361 dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); 2362 2363 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, 2364 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, 2365 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); 2366 2367 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, 2368 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); 2369 2370 /* get driver layering information */ 2371 (void) ldi_usage_walker(st, di_ldi_callback); 2372 2373 /* check if there is any link data to include in the snapshot */ 2374 if (st->lnode_count == 0) { 2375 ASSERT(st->link_count == 0); 2376 goto out; 2377 } 2378 2379 ASSERT(st->link_count != 0); 2380 2381 /* get a pointer to snapshot memory for all the di_lnodes */ 2382 size = sizeof (struct di_lnode) * st->lnode_count; 2383 data.lnode_off = off = di_checkmem(st, off, size); 2384 off += size; 2385 2386 /* get a pointer to snapshot memory for all the di_links */ 2387 size = sizeof (struct di_link) * st->link_count; 2388 data.link_off = off = di_checkmem(st, off, size); 2389 off += size; 2390 2391 data.lnode_count = data.link_count = 0; 2392 data.st = st; 2393 2394 /* 2395 * We have lnodes and links that will go into the 2396 * snapshot, so let's walk the respective hashes 2397 * and snapshot them. The various linkages are 2398 * also set up during the walk. 2399 */ 2400 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); 2401 ASSERT(data.lnode_count == st->lnode_count); 2402 2403 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); 2404 ASSERT(data.link_count == st->link_count); 2405 2406 out: 2407 /* free up the i_lnodes and i_links used to create the snapshot */ 2408 mod_hash_destroy_hash(st->lnode_hash); 2409 mod_hash_destroy_hash(st->link_hash); 2410 st->lnode_count = 0; 2411 st->link_count = 0; 2412 2413 return (off); 2414 } 2415 2416 2417 /* 2418 * Copy all minor data nodes attached to a devinfo node into the snapshot. 2419 * It is called from di_copynode with active ndi_devi_enter to protect 2420 * the list of minor nodes. 
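 * Each di_minor is written at an offset returned by di_checkmem();
 * the ddm_name and ddm_node_type strings follow it inline, and the
 * next offset of the final minor node is cleared once the loop
 * completes.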
2421 */ 2422 static di_off_t 2423 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, 2424 struct di_state *st) 2425 { 2426 di_off_t off; 2427 struct di_minor *me; 2428 size_t size; 2429 2430 dcmn_err2((CE_CONT, "di_getmdata:\n")); 2431 2432 /* 2433 * check memory first 2434 */ 2435 off = di_checkmem(st, *off_p, sizeof (struct di_minor)); 2436 *off_p = off; 2437 2438 do { 2439 me = DI_MINOR(di_mem_addr(st, off)); 2440 me->self = off; 2441 me->type = mnode->type; 2442 me->node = node; 2443 me->user_private_data = NULL; 2444 2445 off += sizeof (struct di_minor); 2446 2447 /* 2448 * Split dev_t to major/minor, so it works for 2449 * both ILP32 and LP64 model 2450 */ 2451 me->dev_major = getmajor(mnode->ddm_dev); 2452 me->dev_minor = getminor(mnode->ddm_dev); 2453 me->spec_type = mnode->ddm_spec_type; 2454 2455 if (mnode->ddm_name) { 2456 size = strlen(mnode->ddm_name) + 1; 2457 me->name = off = di_checkmem(st, off, size); 2458 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name); 2459 off += size; 2460 } 2461 2462 if (mnode->ddm_node_type) { 2463 size = strlen(mnode->ddm_node_type) + 1; 2464 me->node_type = off = di_checkmem(st, off, size); 2465 (void) strcpy(di_mem_addr(st, off), 2466 mnode->ddm_node_type); 2467 off += size; 2468 } 2469 2470 off = di_checkmem(st, off, sizeof (struct di_minor)); 2471 me->next = off; 2472 mnode = mnode->next; 2473 } while (mnode); 2474 2475 me->next = 0; 2476 2477 return (off); 2478 } 2479 2480 /* 2481 * di_register_dip(), di_find_dip(): The dip must be protected 2482 * from deallocation when using these routines - this can either 2483 * be a reference count, a busy hold or a per-driver lock. 2484 */ 2485 2486 static void 2487 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) 2488 { 2489 struct dev_info *node = DEVI(dip); 2490 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2491 struct di_dkey *dk; 2492 2493 ASSERT(dip); 2494 ASSERT(off > 0); 2495 2496 key->k_type = DI_DKEY; 2497 dk = &(key->k_u.dkey); 2498 2499 dk->dk_dip = dip; 2500 dk->dk_major = node->devi_major; 2501 dk->dk_inst = node->devi_instance; 2502 dk->dk_nodeid = node->devi_nodeid; 2503 2504 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, 2505 (mod_hash_val_t)(uintptr_t)off) != 0) { 2506 panic( 2507 "duplicate devinfo (%p) registered during device " 2508 "tree walk", (void *)dip); 2509 } 2510 } 2511 2512 2513 static int 2514 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) 2515 { 2516 /* 2517 * uintptr_t must be used because it matches the size of void *; 2518 * mod_hash expects clients to place results into pointer-size 2519 * containers; since di_off_t is always a 32-bit offset, alignment 2520 * would otherwise be broken on 64-bit kernels. 2521 */ 2522 uintptr_t offset; 2523 struct di_key key = {0}; 2524 struct di_dkey *dk; 2525 2526 ASSERT(st->reg_dip_hash); 2527 ASSERT(dip); 2528 ASSERT(off_p); 2529 2530 2531 key.k_type = DI_DKEY; 2532 dk = &(key.k_u.dkey); 2533 2534 dk->dk_dip = dip; 2535 dk->dk_major = DEVI(dip)->devi_major; 2536 dk->dk_inst = DEVI(dip)->devi_instance; 2537 dk->dk_nodeid = DEVI(dip)->devi_nodeid; 2538 2539 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, 2540 (mod_hash_val_t *)&offset) == 0) { 2541 *off_p = (di_off_t)offset; 2542 return (0); 2543 } else { 2544 return (-1); 2545 } 2546 } 2547 2548 /* 2549 * di_register_pip(), di_find_pip(): The pip must be protected from deallocation 2550 * when using these routines. 
The caller must do this by protecting the 2551 * client(or phci)<->pip linkage while traversing the list and then holding the 2552 * pip when it is found in the list. 2553 */ 2554 2555 static void 2556 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) 2557 { 2558 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2559 char *path_addr; 2560 struct di_pkey *pk; 2561 2562 ASSERT(pip); 2563 ASSERT(off > 0); 2564 2565 key->k_type = DI_PKEY; 2566 pk = &(key->k_u.pkey); 2567 2568 pk->pk_pip = pip; 2569 path_addr = mdi_pi_get_addr(pip); 2570 if (path_addr) 2571 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); 2572 pk->pk_client = mdi_pi_get_client(pip); 2573 pk->pk_phci = mdi_pi_get_phci(pip); 2574 2575 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, 2576 (mod_hash_val_t)(uintptr_t)off) != 0) { 2577 panic( 2578 "duplicate pathinfo (%p) registered during device " 2579 "tree walk", (void *)pip); 2580 } 2581 } 2582 2583 /* 2584 * As with di_register_pip, the caller must hold or lock the pip 2585 */ 2586 static int 2587 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) 2588 { 2589 /* 2590 * uintptr_t must be used because it matches the size of void *; 2591 * mod_hash expects clients to place results into pointer-size 2592 * containers; since di_off_t is always a 32-bit offset, alignment 2593 * would otherwise be broken on 64-bit kernels. 2594 */ 2595 uintptr_t offset; 2596 struct di_key key = {0}; 2597 struct di_pkey *pk; 2598 2599 ASSERT(st->reg_pip_hash); 2600 ASSERT(off_p); 2601 2602 if (pip == NULL) { 2603 *off_p = 0; 2604 return (0); 2605 } 2606 2607 key.k_type = DI_PKEY; 2608 pk = &(key.k_u.pkey); 2609 2610 pk->pk_pip = pip; 2611 pk->pk_path_addr = mdi_pi_get_addr(pip); 2612 pk->pk_client = mdi_pi_get_client(pip); 2613 pk->pk_phci = mdi_pi_get_phci(pip); 2614 2615 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, 2616 (mod_hash_val_t *)&offset) == 0) { 2617 *off_p = (di_off_t)offset; 2618 return (0); 2619 } else { 2620 return (-1); 2621 } 2622 } 2623 2624 static di_path_state_t 2625 path_state_convert(mdi_pathinfo_state_t st) 2626 { 2627 switch (st) { 2628 case MDI_PATHINFO_STATE_ONLINE: 2629 return (DI_PATH_STATE_ONLINE); 2630 case MDI_PATHINFO_STATE_STANDBY: 2631 return (DI_PATH_STATE_STANDBY); 2632 case MDI_PATHINFO_STATE_OFFLINE: 2633 return (DI_PATH_STATE_OFFLINE); 2634 case MDI_PATHINFO_STATE_FAULT: 2635 return (DI_PATH_STATE_FAULT); 2636 default: 2637 return (DI_PATH_STATE_UNKNOWN); 2638 } 2639 } 2640 2641 static uint_t 2642 path_flags_convert(uint_t pi_path_flags) 2643 { 2644 uint_t di_path_flags = 0; 2645 2646 /* MDI_PATHINFO_FLAGS_HIDDEN nodes not in snapshot */ 2647 2648 if (pi_path_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED) 2649 di_path_flags |= DI_PATH_FLAGS_DEVICE_REMOVED; 2650 2651 return (di_path_flags); 2652 } 2653 2654 2655 static di_off_t 2656 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p, 2657 struct di_state *st) 2658 { 2659 nvpair_t *prop = NULL; 2660 struct di_path_prop *me; 2661 int off; 2662 size_t size; 2663 char *str; 2664 uchar_t *buf; 2665 uint_t nelems; 2666 2667 off = *off_p; 2668 if (mdi_pi_get_next_prop(pip, NULL) == NULL) { 2669 *off_p = 0; 2670 return (off); 2671 } 2672 2673 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2674 *off_p = off; 2675 2676 while (prop = mdi_pi_get_next_prop(pip, prop)) { 2677 me = DI_PATHPROP(di_mem_addr(st, off)); 2678 me->self = off; 2679 off += sizeof (struct di_path_prop); 2680 2681 /* 2682 * property name 2683 */ 2684 size = 
strlen(nvpair_name(prop)) + 1; 2685 me->prop_name = off = di_checkmem(st, off, size); 2686 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); 2687 off += size; 2688 2689 switch (nvpair_type(prop)) { 2690 case DATA_TYPE_BYTE: 2691 case DATA_TYPE_INT16: 2692 case DATA_TYPE_UINT16: 2693 case DATA_TYPE_INT32: 2694 case DATA_TYPE_UINT32: 2695 me->prop_type = DDI_PROP_TYPE_INT; 2696 size = sizeof (int32_t); 2697 off = di_checkmem(st, off, size); 2698 (void) nvpair_value_int32(prop, 2699 (int32_t *)di_mem_addr(st, off)); 2700 break; 2701 2702 case DATA_TYPE_INT64: 2703 case DATA_TYPE_UINT64: 2704 me->prop_type = DDI_PROP_TYPE_INT64; 2705 size = sizeof (int64_t); 2706 off = di_checkmem(st, off, size); 2707 (void) nvpair_value_int64(prop, 2708 (int64_t *)di_mem_addr(st, off)); 2709 break; 2710 2711 case DATA_TYPE_STRING: 2712 me->prop_type = DDI_PROP_TYPE_STRING; 2713 (void) nvpair_value_string(prop, &str); 2714 size = strlen(str) + 1; 2715 off = di_checkmem(st, off, size); 2716 (void) strcpy(di_mem_addr(st, off), str); 2717 break; 2718 2719 case DATA_TYPE_BYTE_ARRAY: 2720 case DATA_TYPE_INT16_ARRAY: 2721 case DATA_TYPE_UINT16_ARRAY: 2722 case DATA_TYPE_INT32_ARRAY: 2723 case DATA_TYPE_UINT32_ARRAY: 2724 case DATA_TYPE_INT64_ARRAY: 2725 case DATA_TYPE_UINT64_ARRAY: 2726 me->prop_type = DDI_PROP_TYPE_BYTE; 2727 (void) nvpair_value_byte_array(prop, &buf, &nelems); 2728 size = nelems; 2729 if (nelems != 0) { 2730 off = di_checkmem(st, off, size); 2731 bcopy(buf, di_mem_addr(st, off), size); 2732 } 2733 break; 2734 2735 default: /* Unknown or unhandled type; skip it */ 2736 size = 0; 2737 break; 2738 } 2739 2740 if (size > 0) { 2741 me->prop_data = off; 2742 } 2743 2744 me->prop_len = (int)size; 2745 off += size; 2746 2747 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2748 me->prop_next = off; 2749 } 2750 2751 me->prop_next = 0; 2752 return (off); 2753 } 2754 2755 2756 static void 2757 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp, 2758 int get_client) 2759 { 2760 if (get_client) { 2761 ASSERT(me->path_client == 0); 2762 me->path_client = noff; 2763 ASSERT(me->path_c_link == 0); 2764 *off_pp = &me->path_c_link; 2765 me->path_snap_state &= 2766 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK); 2767 } else { 2768 ASSERT(me->path_phci == 0); 2769 me->path_phci = noff; 2770 ASSERT(me->path_p_link == 0); 2771 *off_pp = &me->path_p_link; 2772 me->path_snap_state &= 2773 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK); 2774 } 2775 } 2776 2777 /* 2778 * off_p: pointer to the linkage field. This links pips along the client|phci 2779 * linkage list. 2780 * noff : Offset for the endpoint dip snapshot. 2781 */ 2782 static di_off_t 2783 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff, 2784 struct di_state *st, int get_client) 2785 { 2786 di_off_t off; 2787 mdi_pathinfo_t *pip; 2788 struct di_path *me; 2789 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *); 2790 size_t size; 2791 2792 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client)); 2793 2794 /* 2795 * The naming of the following mdi_xyz() is unfortunately 2796 * non-intuitive. mdi_get_next_phci_path() follows the 2797 * client_link i.e. the list of pip's belonging to the 2798 * given client dip. 
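 * Conversely, mdi_get_next_client_path() follows the phci_link, i.e.
 * the list of pips belonging to the given pHCI dip.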
2799 */
2800 if (get_client)
2801 next_pip = &mdi_get_next_phci_path;
2802 else
2803 next_pip = &mdi_get_next_client_path;
2804
2805 off = *off_p;
2806
2807 pip = NULL;
2808 while (pip = (*next_pip)(dip, pip)) {
2809 di_off_t stored_offset;
2810
2811 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2812
2813 mdi_pi_lock(pip);
2814
2815 /* We don't represent hidden paths in the snapshot */
2816 if (mdi_pi_ishidden(pip)) {
2817 dcmn_err((CE_WARN, "hidden, skip"));
2818 mdi_pi_unlock(pip);
2819 continue;
2820 }
2821
2822 if (di_pip_find(st, pip, &stored_offset) != -1) {
2823 /*
2824 * We've already seen this pathinfo node so we need to
2825 * take care not to snap it again. However, one endpoint
2826 * and linkage will be set here. The other endpoint
2827 * and linkage were already set when the pip was
2828 * first snapshotted, i.e. when the other endpoint dip
2829 * was snapshotted.
2830 */
2831 me = DI_PATH(di_mem_addr(st, stored_offset));
2832 *off_p = stored_offset;
2833
2834 di_path_one_endpoint(me, noff, &off_p, get_client);
2835
2836 /*
2837 * The other endpoint and linkage were set when this
2838 * pip was snapshotted. So we are done with both
2839 * endpoints and linkages.
2840 */
2841 ASSERT(!(me->path_snap_state &
2842 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2843 ASSERT(!(me->path_snap_state &
2844 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2845
2846 mdi_pi_unlock(pip);
2847 continue;
2848 }
2849
2850 /*
2851 * Now that we need to snapshot this pip, check memory
2852 */
2853 size = sizeof (struct di_path);
2854 *off_p = off = di_checkmem(st, off, size);
2855 me = DI_PATH(di_mem_addr(st, off));
2856 me->self = off;
2857 off += size;
2858
2859 me->path_snap_state =
2860 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2861 me->path_snap_state |=
2862 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2863
2864 /*
2865 * Zero out fields as di_checkmem() doesn't guarantee
2866 * zero-filled memory
2867 */
2868 me->path_client = me->path_phci = 0;
2869 me->path_c_link = me->path_p_link = 0;
2870
2871 di_path_one_endpoint(me, noff, &off_p, get_client);
2872
2873 /*
2874 * Note the existence of this pathinfo
2875 */
2876 di_register_pip(st, pip, me->self);
2877
2878 me->path_state = path_state_convert(mdi_pi_get_state(pip));
2879 me->path_flags = path_flags_convert(mdi_pi_get_flags(pip));
2880
2881 me->path_instance = mdi_pi_get_path_instance(pip);
2882
2883 /*
2884 * Get intermediate addressing info.
2885 */
2886 size = strlen(mdi_pi_get_addr(pip)) + 1;
2887 me->path_addr = off = di_checkmem(st, off, size);
2888 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2889 off += size;
2890
2891 /*
2892 * Get path properties if props are to be included in the
2893 * snapshot
2894 */
2895 if (DINFOPROP & st->command) {
2896 me->path_prop = off;
2897 off = di_path_getprop(pip, &me->path_prop, st);
2898 } else {
2899 me->path_prop = 0;
2900 }
2901
2902 mdi_pi_unlock(pip);
2903 }
2904
2905 *off_p = 0;
2906 return (off);
2907 }
2908
2909 /*
2910 * Return driver prop_op entry point for the specified devinfo node.
2911 *
2912 * To return a non-NULL value:
2913 * - driver must be attached and held:
2914 * If driver is not attached we ignore the driver property list.
2915 * No one should rely on such properties.
2916 * - driver "cb_prop_op != ddi_prop_op":
2917 * If "cb_prop_op == ddi_prop_op", framework does not need to call driver.
2918 * XXX or parent's bus_prop_op != ddi_bus_prop_op
2919 */
2920 static int
2921 (*di_getprop_prop_op(struct dev_info *dip))
2922 (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *)
2923 {
2924 struct dev_ops *ops;
2925
2926 /* If driver is not attached we ignore the driver property list. */
2927 if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip))
2928 return (NULL);
2929
2930 /*
2931 * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev,
2932 * or even NULL.
2933 */
2934 ops = dip->devi_ops;
2935 if (ops && ops->devo_cb_ops &&
2936 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
2937 (ops->devo_cb_ops->cb_prop_op != nodev) &&
2938 (ops->devo_cb_ops->cb_prop_op != nulldev) &&
2939 (ops->devo_cb_ops->cb_prop_op != NULL))
2940 return (ops->devo_cb_ops->cb_prop_op);
2941 return (NULL);
2942 }
2943
2944 static di_off_t
2945 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip,
2946 int (*prop_op)(),
2947 char *name, dev_t devt, int aflags, int alen, caddr_t aval,
2948 di_off_t off, di_off_t **off_pp)
2949 {
2950 int need_free = 0;
2951 dev_t pdevt;
2952 int pflags;
2953 int rv;
2954 caddr_t val;
2955 int len;
2956 size_t size;
2957 struct di_prop *pp;
2958
2959 /* If we have a prop_op function, ask the driver for the latest value */
2960 if (prop_op) {
2961 ASSERT(dip);
2962
2963 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */
2964 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt;
2965
2966 /*
2967 * We have type information in flags, but are invoking an
2968 * old non-typed prop_op(9E) interface. Since not all types are
2969 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64),
2970 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to
2971 * expand type bits beyond DDI_PROP_TYPE_ANY. This allows us
2972 * to use the legacy prop_op(9E) interface to obtain updates to
2973 * non-DDI_PROP_TYPE_ANY dynamic properties.
2974 */
2975 pflags = aflags & ~DDI_PROP_TYPE_MASK;
2976 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM |
2977 DDI_PROP_CONSUMER_TYPED;
2978
2979 /*
2980 * Hold and exit across prop_op(9E) to avoid lock order
2981 * issues between
2982 * [ndi_devi_enter() ..prop_op(9E).. driver-lock]
2983 * .vs.
2984 * [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F)..
2985 * ndi_devi_enter()]
2986 * ordering.
2987 */
2988 ndi_hold_devi((dev_info_t *)dip);
2989 ndi_devi_exit((dev_info_t *)dip, dip->devi_circular);
2990 rv = (*prop_op)(pdevt, (dev_info_t *)dip,
2991 PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len);
2992 ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular);
2993 ndi_rele_devi((dev_info_t *)dip);
2994
2995 if (rv == DDI_PROP_SUCCESS) {
2996 need_free = 1; /* dynamic prop obtained */
2997 } else if (dyn) {
2998 /*
2999 * A dynamic property must succeed prop_op(9E) to show
3000 * up in the snapshot - that is the only source of its
3001 * value.
3002 */
3003 return (off); /* dynamic prop not supported */
3004 } else {
3005 /*
3006 * In case calling the driver caused an update via
3007 * prop_op(9E) of a non-dynamic property (code leading
3008 * to ddi_prop_change), we defer picking up val and
3009 * len information until after prop_op(9E) to ensure
3010 * that we snapshot the latest value.
3011 */
3012 val = aval;
3013 len = alen;
3014
3015 }
3016 } else {
3017 val = aval;
3018 len = alen;
3019 }
3020
3021 dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n",
3022 list, name ?
name : "NULL", len, (void *)val)); 3023 3024 size = sizeof (struct di_prop); 3025 **off_pp = off = di_checkmem(st, off, size); 3026 pp = DI_PROP(di_mem_addr(st, off)); 3027 pp->self = off; 3028 off += size; 3029 3030 pp->dev_major = getmajor(devt); 3031 pp->dev_minor = getminor(devt); 3032 pp->prop_flags = aflags; 3033 pp->prop_list = list; 3034 3035 /* property name */ 3036 if (name) { 3037 size = strlen(name) + 1; 3038 pp->prop_name = off = di_checkmem(st, off, size); 3039 (void) strcpy(di_mem_addr(st, off), name); 3040 off += size; 3041 } else { 3042 pp->prop_name = -1; 3043 } 3044 3045 pp->prop_len = len; 3046 if (val == NULL) { 3047 pp->prop_data = -1; 3048 } else if (len != 0) { 3049 size = len; 3050 pp->prop_data = off = di_checkmem(st, off, size); 3051 bcopy(val, di_mem_addr(st, off), size); 3052 off += size; 3053 } 3054 3055 pp->next = 0; /* assume tail for now */ 3056 *off_pp = &pp->next; /* return pointer to our next */ 3057 3058 if (need_free) /* free PROP_LEN_AND_VAL_ALLOC alloc */ 3059 kmem_free(val, len); 3060 return (off); 3061 } 3062 3063 3064 /* 3065 * Copy a list of properties attached to a devinfo node. Called from 3066 * di_copynode with active ndi_devi_enter. The major number is passed in case 3067 * we need to call driver's prop_op entry. The value of list indicates 3068 * which list we are copying. Possible values are: 3069 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST 3070 */ 3071 static di_off_t 3072 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p, 3073 struct di_state *st, struct dev_info *dip) 3074 { 3075 struct ddi_prop *prop; 3076 int (*prop_op)(); 3077 int off; 3078 struct ddi_minor_data *mn; 3079 i_ddi_prop_dyn_t *dp; 3080 struct plist { 3081 struct plist *pl_next; 3082 char *pl_name; 3083 int pl_flags; 3084 dev_t pl_dev; 3085 int pl_len; 3086 caddr_t pl_val; 3087 } *pl, *pl0, **plp; 3088 3089 ASSERT(st != NULL); 3090 3091 off = *off_p; 3092 *off_p = 0; 3093 dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n", 3094 list, (void *)*pprop)); 3095 3096 /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */ 3097 prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL; 3098 3099 /* 3100 * Form private list of properties, holding devi_lock for properties 3101 * that hang off the dip. 3102 */ 3103 if (dip) 3104 mutex_enter(&(dip->devi_lock)); 3105 for (pl0 = NULL, plp = &pl0, prop = *pprop; 3106 prop; plp = &pl->pl_next, prop = prop->prop_next) { 3107 pl = kmem_alloc(sizeof (*pl), KM_SLEEP); 3108 *plp = pl; 3109 pl->pl_next = NULL; 3110 if (prop->prop_name) 3111 pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP); 3112 else 3113 pl->pl_name = NULL; 3114 pl->pl_flags = prop->prop_flags; 3115 pl->pl_dev = prop->prop_dev; 3116 if (prop->prop_len) { 3117 pl->pl_len = prop->prop_len; 3118 pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP); 3119 bcopy(prop->prop_val, pl->pl_val, pl->pl_len); 3120 } else { 3121 pl->pl_len = 0; 3122 pl->pl_val = NULL; 3123 } 3124 } 3125 if (dip) 3126 mutex_exit(&(dip->devi_lock)); 3127 3128 /* 3129 * Now that we have dropped devi_lock, perform a second-pass to 3130 * add properties to the snapshot. We do this as a second pass 3131 * because we may need to call prop_op(9E) and we can't hold 3132 * devi_lock across that call. 
3133 */ 3134 for (pl = pl0; pl; pl = pl0) { 3135 pl0 = pl->pl_next; 3136 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name, 3137 pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val, 3138 off, &off_p); 3139 if (pl->pl_val) 3140 kmem_free(pl->pl_val, pl->pl_len); 3141 if (pl->pl_name) 3142 kmem_free(pl->pl_name, strlen(pl->pl_name) + 1); 3143 kmem_free(pl, sizeof (*pl)); 3144 } 3145 3146 /* 3147 * If there is no prop_op or dynamic property support has been 3148 * disabled, we are done. 3149 */ 3150 if ((prop_op == NULL) || (di_prop_dyn == 0)) { 3151 *off_p = 0; 3152 return (off); 3153 } 3154 3155 /* Add dynamic driver properties to snapshot */ 3156 for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip); 3157 dp && dp->dp_name; dp++) { 3158 if (dp->dp_spec_type) { 3159 /* if spec_type, property of matching minor */ 3160 ASSERT(DEVI_BUSY_OWNED(dip)); 3161 for (mn = dip->devi_minor; mn; mn = mn->next) { 3162 if (mn->ddm_spec_type != dp->dp_spec_type) 3163 continue; 3164 off = di_getprop_add(list, 1, st, dip, prop_op, 3165 dp->dp_name, mn->ddm_dev, dp->dp_type, 3166 0, NULL, off, &off_p); 3167 } 3168 } else { 3169 /* property of devinfo node */ 3170 off = di_getprop_add(list, 1, st, dip, prop_op, 3171 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type, 3172 0, NULL, off, &off_p); 3173 } 3174 } 3175 3176 /* Add dynamic parent properties to snapshot */ 3177 for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip); 3178 dp && dp->dp_name; dp++) { 3179 if (dp->dp_spec_type) { 3180 /* if spec_type, property of matching minor */ 3181 ASSERT(DEVI_BUSY_OWNED(dip)); 3182 for (mn = dip->devi_minor; mn; mn = mn->next) { 3183 if (mn->ddm_spec_type != dp->dp_spec_type) 3184 continue; 3185 off = di_getprop_add(list, 1, st, dip, prop_op, 3186 dp->dp_name, mn->ddm_dev, dp->dp_type, 3187 0, NULL, off, &off_p); 3188 } 3189 } else { 3190 /* property of devinfo node */ 3191 off = di_getprop_add(list, 1, st, dip, prop_op, 3192 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type, 3193 0, NULL, off, &off_p); 3194 } 3195 } 3196 3197 *off_p = 0; 3198 return (off); 3199 } 3200 3201 /* 3202 * find private data format attached to a dip 3203 * parent = 1 to match driver name of parent dip (for parent private data) 3204 * 0 to match driver name of current dip (for driver private data) 3205 */ 3206 #define DI_MATCH_DRIVER 0 3207 #define DI_MATCH_PARENT 1 3208 3209 struct di_priv_format * 3210 di_match_drv_name(struct dev_info *node, struct di_state *st, int match) 3211 { 3212 int i, count, len; 3213 char *drv_name; 3214 major_t major; 3215 struct di_all *all; 3216 struct di_priv_format *form; 3217 3218 dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n", 3219 node->devi_node_name, match)); 3220 3221 if (match == DI_MATCH_PARENT) { 3222 node = DEVI(node->devi_parent); 3223 } 3224 3225 if (node == NULL) { 3226 return (NULL); 3227 } 3228 3229 major = node->devi_major; 3230 if (major == (major_t)(-1)) { 3231 return (NULL); 3232 } 3233 3234 /* 3235 * Match the driver name. 
3236 */
3237 drv_name = ddi_major_to_name(major);
3238 if ((drv_name == NULL) || *drv_name == '\0') {
3239 return (NULL);
3240 }
3241
3242 /* Now get the di_priv_format array */
3243 all = DI_ALL_PTR(st);
3244 if (match == DI_MATCH_PARENT) {
3245 count = all->n_ppdata;
3246 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format));
3247 } else {
3248 count = all->n_dpdata;
3249 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format));
3250 }
3251
3252 len = strlen(drv_name);
3253 for (i = 0; i < count; i++) {
3254 char *tmp;
3255
3256 tmp = form[i].drv_name;
3257 while (tmp && (*tmp != '\0')) {
3258 if (strncmp(drv_name, tmp, len) == 0) {
3259 return (&form[i]);
3260 }
3261 /*
3262 * Move to the next driver name, skipping the white space
3263 */
3264 if (tmp = strchr(tmp, ' ')) {
3265 tmp++;
3266 }
3267 }
3268 }
3269
3270 return (NULL);
3271 }
3272
3273 /*
3274 * The following functions copy data as specified by the format passed in.
3275 * To prevent an invalid format from panicking the system, we call on_fault().
3276 * A return value of 0 indicates an error. Otherwise, the total offset
3277 * is returned.
3278 */
3279 #define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */
3280
3281 static di_off_t
3282 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
3283 void *data, di_off_t *off_p, struct di_state *st)
3284 {
3285 caddr_t pa;
3286 void *ptr;
3287 int i, size, repeat;
3288 di_off_t off, off0, *tmp;
3289 char *path;
3290 label_t ljb;
3291
3292 dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3293
3294 /*
3295 * check memory availability. Private data size is
3296 * limited to DI_MAX_PRIVDATA.
3297 */
3298 off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3299 *off_p = off;
3300
3301 if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3302 goto failure;
3303 }
3304
3305 if (!on_fault(&ljb)) {
3306 /* copy the struct */
3307 bcopy(data, di_mem_addr(st, off), pdp->bytes);
3308 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */
3309
3310 /* dereferencing pointers */
3311 for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3312
3313 if (pdp->ptr[i].size == 0) {
3314 goto success; /* no more ptrs */
3315 }
3316
3317 /*
3318 * first, get the pointer content
3319 */
3320 if ((pdp->ptr[i].offset < 0) ||
3321 (pdp->ptr[i].offset > pdp->bytes - sizeof (char *)))
3322 goto failure; /* wrong offset */
3323
3324 pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3325
3326 /* save a tmp ptr to store off_t later */
3327 tmp = (di_off_t *)(intptr_t)pa;
3328
3329 /* get pointer value, if NULL continue */
3330 ptr = *((void **) (intptr_t)pa);
3331 if (ptr == NULL) {
3332 continue;
3333 }
3334
3335 /*
3336 * next, find the repeat count (array dimension)
3337 */
3338 repeat = pdp->ptr[i].len_offset;
3339
3340 /*
3341 * A negative value indicates a fixed sized array, whose
3342 * dimension is the absolute value.
3343 *
3344 * Zero or a positive value indicates a variable sized
3345 * array; the value is then the offset of an int member
3346 * of the structure that holds the dimension.
3347 */
3348 if (repeat > pdp->bytes - sizeof (int)) {
3349 goto failure; /* wrong offset */
3350 }
3351
3352 if (repeat >= 0) {
3353 repeat = *((int *)
3354 (intptr_t)((caddr_t)data + repeat));
3355 } else {
3356 repeat = -repeat;
3357 }
3358
3359 /*
3360 * next, get the size of the object to be copied
3361 */
3362 size = pdp->ptr[i].size * repeat;
3363
3364 /*
3365 * Arbitrarily limit the total size of the object to be
3366 * copied (1 byte to 1/4 page).
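 * off0 is the running total already copied, so the remaining budget
 * is DI_MAX_PRIVDATA - off0.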
3367 */
3368 if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3369 goto failure; /* wrong size or too big */
3370 }
3371
3372 /*
3373 * Now copy the data
3374 */
3375 *tmp = off0;
3376 bcopy(ptr, di_mem_addr(st, off + off0), size);
3377 off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */
3378 }
3379 } else {
3380 goto failure;
3381 }
3382
3383 success:
3384 /*
3385 * success if reached here
3386 */
3387 no_fault();
3388 return (off + off0);
3389 /*NOTREACHED*/
3390
3391 failure:
3392 /*
3393 * fault occurred
3394 */
3395 no_fault();
3396 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3397 cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
3398 ddi_pathname((dev_info_t *)node, path), data);
3399 kmem_free(path, MAXPATHLEN);
3400 *off_p = -1; /* set private data to indicate error */
3401
3402 return (off);
3403 }
3404
3405 /*
3406 * get parent private data; on error, returns original offset
3407 */
3408 static di_off_t
3409 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3410 {
3411 int off;
3412 struct di_priv_format *ppdp;
3413
3414 dcmn_err2((CE_CONT, "di_getppdata:\n"));
3415
3416 /* find the parent data format */
3417 if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3418 off = *off_p;
3419 *off_p = 0; /* set parent data to none */
3420 return (off);
3421 }
3422
3423 return (di_getprvdata(ppdp, node,
3424 ddi_get_parent_data((dev_info_t *)node), off_p, st));
3425 }
3426
3427 /*
3428 * get driver private data; on error, returns original offset
3429 */
3430 static di_off_t
3431 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3432 {
3433 int off;
3434 struct di_priv_format *dpdp;
3435
3436 dcmn_err2((CE_CONT, "di_getdpdata:"));
3437
3438 /* find the driver data format */
3439 if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3440 off = *off_p;
3441 *off_p = 0; /* set driver data to none */
3442 return (off);
3443 }
3444
3445 return (di_getprvdata(dpdp, node,
3446 ddi_get_driver_private((dev_info_t *)node), off_p, st));
3447 }
3448
3449 /*
3450 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3451 * This function encapsulates the state machine:
3452 *
3453 * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3454 * | SNAPSHOT USRLD |
3455 * --------------------------------------------------
3456 *
3457 * Returns 0 on success and -1 on failure
3458 */
3459 static int
3460 di_setstate(struct di_state *st, int new_state)
3461 {
3462 int ret = 0;
3463
3464 mutex_enter(&di_lock);
3465 switch (new_state) {
3466 case IOC_IDLE:
3467 case IOC_DONE:
3468 break;
3469 case IOC_SNAP:
3470 if (st->di_iocstate != IOC_IDLE)
3471 ret = -1;
3472 break;
3473 case IOC_COPY:
3474 if (st->di_iocstate != IOC_DONE)
3475 ret = -1;
3476 break;
3477 default:
3478 ret = -1;
3479 }
3480
3481 if (ret == 0)
3482 st->di_iocstate = new_state;
3483 else
3484 cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3485 st->di_iocstate, new_state);
3486 mutex_exit(&di_lock);
3487 return (ret);
3488 }
3489
3490 /*
3491 * We cannot assume the presence of the entire
3492 * snapshot in this routine.
All we are guaranteed 3493 * is the di_all struct + 1 byte (for root_path) 3494 */ 3495 static int 3496 header_plus_one_ok(struct di_all *all) 3497 { 3498 /* 3499 * Refuse to read old versions 3500 */ 3501 if (all->version != DI_SNAPSHOT_VERSION) { 3502 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); 3503 return (0); 3504 } 3505 3506 if (all->cache_magic != DI_CACHE_MAGIC) { 3507 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); 3508 return (0); 3509 } 3510 3511 if (all->snapshot_time == 0) { 3512 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); 3513 return (0); 3514 } 3515 3516 if (all->top_devinfo == 0) { 3517 CACHE_DEBUG((DI_ERR, "NULL top devinfo")); 3518 return (0); 3519 } 3520 3521 if (all->map_size < sizeof (*all) + 1) { 3522 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); 3523 return (0); 3524 } 3525 3526 if (all->root_path[0] != '/' || all->root_path[1] != '\0') { 3527 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", 3528 all->root_path[0], all->root_path[1])); 3529 return (0); 3530 } 3531 3532 /* 3533 * We can't check checksum here as we just have the header 3534 */ 3535 3536 return (1); 3537 } 3538 3539 static int 3540 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) 3541 { 3542 rlim64_t rlimit; 3543 ssize_t resid; 3544 int error = 0; 3545 3546 3547 rlimit = RLIM64_INFINITY; 3548 3549 while (len) { 3550 resid = 0; 3551 error = vn_rdwr(UIO_WRITE, vp, buf, len, off, 3552 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 3553 3554 if (error || resid < 0) { 3555 error = error ? error : EIO; 3556 CACHE_DEBUG((DI_ERR, "write error: %d", error)); 3557 break; 3558 } 3559 3560 /* 3561 * Check if we are making progress 3562 */ 3563 if (resid >= len) { 3564 error = ENOSPC; 3565 break; 3566 } 3567 buf += len - resid; 3568 off += len - resid; 3569 len = resid; 3570 } 3571 3572 return (error); 3573 } 3574 3575 static void 3576 di_cache_write(struct di_cache *cache) 3577 { 3578 struct di_all *all; 3579 struct vnode *vp; 3580 int oflags; 3581 size_t map_size; 3582 size_t chunk; 3583 offset_t off; 3584 int error; 3585 char *buf; 3586 3587 ASSERT(DI_CACHE_LOCKED(*cache)); 3588 ASSERT(!servicing_interrupt()); 3589 3590 if (cache->cache_size == 0) { 3591 ASSERT(cache->cache_data == NULL); 3592 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); 3593 return; 3594 } 3595 3596 ASSERT(cache->cache_size > 0); 3597 ASSERT(cache->cache_data); 3598 3599 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { 3600 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); 3601 return; 3602 } 3603 3604 all = (struct di_all *)cache->cache_data; 3605 3606 if (!header_plus_one_ok(all)) { 3607 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); 3608 return; 3609 } 3610 3611 ASSERT(strcmp(all->root_path, "/") == 0); 3612 3613 /* 3614 * The cache_size is the total allocated memory for the cache. 3615 * The map_size is the actual size of valid data in the cache. 3616 * map_size may be smaller than cache_size but cannot exceed 3617 * cache_size. 3618 */ 3619 if (all->map_size > cache->cache_size) { 3620 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." 
3621 " Skipping write", all->map_size, cache->cache_size)); 3622 return; 3623 } 3624 3625 /* 3626 * First unlink the temp file 3627 */ 3628 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3629 if (error && error != ENOENT) { 3630 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3631 DI_CACHE_TEMP, error)); 3632 } 3633 3634 if (error == EROFS) { 3635 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3636 return; 3637 } 3638 3639 vp = NULL; 3640 oflags = (FCREAT|FWRITE); 3641 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3642 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3643 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3644 DI_CACHE_TEMP, error)); 3645 return; 3646 } 3647 3648 ASSERT(vp); 3649 3650 /* 3651 * Paranoid: Check if the file is on a read-only FS 3652 */ 3653 if (vn_is_readonly(vp)) { 3654 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3655 goto fail; 3656 } 3657 3658 /* 3659 * Note that we only write map_size bytes to disk - this saves 3660 * space as the actual cache size may be larger than size of 3661 * valid data in the cache. 3662 * Another advantage is that it makes verification of size 3663 * easier when the file is read later. 3664 */ 3665 map_size = all->map_size; 3666 off = 0; 3667 buf = cache->cache_data; 3668 3669 while (map_size) { 3670 ASSERT(map_size > 0); 3671 /* 3672 * Write in chunks so that VM system 3673 * is not overwhelmed 3674 */ 3675 if (map_size > di_chunk * PAGESIZE) 3676 chunk = di_chunk * PAGESIZE; 3677 else 3678 chunk = map_size; 3679 3680 error = chunk_write(vp, off, buf, chunk); 3681 if (error) { 3682 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3683 off, error)); 3684 goto fail; 3685 } 3686 3687 off += chunk; 3688 buf += chunk; 3689 map_size -= chunk; 3690 3691 /* If low on memory, give pageout a chance to run */ 3692 if (freemem < desfree) 3693 delay(1); 3694 } 3695 3696 /* 3697 * Now sync the file and close it 3698 */ 3699 if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) { 3700 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3701 } 3702 3703 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) { 3704 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3705 VN_RELE(vp); 3706 return; 3707 } 3708 3709 VN_RELE(vp); 3710 3711 /* 3712 * Now do the rename 3713 */ 3714 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3715 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3716 return; 3717 } 3718 3719 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3720 3721 return; 3722 3723 fail: 3724 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL); 3725 VN_RELE(vp); 3726 } 3727 3728 3729 /* 3730 * Since we could be called early in boot, 3731 * use kobj_read_file() 3732 */ 3733 static void 3734 di_cache_read(struct di_cache *cache) 3735 { 3736 struct _buf *file; 3737 struct di_all *all; 3738 int n; 3739 size_t map_size, sz, chunk; 3740 offset_t off; 3741 caddr_t buf; 3742 uint32_t saved_crc, crc; 3743 3744 ASSERT(modrootloaded); 3745 ASSERT(DI_CACHE_LOCKED(*cache)); 3746 ASSERT(cache->cache_data == NULL); 3747 ASSERT(cache->cache_size == 0); 3748 ASSERT(!servicing_interrupt()); 3749 3750 file = kobj_open_file(DI_CACHE_FILE); 3751 if (file == (struct _buf *)-1) { 3752 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3753 DI_CACHE_FILE, ENOENT)); 3754 return; 3755 } 3756 3757 /* 3758 * Read in the header+root_path first. 
The root_path must be "/" 3759 */ 3760 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); 3761 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); 3762 3763 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { 3764 kmem_free(all, sizeof (*all) + 1); 3765 kobj_close_file(file); 3766 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); 3767 return; 3768 } 3769 3770 map_size = all->map_size; 3771 3772 kmem_free(all, sizeof (*all) + 1); 3773 3774 ASSERT(map_size >= sizeof (*all) + 1); 3775 3776 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); 3777 sz = map_size; 3778 off = 0; 3779 while (sz) { 3780 /* Don't overload VM with large reads */ 3781 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; 3782 n = kobj_read_file(file, buf, chunk, off); 3783 if (n != chunk) { 3784 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", 3785 DI_CACHE_FILE, off)); 3786 goto fail; 3787 } 3788 off += chunk; 3789 buf += chunk; 3790 sz -= chunk; 3791 } 3792 3793 ASSERT(off == map_size); 3794 3795 /* 3796 * Read past expected EOF to verify size. 3797 */ 3798 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { 3799 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); 3800 goto fail; 3801 } 3802 3803 all = (struct di_all *)di_cache.cache_data; 3804 if (!header_plus_one_ok(all)) { 3805 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); 3806 goto fail; 3807 } 3808 3809 /* 3810 * Compute CRC with checksum field in the cache data set to 0 3811 */ 3812 saved_crc = all->cache_checksum; 3813 all->cache_checksum = 0; 3814 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); 3815 all->cache_checksum = saved_crc; 3816 3817 if (crc != all->cache_checksum) { 3818 CACHE_DEBUG((DI_ERR, 3819 "%s: checksum error: expected=0x%x actual=0x%x", 3820 DI_CACHE_FILE, all->cache_checksum, crc)); 3821 goto fail; 3822 } 3823 3824 if (all->map_size != map_size) { 3825 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); 3826 goto fail; 3827 } 3828 3829 kobj_close_file(file); 3830 3831 di_cache.cache_size = map_size; 3832 3833 return; 3834 3835 fail: 3836 kmem_free(di_cache.cache_data, map_size); 3837 kobj_close_file(file); 3838 di_cache.cache_data = NULL; 3839 di_cache.cache_size = 0; 3840 } 3841 3842 3843 /* 3844 * Checks if arguments are valid for using the cache. 
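Returns 1 if the arguments allow use of the cache; otherwise
 * returns 0 and sets *error.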
3845 */ 3846 static int 3847 cache_args_valid(struct di_state *st, int *error) 3848 { 3849 ASSERT(error); 3850 ASSERT(st->mem_size > 0); 3851 ASSERT(st->memlist != NULL); 3852 3853 if (!modrootloaded || !i_ddi_io_initialized()) { 3854 CACHE_DEBUG((DI_ERR, 3855 "cache lookup failure: I/O subsystem not inited")); 3856 *error = ENOTACTIVE; 3857 return (0); 3858 } 3859 3860 /* 3861 * No other flags allowed with DINFOCACHE 3862 */ 3863 if (st->command != (DINFOCACHE & DIIOC_MASK)) { 3864 CACHE_DEBUG((DI_ERR, 3865 "cache lookup failure: bad flags: 0x%x", 3866 st->command)); 3867 *error = EINVAL; 3868 return (0); 3869 } 3870 3871 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3872 CACHE_DEBUG((DI_ERR, 3873 "cache lookup failure: bad root: %s", 3874 DI_ALL_PTR(st)->root_path)); 3875 *error = EINVAL; 3876 return (0); 3877 } 3878 3879 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); 3880 3881 *error = 0; 3882 3883 return (1); 3884 } 3885 3886 static int 3887 snapshot_is_cacheable(struct di_state *st) 3888 { 3889 ASSERT(st->mem_size > 0); 3890 ASSERT(st->memlist != NULL); 3891 3892 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) != 3893 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { 3894 CACHE_DEBUG((DI_INFO, 3895 "not cacheable: incompatible flags: 0x%x", 3896 st->command)); 3897 return (0); 3898 } 3899 3900 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3901 CACHE_DEBUG((DI_INFO, 3902 "not cacheable: incompatible root path: %s", 3903 DI_ALL_PTR(st)->root_path)); 3904 return (0); 3905 } 3906 3907 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); 3908 3909 return (1); 3910 } 3911 3912 static int 3913 di_cache_lookup(struct di_state *st) 3914 { 3915 size_t rval; 3916 int cache_valid; 3917 3918 ASSERT(cache_args_valid(st, &cache_valid)); 3919 ASSERT(modrootloaded); 3920 3921 DI_CACHE_LOCK(di_cache); 3922 3923 /* 3924 * The following assignment determines the validity 3925 * of the cache as far as this snapshot is concerned. 3926 */ 3927 cache_valid = di_cache.cache_valid; 3928 3929 if (cache_valid && di_cache.cache_data == NULL) { 3930 di_cache_read(&di_cache); 3931 /* check for read or file error */ 3932 if (di_cache.cache_data == NULL) 3933 cache_valid = 0; 3934 } 3935 3936 if (cache_valid) { 3937 /* 3938 * Ok, the cache was valid as of this particular 3939 * snapshot. Copy the cached snapshot. This is safe 3940 * to do as the cache cannot be freed (we hold the 3941 * cache lock). Free the memory allocated in di_state 3942 * up until this point - we will simply copy everything 3943 * in the cache. 3944 */ 3945 3946 ASSERT(di_cache.cache_data != NULL); 3947 ASSERT(di_cache.cache_size > 0); 3948 3949 di_freemem(st); 3950 3951 rval = 0; 3952 if (di_cache2mem(&di_cache, st) > 0) { 3953 /* 3954 * map_size is size of valid data in the 3955 * cached snapshot and may be less than 3956 * size of the cache. 3957 */ 3958 ASSERT(DI_ALL_PTR(st)); 3959 rval = DI_ALL_PTR(st)->map_size; 3960 3961 ASSERT(rval >= sizeof (struct di_all)); 3962 ASSERT(rval <= di_cache.cache_size); 3963 } 3964 } else { 3965 /* 3966 * The cache isn't valid, we need to take a snapshot. 3967 * Set the command flags appropriately 3968 */ 3969 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); 3970 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); 3971 rval = di_cache_update(st); 3972 st->command = (DINFOCACHE & DIIOC_MASK); 3973 } 3974 3975 DI_CACHE_UNLOCK(di_cache); 3976 3977 /* 3978 * For cached snapshots, the devinfo driver always returns 3979 * a snapshot rooted at "/". 
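 * (cache_args_valid() has already rejected any root path other
 * than "/".)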
3980 */ 3981 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); 3982 3983 return ((int)rval); 3984 } 3985 3986 /* 3987 * This is a forced update of the cache - the previous state of the cache 3988 * may be: 3989 * - unpopulated 3990 * - populated and invalid 3991 * - populated and valid 3992 */ 3993 static int 3994 di_cache_update(struct di_state *st) 3995 { 3996 int rval; 3997 uint32_t crc; 3998 struct di_all *all; 3999 4000 ASSERT(DI_CACHE_LOCKED(di_cache)); 4001 ASSERT(snapshot_is_cacheable(st)); 4002 4003 /* 4004 * Free the in-core cache and the on-disk file (if they exist) 4005 */ 4006 i_ddi_di_cache_free(&di_cache); 4007 4008 /* 4009 * Set valid flag before taking the snapshot, 4010 * so that any invalidations that arrive 4011 * during or after the snapshot are not 4012 * removed by us. 4013 */ 4014 atomic_or_32(&di_cache.cache_valid, 1); 4015 4016 rval = di_snapshot_and_clean(st); 4017 4018 if (rval == 0) { 4019 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); 4020 return (0); 4021 } 4022 4023 DI_ALL_PTR(st)->map_size = rval; 4024 if (di_mem2cache(st, &di_cache) == 0) { 4025 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); 4026 return (0); 4027 } 4028 4029 ASSERT(di_cache.cache_data); 4030 ASSERT(di_cache.cache_size > 0); 4031 4032 /* 4033 * Now that we have cached the snapshot, compute its checksum. 4034 * The checksum is only computed over the valid data in the 4035 * cache, not the entire cache. 4036 * Also, set all the fields (except checksum) before computing 4037 * checksum. 4038 */ 4039 all = (struct di_all *)di_cache.cache_data; 4040 all->cache_magic = DI_CACHE_MAGIC; 4041 all->map_size = rval; 4042 4043 ASSERT(all->cache_checksum == 0); 4044 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); 4045 all->cache_checksum = crc; 4046 4047 di_cache_write(&di_cache); 4048 4049 return (rval); 4050 } 4051 4052 static void 4053 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 4054 { 4055 va_list ap; 4056 4057 if (di_cache_debug <= DI_QUIET) 4058 return; 4059 4060 if (di_cache_debug < msglevel) 4061 return; 4062 4063 switch (msglevel) { 4064 case DI_ERR: 4065 msglevel = CE_WARN; 4066 break; 4067 case DI_INFO: 4068 case DI_TRACE: 4069 default: 4070 msglevel = CE_NOTE; 4071 break; 4072 } 4073 4074 va_start(ap, fmt); 4075 vcmn_err(msglevel, fmt, ap); 4076 va_end(ap); 4077 } 4078