/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Driver for accessing the kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/vmsystm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args) if (di_debug >= 1) cmn_err args
#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

static int di_prop_dyn = 1;	/* enable dynamic property support */

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move the snapshot to different platforms.
 *
 * NOTE: Most callers should rely on the di_checkmem return value
 * being aligned, and reestablish *off_p with the aligned value, instead
 * of trying to align the size of their allocations: this approach
 * minimizes memory use.
 */
#define	DI_ALIGN(addr)	((addr + 7l) & ~7l)
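/*
 * Illustrative note (not part of the original source): DI_ALIGN rounds
 * an offset up to the next multiple of 8, e.g. DI_ALIGN(0) == 0,
 * DI_ALIGN(1) == 8, DI_ALIGN(8) == 8, DI_ALIGN(9) == 16.  Keeping all
 * snapshot offsets 8-byte aligned is what allows a snapshot taken on
 * one platform to be consumed on another, per the comment above.
 */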
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};

/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info	*dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, off_p)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(off_p); \
	((stack)->depth)++; \
}

#define	DI_ALL_PTR(s)	DI_ALL(di_mem_addr((s), 0))
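/*
 * Illustrative sketch (not part of the original source) of how these
 * stack macros are used by di_copytree()/di_copynode() further below:
 *
 *	PUSH_STACK(dsp, root, off_p);
 *	while (!EMPTY_STACK(dsp))
 *		off = di_copynode(TOP_NODE(dsp), dsp, st);
 *
 * Each PUSH_STACK() also takes the per-node ndi_devi_enter() lock,
 * which the matching POP_STACK() releases, so the walk holds a lock
 * chain from the root down to the node currently being copied.
 */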
/*
 * With devfs, the device tree has no global locks.  The device tree is
 * dynamic and dips may come and go if they are not locked locally.  Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree.  This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};


struct i_lnode;

typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link /w same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link /w same i_lnode */
} i_link_t;

typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;

/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))
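/*
 * Illustrative note (not part of the original source): a di_dkey
 * identifies a devinfo node not just by its dk_dip pointer but also by
 * the (dk_major, dk_inst, dk_nodeid) tuple captured when the key was
 * built.  If DR frees that dev_info and the allocator later hands out
 * the same address for a different device, the stale key no longer
 * compares equal in di_dkey_cmp(), so the hash lookup safely misses
 * instead of returning the wrong node.
 */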
/*
 * Check that the whole device tree is being configured as a pre-condition
 * for cleaning up /etc/devices files.
 */
#define	DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

typedef struct phci_walk_arg {
	di_off_t	off;
	struct di_state	*st;
} phci_walk_arg_t;

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct dev_info *, struct di_stack *,
    struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
    struct di_state *, struct dev_info *);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static void *di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
static int build_phci_list(dev_info_t *ph_devinfo, void *arg);

extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
    int (*)(dev_info_t *, void *), void *);


static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int	error;

	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error != 0) {
		mutex_destroy(&di_lock);
		return (error);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	error;

	error = mod_remove(&modlinkage);
	if (error != 0) {
		return (error);
	}

	mutex_destroy(&di_lock);
	return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int	error = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)di_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * All dev_t's map to the same, single instance.
		 */
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		break;
	}

	return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_ATTACH:
		di_states = kmem_zalloc(
		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
			kmem_free(di_states,
			    di_max_opens * sizeof (struct di_state *));
			ddi_remove_minor_node(dip, NULL);
			error = DDI_FAILURE;
		} else {
			di_dip = dip;
			ddi_report_dev(dip);

			error = DDI_SUCCESS;
		}
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_DETACH:
		ddi_remove_minor_node(dip, NULL);
		di_dip = NULL;
		kmem_free(di_states,
		    di_max_opens * sizeof (struct di_state *));

		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}
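/*
 * Illustrative note (not part of the original source): with
 * DI_NODE_SPECIES == 2, di_open() below scans slots m = 0, 2, 4, ...
 * for the full node and m = 1, 3, 5, ... for the read-only node, then
 * hands minor m + DI_NODE_SPECIES back to the caller.  The offset keeps
 * the cloned minors from colliding with the two parent minors (0 and
 * 1), and di_close()/di_ioctl() recover the slot index by subtracting
 * DI_NODE_SPECIES again.
 */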
/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device.  Each minor gets a separate
 * entry in the di_states[] table.  Based on the original minor number, we
 * discriminate opens of the full and read-only nodes.  If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int	m;
	minor_t	minor_parent = getminor(*devp);

	if (minor_parent != DI_FULL_PARENT &&
	    minor_parent != DI_READONLY_PARENT)
		return (ENXIO);

	mutex_enter(&di_lock);

	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
		if (di_states[m] != NULL)
			continue;

		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
		break;	/* It's ours. */
	}

	if (m >= di_max_opens) {
		/*
		 * maximum open instances for the device reached
		 */
		mutex_exit(&di_lock);
		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
		return (EAGAIN);
	}
	mutex_exit(&di_lock);

	ASSERT(m < di_max_opens);
	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));

	return (0);
}

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	if (m < 0) {
		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
		    m + DI_NODE_SPECIES);
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(m < di_max_opens && st != NULL);

	di_freemem(st);
	kmem_free(st, sizeof (struct di_state));

	/*
	 * empty the slot in the state table
	 */
	mutex_enter(&di_lock);
	di_states[m] = NULL;
	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));
	mutex_exit(&di_lock);

	return (0);
}
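/*
 * Illustrative note (not part of the original source): each open of the
 * driver carries a small snapshot state machine in di_iocstate, advanced
 * via di_setstate() (defined elsewhere in this file): IOC_IDLE ->
 * IOC_SNAP while a snapshot is being built, IOC_DONE once it is ready,
 * and IOC_COPY while DINFOUSRLD is copying it out.  Presumably
 * di_setstate() returns -1 on an invalid transition, which is what
 * turns overlapping ioctls into EBUSY in di_ioctl() below.
 */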
/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int		rv, error;
	di_off_t	off;
	struct di_all	*all;
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;
	major_t		i;
	char		*drv_name;
	size_t		map_size, size;
	struct di_mem	*dcp;
	int		ndi_flags;

	if (m < 0 || m >= di_max_opens) {
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(st != NULL);

	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

	switch (cmd) {
	case DINFOIDENT:
		/*
		 * This is called from di_init to verify that the driver
		 * opened is indeed devinfo.  The purpose is to guard against
		 * sending an ioctl to an unknown driver in case of an
		 * unresolved major number conflict during bfu.
		 */
		*rvalp = DI_MAGIC;
		return (0);

	case DINFOLODRV:
		/*
		 * Hold an installed driver and return the result
		 */
		if (DI_UNPRIVILEGED_NODE(m)) {
			/*
			 * Only the fully enabled instances may issue
			 * DINFOLODRV.
			 */
			return (EACCES);
		}

		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
			kmem_free(drv_name, MAXNAMELEN);
			return (EFAULT);
		}

		/*
		 * Some third-party drivers' _init() walks the device tree,
		 * so we load the driver module before configuring the driver.
		 */
		i = ddi_name_to_major(drv_name);
		if (ddi_hold_driver(i) == NULL) {
			kmem_free(drv_name, MAXNAMELEN);
			return (ENXIO);
		}

		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

		/*
		 * i_ddi_load_drvconf() below will trigger a reprobe
		 * via reset_nexus_flags().  NDI_DRV_CONF_REPROBE isn't
		 * needed here.
		 */
		modunload_disable();
		(void) i_ddi_load_drvconf(i);
		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
		kmem_free(drv_name, MAXNAMELEN);
		ddi_rele_driver(i);
		rv = i_ddi_devs_attached(i);
		modunload_enable();

		i_ddi_di_cache_invalidate(KM_SLEEP);

		return ((rv == DDI_SUCCESS)? 0 : ENXIO);

	case DINFOUSRLD:
		/*
		 * The case for copying the snapshot to userland
		 */
		if (di_setstate(st, IOC_COPY) == -1)
			return (EBUSY);

		map_size = DI_ALL_PTR(st)->map_size;
		if (map_size == 0) {
			(void) di_setstate(st, IOC_DONE);
			return (EFAULT);
		}

		/*
		 * copyout the snapshot
		 */
		map_size = (map_size + PAGEOFFSET) & PAGEMASK;

		/*
		 * Return the map size, so the caller may do a sanity
		 * check against the return value of the snapshot ioctl()
		 */
		*rvalp = (int)map_size;

		/*
		 * Copy one chunk at a time
		 */
		off = 0;
		dcp = st->memlist;
		while (map_size) {
			size = dcp->buf_size;
			if (map_size <= size) {
				size = map_size;
			}

			if (ddi_copyout(di_mem_addr(st, off),
			    (void *)(arg + off), size, mode) != 0) {
				(void) di_setstate(st, IOC_DONE);
				return (EFAULT);
			}

			map_size -= size;
			off += size;
			dcp = dcp->next;
		}

		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (0);

	default:
		if ((cmd & ~DIIOC_MASK) != DIIOC) {
			/*
			 * Invalid ioctl command
			 */
			return (ENOTTY);
		}
		/*
		 * take a snapshot
		 */
		st->command = cmd & DIIOC_MASK;
		/*FALLTHROUGH*/
	}

	/*
	 * Obtain enough memory to hold header + rootpath.  We prevent kernel
	 * memory exhaustion by freeing any previously allocated snapshot and
	 * refusing the operation; otherwise we would be allowing ioctl(),
	 * ioctl(), ioctl(), ..., panic.
	 */
	if (di_setstate(st, IOC_SNAP) == -1)
		return (EBUSY);

	/*
	 * Initial memlist always holds di_all and the root_path - and
	 * is at least a page in size.
	 */
	size = sizeof (struct di_all) +
	    sizeof (((struct dinfo_io *)(NULL))->root_path);
	if (size < PAGESIZE)
		size = PAGESIZE;
	off = di_checkmem(st, 0, size);
	all = DI_ALL_PTR(st);
	off += sizeof (struct di_all);	/* real length of di_all */

	all->devcnt = devcnt;
	all->command = st->command;
	all->version = DI_SNAPSHOT_VERSION;
	all->top_vhci_devinfo = 0;	/* filled by build_vhci_list. */

	/*
	 * Note the endianness in case we need to transport the snapshot
	 * over the network.
	 */
#if defined(_LITTLE_ENDIAN)
	all->endianness = DI_LITTLE_ENDIAN;
#else
	all->endianness = DI_BIG_ENDIAN;
#endif

	/* Copyin ioctl args, store in the snapshot. */
	if (copyinstr((void *)arg, all->root_path,
	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	}
	off += size;			/* real length of root_path */

	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EINVAL);
	}

	error = 0;
	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (error);
	}
	/*
	 * Only the fully enabled version may force load drivers or read
	 * the parent private data from a driver.
	 */
	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	    DI_UNPRIVILEGED_NODE(m)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EACCES);
	}

	/* Do we need private data? */
	if (st->command & DINFOPRIVDATA) {
		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			/*
			 * Cannot copy private data from 64-bit kernel
			 * to 32-bit app
			 */
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EINVAL);
		}
		case DDI_MODEL_NONE:
			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
				di_freemem(st);
				(void) di_setstate(st, IOC_IDLE);
				return (EFAULT);
			}
			break;
		}
#else /* !_MULTI_DATAMODEL */
		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EFAULT);
		}
#endif /* _MULTI_DATAMODEL */
	}

	all->top_devinfo = DI_ALIGN(off);

	/*
	 * For cache lookups we reallocate memory from scratch,
	 * so the value of "all" is no longer valid.
	 */
	all = NULL;

	if (st->command & DINFOCACHE) {
		*rvalp = di_cache_lookup(st);
	} else if (snapshot_is_cacheable(st)) {
		DI_CACHE_LOCK(di_cache);
		*rvalp = di_cache_update(st);
		DI_CACHE_UNLOCK(di_cache);
	} else
		*rvalp = di_snapshot_and_clean(st);

	if (*rvalp) {
		DI_ALL_PTR(st)->map_size = *rvalp;
		(void) di_setstate(st, IOC_DONE);
	} else {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
	}

	return (0);
}
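/*
 * Illustrative sketch (not part of the original source) of how a
 * userland consumer might drive the ioctls above.  The real consumer is
 * libdevinfo's di_init(); the exact command name and call sequence here
 * are assumptions:
 *
 *	fd = open("/dev/devinfo", O_RDONLY);
 *	ioctl(fd, DINFOIDENT);			// expect DI_MAGIC back
 *	(void) strcpy(dinfo_io.root_path, "/");
 *	map_size = ioctl(fd, DINFOCPYALL, &dinfo_io);	// take snapshot
 *	buf = malloc(map_size);
 *	ioctl(fd, DINFOUSRLD, buf);		// copy the snapshot out
 */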
/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
	struct di_mem	*mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);

	/*
	 * Round up size to the nearest power of 2.  If it is less
	 * than st->mem_size, set it to st->mem_size (i.e.,
	 * the mem_size is doubled every time) to reduce the
	 * number of memory allocations.
	 */
	size_t tmp = 1;
	while (tmp < size) {
		tmp <<= 1;
	}
	size = (tmp > st->mem_size) ? tmp : st->mem_size;

	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
	mem->buf_size = size;

	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

	if (st->mem_size == 0) {	/* first chunk */
		st->memlist = mem;
	} else {
		/*
		 * locate the end of the linked list and add a chunk at the end
		 */
		struct di_mem *dcp = st->memlist;
		while (dcp->next != NULL) {
			dcp = dcp->next;
		}

		dcp->next = mem;
	}

	st->mem_size += size;
}

/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
	struct di_mem	*dcp;
	size_t		copysz;

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		return;
	}

	copysz = 0;
	for (dcp = st->memlist; dcp; dcp = dcp->next) {

		ASSERT(bufsiz > 0);

		if (bufsiz <= dcp->buf_size)
			copysz = bufsiz;
		else
			copysz = dcp->buf_size;

		bcopy(dcp->buf, buf, copysz);

		buf += copysz;
		bufsiz -= copysz;

		if (bufsiz == 0)
			break;
	}
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
	struct di_mem	*dcp, *tmp;

	dcmn_err2((CE_CONT, "di_freemem\n"));

	if (st->mem_size) {
		dcp = st->memlist;
		while (dcp) {	/* traverse the linked list */
			tmp = dcp;
			dcp = dcp->next;
			ddi_umem_free(tmp->cook);
			kmem_free(tmp, sizeof (struct di_mem));
		}
		st->mem_size = 0;
		st->memlist = NULL;
	}

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
}

/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
	caddr_t	pa;

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
		return (0);
	}

	ASSERT(cache->cache_data);

	di_allocmem(st, cache->cache_size);

	pa = di_mem_addr(st, 0);

	ASSERT(pa);

	/*
	 * Verify that di_allocmem() allocates contiguous memory,
	 * so that it is safe to do a straight bcopy()
	 */
	ASSERT(st->memlist != NULL);
	ASSERT(st->memlist->next == NULL);
	bcopy(cache->cache_data, pa, cache->cache_size);

	return (cache->cache_size);
}
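/*
 * Illustrative note (not part of the original source): the single-chunk
 * ASSERTs above hold because di_cache2mem() is only called with
 * st->mem_size == 0, so di_allocmem(st, cache->cache_size) satisfies
 * the whole request with one chunk of at least cache_size bytes; the
 * cached snapshot can therefore be restored with a single bcopy().
 */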
/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *	- 0 on failure
 *	- size of copied data on success
 */
static size_t
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
	size_t	map_size;

	ASSERT(cache->cache_size == 0);
	ASSERT(cache->cache_data == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
		return (0);
	}

	ASSERT(st->memlist);

	/*
	 * The size of the memory list may be much larger than the
	 * size of valid data (map_size).  Cache only the valid data.
	 */
	map_size = DI_ALL_PTR(st)->map_size;
	if (map_size == 0 || map_size < sizeof (struct di_all) ||
	    map_size > st->mem_size) {
		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
		return (0);
	}

	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
	cache->cache_size = map_size;
	di_copymem(st, cache->cache_data, cache->cache_size);

	return (map_size);
}

/*
 * Make sure there are at least "size" bytes of memory left before
 * going on.  Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
	    off, (int)size));

	/*
	 * di_checkmem() shouldn't be called with a size of zero.
	 * But in case it is, we want to make sure we return a valid
	 * offset within the memlist and not an offset that points us
	 * at the end of the memlist.
	 */
	if (size == 0) {
		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
		size = 1;
	}

	off = DI_ALIGN(off);
	if ((st->mem_size - off) < size) {
		off = st->mem_size;
		di_allocmem(st, size);
	}

	/* verify that the return value is aligned */
	ASSERT(off == DI_ALIGN(off));
	return (off);
}
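/*
 * Illustrative note (not part of the original source): callers follow a
 * common pattern with di_checkmem(), e.g.
 *
 *	off = di_checkmem(st, off, size);	// aligned, space ensured
 *	bcopy(src, di_mem_addr(st, off), size);
 *	off += size;
 *
 * i.e. they store the aligned offset back before advancing it, per the
 * NOTE at DI_ALIGN near the top of this file.
 */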
/*
 * Copy the private data format from the ioctl arg.
 * On success, the ending offset is returned.  On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
	di_off_t		size;
	struct di_priv_data	*priv;
	struct di_all		*all = DI_ALL_PTR(st);

	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	    off, (void *)arg, mode));

	/*
	 * Copyin data and check version.
	 * We only handle private data version 0.
	 */
	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
		kmem_free(priv, sizeof (struct di_priv_data));
		return (0);
	}

	/*
	 * Save di_priv_data copied from userland in the snapshot.
	 */
	all->pd_version = priv->version;
	all->n_ppdata = priv->n_parent;
	all->n_dpdata = priv->n_driver;

	/*
	 * copyin private data format, modify offset accordingly
	 */
	if (all->n_ppdata) {	/* parent private data format */
		/*
		 * check memory
		 */
		size = all->n_ppdata * sizeof (struct di_priv_format);
		all->ppdata_format = off = di_checkmem(st, off, size);
		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	if (all->n_dpdata) {	/* driver private data format */
		/*
		 * check memory
		 */
		size = all->n_dpdata * sizeof (struct di_priv_format);
		all->dpdata_format = off = di_checkmem(st, off, size);
		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	kmem_free(priv, sizeof (struct di_priv_data));
	return (off);
}

/*
 * Return the real address based on the offset (off) within the snapshot
 */
static void *
di_mem_addr(struct di_state *st, di_off_t off)
{
	struct di_mem	*dcp = st->memlist;

	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	    (void *)dcp, off));

	ASSERT(off < st->mem_size);

	while (off >= dcp->buf_size) {
		off -= dcp->buf_size;
		dcp = dcp->next;
	}

	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	    off, (void *)(dcp->buf + off)));

	return (dcp->buf + off);
}

/*
 * Ideally we would use the whole key to derive the hash
 * value.  However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
	struct di_key	*dik = key;
	size_t		rshift;
	void		*ptr;

	ASSERT(arg == NULL);

	switch (dik->k_type) {
	case DI_DKEY:
		ptr = dik->k_u.dkey.dk_dip;
		rshift = highbit(sizeof (struct dev_info));
		break;
	case DI_PKEY:
		ptr = dik->k_u.pkey.pk_pip;
		rshift = highbit(sizeof (struct mdi_pathinfo));
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
	return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
	char		*path_addr;
	struct di_key	*dik = key;

	switch (dik->k_type) {
	case DI_DKEY:
		break;
	case DI_PKEY:
		path_addr = dik->k_u.pkey.pk_path_addr;
		if (path_addr)
			kmem_free(path_addr, strlen(path_addr) + 1);
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}

	kmem_free(dik, sizeof (struct di_key));
}
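/*
 * Illustrative note (not part of the original source): the rshift value
 * handed to mod_hash_byptr() above discards the low-order bits of the
 * pointer.  Since dev_info (resp. mdi_pathinfo) allocations are at
 * least sizeof (struct dev_info) bytes apart, those low bits carry
 * little information; shifting them out spreads consecutive
 * allocations across hash buckets.
 */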
static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
	if (dk1->dk_dip != dk2->dk_dip)
		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

	if (dk1->dk_major != DDI_MAJOR_T_NONE &&
	    dk2->dk_major != DDI_MAJOR_T_NONE) {
		if (dk1->dk_major != dk2->dk_major)
			return (dk1->dk_major > dk2->dk_major ? 1 : -1);

		if (dk1->dk_inst != dk2->dk_inst)
			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
	}

	if (dk1->dk_nodeid != dk2->dk_nodeid)
		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

	return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
	char	*p1, *p2;
	int	rv;

	if (pk1->pk_pip != pk2->pk_pip)
		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

	p1 = pk1->pk_path_addr;
	p2 = pk2->pk_path_addr;

	p1 = p1 ? p1 : "";
	p2 = p2 ? p2 : "";

	rv = strcmp(p1, p2);
	if (rv)
		return (rv > 0 ? 1 : -1);

	if (pk1->pk_client != pk2->pk_client)
		return (pk1->pk_client > pk2->pk_client ? 1 : -1);

	if (pk1->pk_phci != pk2->pk_phci)
		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

	return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	struct di_key	*dik1, *dik2;

	dik1 = key1;
	dik2 = key2;

	if (dik1->k_type != dik2->k_type) {
		panic("devinfo: mismatched keys");
		/*NOTREACHED*/
	}

	switch (dik1->k_type) {
	case DI_DKEY:
		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
	case DI_PKEY:
		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
}
/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
	di_off_t	off;
	struct di_all	*all;
	dev_info_t	*rootnode;
	char		buf[80];
	int		plen;
	char		*path;
	vnode_t		*vp;

	all = DI_ALL_PTR(st);
	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

	/*
	 * Verify the path before entrusting it to e_ddi_hold_devi_by_path
	 * because some platforms have OBP bugs where executing the
	 * NDI_PROMNAME code path against an invalid path results in panic.
	 * The lookupnameat is done relative to rootdir without a leading '/'
	 * on "devices/" to force the lookup to occur in the global zone.
	 */
	plen = strlen("devices/") + strlen(all->root_path) + 1;
	path = kmem_alloc(plen, KM_SLEEP);
	(void) snprintf(path, plen, "devices/%s", all->root_path);
	if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		kmem_free(path, plen);
		return (0);
	}
	kmem_free(path, plen);
	VN_RELE(vp);

	/*
	 * Hold the devinfo node referred to by the path.
	 */
	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
	if (rootnode == NULL) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		return (0);
	}

	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered dips (statep=%p)", (void *)st);

	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);


	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered pips (statep=%p)", (void *)st);

	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);

	/*
	 * copy the device tree
	 */
	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

	if (DINFOPATH & st->command) {
		mdi_walk_vhcis(build_vhci_list, st);
	}

	ddi_release_devi(rootnode);

	/*
	 * copy the devnames array
	 */
	all->devnames = off;
	off = di_copydevnm(&all->devnames, st);


	/* initialize the hash tables */
	st->lnode_count = 0;
	st->link_count = 0;

	if (DINFOLYR & st->command) {
		off = di_getlink_data(off, st);
	}

	/*
	 * Free up the hash tables
	 */
	mod_hash_destroy_hash(st->reg_dip_hash);
	mod_hash_destroy_hash(st->reg_pip_hash);

	/*
	 * Record the timestamp now that we are done with the snapshot.
	 *
	 * We compute the checksum later, and then only if we cache
	 * the snapshot, since checksumming adds some overhead.
	 * The checksum is checked later if we read the cache file
	 * from disk.
	 *
	 * Set the checksum field to 0 as the CRC is calculated with that
	 * field set to 0.
	 */
	all->snapshot_time = ddi_get_time();
	all->cache_checksum = 0;

	ASSERT(all->snapshot_time != 0);

	return (off);
}

/*
 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
 */
static di_off_t
di_snapshot_and_clean(struct di_state *st)
{
	di_off_t	off;

	modunload_disable();
	off = di_snapshot(st);
	if (off != 0 && (st->command & DINFOCLEANUP)) {
		ASSERT(DEVICES_FILES_CLEANABLE(st));
		/*
		 * Cleanup /etc/devices files:
		 * In order to accurately account for the system configuration
		 * in /etc/devices files, the appropriate drivers must be
		 * fully configured before the cleanup starts.
		 * So enable modunload only after the cleanup.
		 */
		i_ddi_clean_devices_files();
		/*
		 * Remove backing store nodes for unused devices,
		 * which retain past permissions customizations
		 * and may be undesired for newly configured devices.
		 */
		dev_devices_cleanup();
	}
	modunload_enable();

	return (off);
}
/*
 * construct the vhci linkage in the snapshot.
 */
static int
build_vhci_list(dev_info_t *vh_devinfo, void *arg)
{
	struct di_all	*all;
	struct di_node	*me;
	struct di_state	*st;
	di_off_t	off;
	phci_walk_arg_t	pwa;

	dcmn_err3((CE_CONT, "build_vhci list\n"));

	dcmn_err3((CE_CONT, "vhci node %s%d\n",
	    ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));

	st = (struct di_state *)arg;
	if (di_dip_find(st, vh_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
	    st->mem_size, off));

	all = DI_ALL_PTR(st);
	if (all->top_vhci_devinfo == 0) {
		all->top_vhci_devinfo = off;
	} else {
		me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));

		while (me->next_vhci != 0) {
			me = DI_NODE(di_mem_addr(st, me->next_vhci));
		}

		me->next_vhci = off;
	}

	pwa.off = off;
	pwa.st = st;
	mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);

	return (DDI_WALK_CONTINUE);
}

/*
 * construct the phci linkage for the given vhci in the snapshot.
 */
static int
build_phci_list(dev_info_t *ph_devinfo, void *arg)
{
	struct di_node	*vh_di_node;
	struct di_node	*me;
	phci_walk_arg_t	*pwa;
	di_off_t	off;

	pwa = (phci_walk_arg_t *)arg;

	dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
	    pwa->off));

	vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
	if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
	    ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));

	if (vh_di_node->top_phci == 0) {
		vh_di_node->top_phci = off;
		return (DDI_WALK_CONTINUE);
	}

	me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));

	while (me->next_phci != 0) {
		me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
	}
	me->next_phci = off;

	return (DDI_WALK_CONTINUE);
}

/*
 * Assumes all devinfo nodes in the device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
{
	struct dev_info	*node;
	struct di_node	*me;
	di_off_t	off;

	ASSERT(mutex_owned(&dnp->dn_lock));

	node = DEVI(dnp->dn_head);
	for (; node; node = node->devi_next) {
		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
			continue;

		ASSERT(off > 0);
		me = DI_NODE(di_mem_addr(st, off));
		ASSERT(me->next == 0 || me->next == -1);
		/*
		 * Only nodes which were BOUND when they were
		 * snapshotted will be added to the per-driver list.
		 */
		if (me->next != -1)
			continue;

		*off_p = off;
		off_p = &me->next;
	}

	*off_p = 0;
}
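/*
 * Illustrative sketch (not part of the original source) of the
 * multipath linkage built above: all->top_vhci_devinfo points at the
 * first vhci di_node, chained through next_vhci; each vhci's top_phci
 * points at its first phci di_node, chained through next_phci.  For
 * two vhcis v1, v2 where v1 has phcis p1 and p2:
 *
 *	all->top_vhci_devinfo -> v1 --(next_vhci)--> v2
 *	v1->top_phci -> p1 --(next_phci)--> p2
 */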
/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * Also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
	int		i;
	di_off_t	off;
	size_t		size;
	struct di_devnm	*dnp;

	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

	/*
	 * make sure there is some allocated memory
	 */
	size = devcnt * sizeof (struct di_devnm);
	*off_p = off = di_checkmem(st, *off_p, size);
	dnp = DI_DEVNM(di_mem_addr(st, off));
	off += size;

	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	    devcnt, off));

	for (i = 0; i < devcnt; i++) {
		if (devnamesp[i].dn_name == NULL) {
			continue;
		}

		/*
		 * dn_name is not freed during driver unload or removal.
		 *
		 * There is a race condition when make_devname() changes
		 * dn_name during our strcpy.  This should be rare since
		 * only add_drv does this.  At any rate, we never had a
		 * problem with ddi_name_to_major(), which should have
		 * the same problem.
		 */
		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
		    devnamesp[i].dn_name, devnamesp[i].dn_instance, off));

		size = strlen(devnamesp[i].dn_name) + 1;
		dnp[i].name = off = di_checkmem(st, off, size);
		(void) strcpy((char *)di_mem_addr(st, off),
		    devnamesp[i].dn_name);
		off += size;

		mutex_enter(&devnamesp[i].dn_lock);

		/*
		 * Snapshot the per-driver node list
		 */
		snap_driver_list(st, &devnamesp[i], &dnp[i].head);

		/*
		 * This is not used by libdevinfo, leave it for now
		 */
		dnp[i].flags = devnamesp[i].dn_flags;
		dnp[i].instance = devnamesp[i].dn_instance;

		/*
		 * get global properties
		 */
		if ((DINFOPROP & st->command) &&
		    devnamesp[i].dn_global_prop_ptr) {
			dnp[i].global_prop = off;
			off = di_getprop(DI_PROP_GLB_LIST,
			    &devnamesp[i].dn_global_prop_ptr->prop_list,
			    &dnp[i].global_prop, st, NULL);
		}

		/*
		 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
		 */
		if (CB_DRV_INSTALLED(devopsp[i])) {
			if (devopsp[i]->devo_cb_ops) {
				dnp[i].ops |= DI_CB_OPS;
				if (devopsp[i]->devo_cb_ops->cb_str)
					dnp[i].ops |= DI_STREAM_OPS;
			}
			if (NEXUS_DRV(devopsp[i])) {
				dnp[i].ops |= DI_BUS_OPS;
			}
		}

		mutex_exit(&devnamesp[i].dn_lock);
	}

	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

	return (off);
}
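/*
 * Illustrative note (not part of the original source): the per-driver
 * head offsets stored in dnp[i].head by snap_driver_list() rely on a
 * convention set up in di_copynode() below: a BOUND node's di_node is
 * snapshotted with next == -1, and snap_driver_list() later overwrites
 * that sentinel with the offset of the next node owned by the same
 * driver (0 terminating the list).
 */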
/*
 * Copy the kernel devinfo tree.  The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t	off;
	struct dev_info	*node;
	struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/* force attach drivers */
	if (i_ddi_devi_attached((dev_info_t *)root) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.
	 */
	while (!EMPTY_STACK(dsp)) {
		node = TOP_NODE(dsp);
		off = di_copynode(node, dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}

/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot.  The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
{
	di_off_t	off;
	struct di_node	*me;
	size_t		size;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
	ASSERT((node != NULL) && (node == TOP_NODE(dsp)));

	/*
	 * check memory usage, and fix offsets accordingly.
	 */
	size = sizeof (struct di_node);
	*(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
	me = DI_NODE(di_mem_addr(st, off));
	me->self = off;
	off += size;

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state
	 */
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->flags = node->devi_flags;
	me->node_state = node->devi_node_state;
	me->next_vhci = 0;	/* Filled up by build_vhci_list. */
	me->top_phci = 0;	/* Filled up by build_phci_list. */
	me->next_phci = 0;	/* Filled up by build_phci_list. */
	me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
	me->user_private_data = NULL;

	/*
	 * Get the parent's offset in the snapshot from the stack
	 * and store it in the current node
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);
#ifdef DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid_str) {
		ddi_devid_t	devid;

		/*
		 * The devid is now represented as a property.  For
		 * compatibility with the di_devid() interface in libdevinfo
		 * we must return it as a binary structure in the snapshot.
		 * When (if) di_devid() is removed from libdevinfo then the
		 * code related to DEVID_COMPATIBILITY can be removed.
		 */
		if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
		    DDI_SUCCESS) {
			size = ddi_devid_sizeof(devid);
			off = di_checkmem(st, off, size);
			me->devid = off;
			bcopy(devid, di_mem_addr(st, off), size);
			off += size;
			ddi_devid_free(devid);
		}
	}
#endif	/* DEVID_COMPATIBILITY */

	if (node->devi_node_name) {
		size = strlen(node->devi_node_name) + 1;
		me->node_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += size;
	}

	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		size = node->devi_compat_length;
		me->compat_names = off = di_checkmem(st, off, size);
		me->compat_length = (int)size;
		bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
		off += size;
	}

	if (node->devi_addr) {
		size = strlen(node->devi_addr) + 1;
		me->address = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += size;
	}

	if (node->devi_binding_name) {
		size = strlen(node->devi_binding_name) + 1;
		me->bind_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += size;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-instance list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
		goto priv_data;
	}

	/*
	 * LOCKING: We already have an active ndi_devi_enter to gather the
	 * minor data, and we will take devi_lock to gather properties as
	 * needed off di_getprop.
	 */
	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	ASSERT(DEVI_BUSY_OWNED(node));
	if (node->devi_minor) {		/* minor data */
		me->minor_data = off;
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_VHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_VHCI;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
		me->multipath_client = off;
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_client, me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}
off=%d", 1870 me->multipath_phci, 1871 (void *)node, node->devi_mdi_component, off)); 1872 } 1873 1874 property: 1875 if (!(DINFOPROP & st->command)) { 1876 goto priv_data; 1877 } 1878 1879 if (node->devi_drv_prop_ptr) { /* driver property list */ 1880 me->drv_prop = off; 1881 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr, 1882 &me->drv_prop, st, node); 1883 } 1884 1885 if (node->devi_sys_prop_ptr) { /* system property list */ 1886 me->sys_prop = off; 1887 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr, 1888 &me->sys_prop, st, node); 1889 } 1890 1891 if (node->devi_hw_prop_ptr) { /* hardware property list */ 1892 me->hw_prop = off; 1893 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr, 1894 &me->hw_prop, st, node); 1895 } 1896 1897 if (node->devi_global_prop_list == NULL) { 1898 me->glob_prop = (di_off_t)-1; /* not global property */ 1899 } else { 1900 /* 1901 * Make copy of global property list if this devinfo refers 1902 * global properties different from what's on the devnames 1903 * array. It can happen if there has been a forced 1904 * driver.conf update. See mod_drv(1M). 1905 */ 1906 ASSERT(me->drv_major != -1); 1907 if (node->devi_global_prop_list != 1908 devnamesp[me->drv_major].dn_global_prop_ptr) { 1909 me->glob_prop = off; 1910 off = di_getprop(DI_PROP_GLB_LIST, 1911 &node->devi_global_prop_list->prop_list, 1912 &me->glob_prop, st, node); 1913 } 1914 } 1915 1916 priv_data: 1917 if (!(DINFOPRIVDATA & st->command)) { 1918 goto pm_info; 1919 } 1920 1921 if (ddi_get_parent_data((dev_info_t *)node) != NULL) { 1922 me->parent_data = off; 1923 off = di_getppdata(node, &me->parent_data, st); 1924 } 1925 1926 if (ddi_get_driver_private((dev_info_t *)node) != NULL) { 1927 me->driver_data = off; 1928 off = di_getdpdata(node, &me->driver_data, st); 1929 } 1930 1931 pm_info: /* NOT implemented */ 1932 1933 subtree: 1934 /* keep the stack aligned */ 1935 off = DI_ALIGN(off); 1936 1937 if (!(DINFOSUBTREE & st->command)) { 1938 POP_STACK(dsp); 1939 return (off); 1940 } 1941 1942 child: 1943 /* 1944 * If there is a child--push child onto stack. 1945 * Hold the parent busy while doing so. 
child:
	/*
	 * If there is a child--push the child onto the stack.
	 * Hold the parent busy while doing so.
	 */
	if (node->devi_child) {
		me->child = off;
		PUSH_STACK(dsp, node->devi_child, &me->child);
		return (me->child);
	}

sibling:
	/*
	 * no child node, unroll the stack till a sibling of
	 * a parent node is found or the root node is reached
	 */
	POP_STACK(dsp);
	while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) {
		node = TOP_NODE(dsp);
		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
		POP_STACK(dsp);
	}

	if (!EMPTY_STACK(dsp)) {
		/*
		 * a sibling is found, replace the top of the stack with
		 * its sibling
		 */
		me->sibling = off;
		PUSH_STACK(dsp, node->devi_sibling, &me->sibling);
		return (me->sibling);
	}

	/*
	 * DONE with all nodes
	 */
	return (off);
}

static i_lnode_t *
i_lnode_alloc(int modid)
{
	i_lnode_t	*i_lnode;

	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);

	ASSERT(modid != -1);
	i_lnode->modid = modid;

	return (i_lnode);
}

static void
i_lnode_free(i_lnode_t *i_lnode)
{
	kmem_free(i_lnode, sizeof (i_lnode_t));
}

static void
i_lnode_check_free(i_lnode_t *i_lnode)
{
	/* This lnode and its dip must have been snapshotted */
	ASSERT(i_lnode->self > 0);
	ASSERT(i_lnode->di_node->self > 0);

	/* at least 1 link (in or out) must exist for this lnode */
	ASSERT(i_lnode->link_in || i_lnode->link_out);

	i_lnode_free(i_lnode);
}

static i_link_t *
i_link_alloc(int spec_type)
{
	i_link_t	*i_link;

	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
	i_link->spec_type = spec_type;

	return (i_link);
}

static void
i_link_check_free(i_link_t *i_link)
{
	/* This link must have been snapshotted */
	ASSERT(i_link->self > 0);

	/* Both endpoint lnodes must exist for this link */
	ASSERT(i_link->src_lnode);
	ASSERT(i_link->tgt_lnode);

	kmem_free(i_link, sizeof (i_link_t));
}

/*ARGSUSED*/
static uint_t
i_lnode_hashfunc(void *arg, mod_hash_key_t key)
{
	i_lnode_t	*i_lnode = (i_lnode_t *)key;
	struct di_node	*ptr;
	dev_t		dev;

	dev = i_lnode->devt;
	if (dev != DDI_DEV_T_NONE)
		return (i_lnode->modid + getminor(dev) + getmajor(dev));

	ptr = i_lnode->di_node;
	ASSERT(ptr->self > 0);
	if (ptr) {
		uintptr_t k = (uintptr_t)ptr;
		k >>= (int)highbit(sizeof (struct di_node));
		return ((uint_t)k);
	}

	return (i_lnode->modid);
}

static int
i_lnode_cmp(void *arg1, void *arg2)
{
	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;

	if (i_lnode1->modid != i_lnode2->modid) {
		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
	}

	if (i_lnode1->di_node != i_lnode2->di_node)
		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);

	if (i_lnode1->devt != i_lnode2->devt)
		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);

	return (0);
}
2083 */ 2084 static int 2085 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg) 2086 { 2087 struct di_state *st = (struct di_state *)arg; 2088 i_lnode_t *src_lnode, *tgt_lnode, *i_lnode; 2089 i_link_t **i_link_next, *i_link; 2090 di_off_t soff, toff; 2091 mod_hash_val_t nodep = NULL; 2092 int res; 2093 2094 /* 2095 * if the source or target of this device usage information doesn't 2096 * correspond to a device node then we don't report it via 2097 * libdevinfo so return. 2098 */ 2099 if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL)) 2100 return (LDI_USAGE_CONTINUE); 2101 2102 ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip)); 2103 ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip)); 2104 2105 /* 2106 * Skip the ldi_usage if either src or tgt dip is not in the 2107 * snapshot. This saves us from pruning bad lnodes/links later. 2108 */ 2109 if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0) 2110 return (LDI_USAGE_CONTINUE); 2111 if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0) 2112 return (LDI_USAGE_CONTINUE); 2113 2114 ASSERT(soff > 0); 2115 ASSERT(toff > 0); 2116 2117 /* 2118 * allocate an i_lnode and add it to the lnode hash 2119 * if it is not already present. For this particular 2120 * link the lnode is a source, but it may 2121 * participate as tgt or src in any number of layered 2122 * operations - so it may already be in the hash. 2123 */ 2124 i_lnode = i_lnode_alloc(ldi_usage->src_modid); 2125 i_lnode->di_node = DI_NODE(di_mem_addr(st, soff)); 2126 i_lnode->devt = ldi_usage->src_devt; 2127 2128 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2129 if (res == MH_ERR_NOTFOUND) { 2130 /* 2131 * new i_lnode 2132 * add it to the hash and increment the lnode count 2133 */ 2134 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2135 ASSERT(res == 0); 2136 st->lnode_count++; 2137 src_lnode = i_lnode; 2138 } else { 2139 /* this i_lnode already exists in the lnode_hash */ 2140 i_lnode_free(i_lnode); 2141 src_lnode = (i_lnode_t *)nodep; 2142 } 2143 2144 /* 2145 * allocate a tgt i_lnode and add it to the lnode hash 2146 */ 2147 i_lnode = i_lnode_alloc(ldi_usage->tgt_modid); 2148 i_lnode->di_node = DI_NODE(di_mem_addr(st, toff)); 2149 i_lnode->devt = ldi_usage->tgt_devt; 2150 2151 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2152 if (res == MH_ERR_NOTFOUND) { 2153 /* 2154 * new i_lnode 2155 * add it to the hash and increment the lnode count 2156 */ 2157 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2158 ASSERT(res == 0); 2159 st->lnode_count++; 2160 tgt_lnode = i_lnode; 2161 } else { 2162 /* this i_lnode already exists in the lnode_hash */ 2163 i_lnode_free(i_lnode); 2164 tgt_lnode = (i_lnode_t *)nodep; 2165 } 2166 2167 /* 2168 * allocate a i_link 2169 */ 2170 i_link = i_link_alloc(ldi_usage->tgt_spec_type); 2171 i_link->src_lnode = src_lnode; 2172 i_link->tgt_lnode = tgt_lnode; 2173 2174 /* 2175 * add this link onto the src i_lnodes outbound i_link list 2176 */ 2177 i_link_next = &(src_lnode->link_out); 2178 while (*i_link_next != NULL) { 2179 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) && 2180 (i_link->spec_type == (*i_link_next)->spec_type)) { 2181 /* this link already exists */ 2182 kmem_free(i_link, sizeof (i_link_t)); 2183 return (LDI_USAGE_CONTINUE); 2184 } 2185 i_link_next = &((*i_link_next)->src_link_next); 2186 } 2187 *i_link_next = i_link; 2188 2189 /* 2190 * add this link onto the tgt i_lnodes inbound i_link list 2191 */ 2192 i_link_next = &(tgt_lnode->link_in); 2193 while (*i_link_next != NULL) { 2194 
ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); 2195 i_link_next = &((*i_link_next)->tgt_link_next); 2196 } 2197 *i_link_next = i_link; 2198 2199 /* 2200 * add this i_link to the link hash 2201 */ 2202 res = mod_hash_insert(st->link_hash, i_link, i_link); 2203 ASSERT(res == 0); 2204 st->link_count++; 2205 2206 return (LDI_USAGE_CONTINUE); 2207 } 2208 2209 struct i_layer_data { 2210 struct di_state *st; 2211 int lnode_count; 2212 int link_count; 2213 di_off_t lnode_off; 2214 di_off_t link_off; 2215 }; 2216 2217 /*ARGSUSED*/ 2218 static uint_t 2219 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2220 { 2221 i_link_t *i_link = (i_link_t *)key; 2222 struct i_layer_data *data = arg; 2223 struct di_link *me; 2224 struct di_lnode *melnode; 2225 struct di_node *medinode; 2226 2227 ASSERT(i_link->self == 0); 2228 2229 i_link->self = data->link_off + 2230 (data->link_count * sizeof (struct di_link)); 2231 data->link_count++; 2232 2233 ASSERT(data->link_off > 0 && data->link_count > 0); 2234 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ 2235 ASSERT(data->link_count <= data->st->link_count); 2236 2237 /* fill in fields for the di_link snapshot */ 2238 me = DI_LINK(di_mem_addr(data->st, i_link->self)); 2239 me->self = i_link->self; 2240 me->spec_type = i_link->spec_type; 2241 2242 /* 2243 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t 2244 * are created during the LDI table walk. Since we are 2245 * walking the link hash, the lnode hash has already been 2246 * walked and the lnodes have been snapshotted. Save lnode 2247 * offsets. 2248 */ 2249 me->src_lnode = i_link->src_lnode->self; 2250 me->tgt_lnode = i_link->tgt_lnode->self; 2251 2252 /* 2253 * Save this link's offset in the src_lnode snapshot's link_out 2254 * field 2255 */ 2256 melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode)); 2257 me->src_link_next = melnode->link_out; 2258 melnode->link_out = me->self; 2259 2260 /* 2261 * Put this link on the tgt_lnode's link_in field 2262 */ 2263 melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode)); 2264 me->tgt_link_next = melnode->link_in; 2265 melnode->link_in = me->self; 2266 2267 /* 2268 * An i_lnode_t is only created if the corresponding dip exists 2269 * in the snapshot. A pointer to the di_node is saved in the 2270 * i_lnode_t when it is allocated. For this link, get the di_node 2271 * for the source lnode. Then put the link on the di_node's list 2272 * of src links 2273 */ 2274 medinode = i_link->src_lnode->di_node; 2275 me->src_node_next = medinode->src_links; 2276 medinode->src_links = me->self; 2277 2278 /* 2279 * Put this link on the tgt_links list of the target 2280 * dip. 
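 * Like the other snapshot lists built during this walk, the link is
 * prepended, so entries end up in reverse discovery order.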
2281 */ 2282 medinode = i_link->tgt_lnode->di_node; 2283 me->tgt_node_next = medinode->tgt_links; 2284 medinode->tgt_links = me->self; 2285 2286 return (MH_WALK_CONTINUE); 2287 } 2288 2289 /*ARGSUSED*/ 2290 static uint_t 2291 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2292 { 2293 i_lnode_t *i_lnode = (i_lnode_t *)key; 2294 struct i_layer_data *data = arg; 2295 struct di_lnode *me; 2296 struct di_node *medinode; 2297 2298 ASSERT(i_lnode->self == 0); 2299 2300 i_lnode->self = data->lnode_off + 2301 (data->lnode_count * sizeof (struct di_lnode)); 2302 data->lnode_count++; 2303 2304 ASSERT(data->lnode_off > 0 && data->lnode_count > 0); 2305 ASSERT(data->link_count == 0); /* links not done yet */ 2306 ASSERT(data->lnode_count <= data->st->lnode_count); 2307 2308 /* fill in fields for the di_lnode snapshot */ 2309 me = DI_LNODE(di_mem_addr(data->st, i_lnode->self)); 2310 me->self = i_lnode->self; 2311 2312 if (i_lnode->devt == DDI_DEV_T_NONE) { 2313 me->dev_major = DDI_MAJOR_T_NONE; 2314 me->dev_minor = DDI_MAJOR_T_NONE; 2315 } else { 2316 me->dev_major = getmajor(i_lnode->devt); 2317 me->dev_minor = getminor(i_lnode->devt); 2318 } 2319 2320 /* 2321 * The dip corresponding to this lnode must exist in 2322 * the snapshot or we wouldn't have created the i_lnode_t 2323 * during LDI walk. Save the offset of the dip. 2324 */ 2325 ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0); 2326 me->node = i_lnode->di_node->self; 2327 2328 /* 2329 * There must be at least one link in or out of this lnode 2330 * or we wouldn't have created it. These fields will be set 2331 * during the link hash walk. 2332 */ 2333 ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL)); 2334 2335 /* 2336 * set the offset of the devinfo node associated with this 2337 * lnode. Also update the node_next next pointer. this pointer 2338 * is set if there are multiple lnodes associated with the same 2339 * devinfo node. (could occure when multiple minor nodes 2340 * are open for one device, etc.) 
2341 */ 2342 medinode = i_lnode->di_node; 2343 me->node_next = medinode->lnodes; 2344 medinode->lnodes = me->self; 2345 2346 return (MH_WALK_CONTINUE); 2347 } 2348 2349 static di_off_t 2350 di_getlink_data(di_off_t off, struct di_state *st) 2351 { 2352 struct i_layer_data data = {0}; 2353 size_t size; 2354 2355 dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); 2356 2357 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, 2358 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, 2359 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); 2360 2361 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, 2362 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); 2363 2364 /* get driver layering information */ 2365 (void) ldi_usage_walker(st, di_ldi_callback); 2366 2367 /* check if there is any link data to include in the snapshot */ 2368 if (st->lnode_count == 0) { 2369 ASSERT(st->link_count == 0); 2370 goto out; 2371 } 2372 2373 ASSERT(st->link_count != 0); 2374 2375 /* get a pointer to snapshot memory for all the di_lnodes */ 2376 size = sizeof (struct di_lnode) * st->lnode_count; 2377 data.lnode_off = off = di_checkmem(st, off, size); 2378 off += size; 2379 2380 /* get a pointer to snapshot memory for all the di_links */ 2381 size = sizeof (struct di_link) * st->link_count; 2382 data.link_off = off = di_checkmem(st, off, size); 2383 off += size; 2384 2385 data.lnode_count = data.link_count = 0; 2386 data.st = st; 2387 2388 /* 2389 * We have lnodes and links that will go into the 2390 * snapshot, so let's walk the respective hashes 2391 * and snapshot them. The various linkages are 2392 * also set up during the walk. 2393 */ 2394 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); 2395 ASSERT(data.lnode_count == st->lnode_count); 2396 2397 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); 2398 ASSERT(data.link_count == st->link_count); 2399 2400 out: 2401 /* free up the i_lnodes and i_links used to create the snapshot */ 2402 mod_hash_destroy_hash(st->lnode_hash); 2403 mod_hash_destroy_hash(st->link_hash); 2404 st->lnode_count = 0; 2405 st->link_count = 0; 2406 2407 return (off); 2408 } 2409 2410 2411 /* 2412 * Copy all minor data nodes attached to a devinfo node into the snapshot. 2413 * It is called from di_copynode with active ndi_devi_enter to protect 2414 * the list of minor nodes. 
2415 */ 2416 static di_off_t 2417 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, 2418 struct di_state *st) 2419 { 2420 di_off_t off; 2421 struct di_minor *me; 2422 size_t size; 2423 2424 dcmn_err2((CE_CONT, "di_getmdata:\n")); 2425 2426 /* 2427 * check memory first 2428 */ 2429 off = di_checkmem(st, *off_p, sizeof (struct di_minor)); 2430 *off_p = off; 2431 2432 do { 2433 me = DI_MINOR(di_mem_addr(st, off)); 2434 me->self = off; 2435 me->type = mnode->type; 2436 me->node = node; 2437 me->user_private_data = NULL; 2438 2439 off += sizeof (struct di_minor); 2440 2441 /* 2442 * Split dev_t to major/minor, so it works for 2443 * both ILP32 and LP64 model 2444 */ 2445 me->dev_major = getmajor(mnode->ddm_dev); 2446 me->dev_minor = getminor(mnode->ddm_dev); 2447 me->spec_type = mnode->ddm_spec_type; 2448 2449 if (mnode->ddm_name) { 2450 size = strlen(mnode->ddm_name) + 1; 2451 me->name = off = di_checkmem(st, off, size); 2452 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name); 2453 off += size; 2454 } 2455 2456 if (mnode->ddm_node_type) { 2457 size = strlen(mnode->ddm_node_type) + 1; 2458 me->node_type = off = di_checkmem(st, off, size); 2459 (void) strcpy(di_mem_addr(st, off), 2460 mnode->ddm_node_type); 2461 off += size; 2462 } 2463 2464 off = di_checkmem(st, off, sizeof (struct di_minor)); 2465 me->next = off; 2466 mnode = mnode->next; 2467 } while (mnode); 2468 2469 me->next = 0; 2470 2471 return (off); 2472 } 2473 2474 /* 2475 * di_register_dip(), di_find_dip(): The dip must be protected 2476 * from deallocation when using these routines - this can either 2477 * be a reference count, a busy hold or a per-driver lock. 2478 */ 2479 2480 static void 2481 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) 2482 { 2483 struct dev_info *node = DEVI(dip); 2484 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2485 struct di_dkey *dk; 2486 2487 ASSERT(dip); 2488 ASSERT(off > 0); 2489 2490 key->k_type = DI_DKEY; 2491 dk = &(key->k_u.dkey); 2492 2493 dk->dk_dip = dip; 2494 dk->dk_major = node->devi_major; 2495 dk->dk_inst = node->devi_instance; 2496 dk->dk_nodeid = node->devi_nodeid; 2497 2498 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, 2499 (mod_hash_val_t)(uintptr_t)off) != 0) { 2500 panic( 2501 "duplicate devinfo (%p) registered during device " 2502 "tree walk", (void *)dip); 2503 } 2504 } 2505 2506 2507 static int 2508 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) 2509 { 2510 /* 2511 * uintptr_t must be used because it matches the size of void *; 2512 * mod_hash expects clients to place results into pointer-size 2513 * containers; since di_off_t is always a 32-bit offset, alignment 2514 * would otherwise be broken on 64-bit kernels. 2515 */ 2516 uintptr_t offset; 2517 struct di_key key = {0}; 2518 struct di_dkey *dk; 2519 2520 ASSERT(st->reg_dip_hash); 2521 ASSERT(dip); 2522 ASSERT(off_p); 2523 2524 2525 key.k_type = DI_DKEY; 2526 dk = &(key.k_u.dkey); 2527 2528 dk->dk_dip = dip; 2529 dk->dk_major = DEVI(dip)->devi_major; 2530 dk->dk_inst = DEVI(dip)->devi_instance; 2531 dk->dk_nodeid = DEVI(dip)->devi_nodeid; 2532 2533 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, 2534 (mod_hash_val_t *)&offset) == 0) { 2535 *off_p = (di_off_t)offset; 2536 return (0); 2537 } else { 2538 return (-1); 2539 } 2540 } 2541 2542 /* 2543 * di_register_pip(), di_find_pip(): The pip must be protected from deallocation 2544 * when using these routines. 
The caller must do this by protecting the 2545 * client(or phci)<->pip linkage while traversing the list and then holding the 2546 * pip when it is found in the list. 2547 */ 2548 2549 static void 2550 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) 2551 { 2552 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2553 char *path_addr; 2554 struct di_pkey *pk; 2555 2556 ASSERT(pip); 2557 ASSERT(off > 0); 2558 2559 key->k_type = DI_PKEY; 2560 pk = &(key->k_u.pkey); 2561 2562 pk->pk_pip = pip; 2563 path_addr = mdi_pi_get_addr(pip); 2564 if (path_addr) 2565 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); 2566 pk->pk_client = mdi_pi_get_client(pip); 2567 pk->pk_phci = mdi_pi_get_phci(pip); 2568 2569 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, 2570 (mod_hash_val_t)(uintptr_t)off) != 0) { 2571 panic( 2572 "duplicate pathinfo (%p) registered during device " 2573 "tree walk", (void *)pip); 2574 } 2575 } 2576 2577 /* 2578 * As with di_register_pip, the caller must hold or lock the pip 2579 */ 2580 static int 2581 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) 2582 { 2583 /* 2584 * uintptr_t must be used because it matches the size of void *; 2585 * mod_hash expects clients to place results into pointer-size 2586 * containers; since di_off_t is always a 32-bit offset, alignment 2587 * would otherwise be broken on 64-bit kernels. 2588 */ 2589 uintptr_t offset; 2590 struct di_key key = {0}; 2591 struct di_pkey *pk; 2592 2593 ASSERT(st->reg_pip_hash); 2594 ASSERT(off_p); 2595 2596 if (pip == NULL) { 2597 *off_p = 0; 2598 return (0); 2599 } 2600 2601 key.k_type = DI_PKEY; 2602 pk = &(key.k_u.pkey); 2603 2604 pk->pk_pip = pip; 2605 pk->pk_path_addr = mdi_pi_get_addr(pip); 2606 pk->pk_client = mdi_pi_get_client(pip); 2607 pk->pk_phci = mdi_pi_get_phci(pip); 2608 2609 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, 2610 (mod_hash_val_t *)&offset) == 0) { 2611 *off_p = (di_off_t)offset; 2612 return (0); 2613 } else { 2614 return (-1); 2615 } 2616 } 2617 2618 static di_path_state_t 2619 path_state_convert(mdi_pathinfo_state_t st) 2620 { 2621 switch (st) { 2622 case MDI_PATHINFO_STATE_ONLINE: 2623 return (DI_PATH_STATE_ONLINE); 2624 case MDI_PATHINFO_STATE_STANDBY: 2625 return (DI_PATH_STATE_STANDBY); 2626 case MDI_PATHINFO_STATE_OFFLINE: 2627 return (DI_PATH_STATE_OFFLINE); 2628 case MDI_PATHINFO_STATE_FAULT: 2629 return (DI_PATH_STATE_FAULT); 2630 default: 2631 return (DI_PATH_STATE_UNKNOWN); 2632 } 2633 } 2634 2635 2636 static di_off_t 2637 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p, 2638 struct di_state *st) 2639 { 2640 nvpair_t *prop = NULL; 2641 struct di_path_prop *me; 2642 int off; 2643 size_t size; 2644 char *str; 2645 uchar_t *buf; 2646 uint_t nelems; 2647 2648 off = *off_p; 2649 if (mdi_pi_get_next_prop(pip, NULL) == NULL) { 2650 *off_p = 0; 2651 return (off); 2652 } 2653 2654 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2655 *off_p = off; 2656 2657 while (prop = mdi_pi_get_next_prop(pip, prop)) { 2658 me = DI_PATHPROP(di_mem_addr(st, off)); 2659 me->self = off; 2660 off += sizeof (struct di_path_prop); 2661 2662 /* 2663 * property name 2664 */ 2665 size = strlen(nvpair_name(prop)) + 1; 2666 me->prop_name = off = di_checkmem(st, off, size); 2667 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); 2668 off += size; 2669 2670 switch (nvpair_type(prop)) { 2671 case DATA_TYPE_BYTE: 2672 case DATA_TYPE_INT16: 2673 case DATA_TYPE_UINT16: 2674 case DATA_TYPE_INT32: 2675 case DATA_TYPE_UINT32: 
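/*
 * Note: every fixed-width integer type up to 32 bits is exported
 * below as a 32-bit DDI_PROP_TYPE_INT value.
 */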
2676 me->prop_type = DDI_PROP_TYPE_INT; 2677 size = sizeof (int32_t); 2678 off = di_checkmem(st, off, size); 2679 (void) nvpair_value_int32(prop, 2680 (int32_t *)di_mem_addr(st, off)); 2681 break; 2682 2683 case DATA_TYPE_INT64: 2684 case DATA_TYPE_UINT64: 2685 me->prop_type = DDI_PROP_TYPE_INT64; 2686 size = sizeof (int64_t); 2687 off = di_checkmem(st, off, size); 2688 (void) nvpair_value_int64(prop, 2689 (int64_t *)di_mem_addr(st, off)); 2690 break; 2691 2692 case DATA_TYPE_STRING: 2693 me->prop_type = DDI_PROP_TYPE_STRING; 2694 (void) nvpair_value_string(prop, &str); 2695 size = strlen(str) + 1; 2696 off = di_checkmem(st, off, size); 2697 (void) strcpy(di_mem_addr(st, off), str); 2698 break; 2699 2700 case DATA_TYPE_BYTE_ARRAY: 2701 case DATA_TYPE_INT16_ARRAY: 2702 case DATA_TYPE_UINT16_ARRAY: 2703 case DATA_TYPE_INT32_ARRAY: 2704 case DATA_TYPE_UINT32_ARRAY: 2705 case DATA_TYPE_INT64_ARRAY: 2706 case DATA_TYPE_UINT64_ARRAY: 2707 me->prop_type = DDI_PROP_TYPE_BYTE; 2708 (void) nvpair_value_byte_array(prop, &buf, &nelems); 2709 size = nelems; 2710 if (nelems != 0) { 2711 off = di_checkmem(st, off, size); 2712 bcopy(buf, di_mem_addr(st, off), size); 2713 } 2714 break; 2715 2716 default: /* Unknown or unhandled type; skip it */ 2717 size = 0; 2718 break; 2719 } 2720 2721 if (size > 0) { 2722 me->prop_data = off; 2723 } 2724 2725 me->prop_len = (int)size; 2726 off += size; 2727 2728 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2729 me->prop_next = off; 2730 } 2731 2732 me->prop_next = 0; 2733 return (off); 2734 } 2735 2736 2737 static void 2738 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp, 2739 int get_client) 2740 { 2741 if (get_client) { 2742 ASSERT(me->path_client == 0); 2743 me->path_client = noff; 2744 ASSERT(me->path_c_link == 0); 2745 *off_pp = &me->path_c_link; 2746 me->path_snap_state &= 2747 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK); 2748 } else { 2749 ASSERT(me->path_phci == 0); 2750 me->path_phci = noff; 2751 ASSERT(me->path_p_link == 0); 2752 *off_pp = &me->path_p_link; 2753 me->path_snap_state &= 2754 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK); 2755 } 2756 } 2757 2758 /* 2759 * off_p: pointer to the linkage field. This links pips along the client|phci 2760 * linkage list. 2761 * noff : Offset for the endpoint dip snapshot. 2762 */ 2763 static di_off_t 2764 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff, 2765 struct di_state *st, int get_client) 2766 { 2767 di_off_t off; 2768 mdi_pathinfo_t *pip; 2769 struct di_path *me; 2770 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *); 2771 size_t size; 2772 2773 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client)); 2774 2775 /* 2776 * The naming of the following mdi_xyz() is unfortunately 2777 * non-intuitive. mdi_get_next_phci_path() follows the 2778 * client_link, i.e., the list of pips belonging to the 2779 * given client dip. 2780 */ 2781 if (get_client) 2782 next_pip = &mdi_get_next_phci_path; 2783 else 2784 next_pip = &mdi_get_next_client_path; 2785 2786 off = *off_p; 2787 2788 pip = NULL; 2789 while (pip = (*next_pip)(dip, pip)) { 2790 mdi_pathinfo_state_t state; 2791 di_off_t stored_offset; 2792 2793 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip)); 2794 2795 mdi_pi_lock(pip); 2796 2797 if (di_pip_find(st, pip, &stored_offset) != -1) { 2798 /* 2799 * We've already seen this pathinfo node, so we need to 2800 * take care not to snap it again. However, one endpoint 2801 * and linkage will be set here.
The other endpoint 2802 * and linkage has already been set when the pip was 2803 * first snapshotted i.e. when the other endpoint dip 2804 * was snapshotted. 2805 */ 2806 me = DI_PATH(di_mem_addr(st, stored_offset)); 2807 *off_p = stored_offset; 2808 2809 di_path_one_endpoint(me, noff, &off_p, get_client); 2810 2811 /* 2812 * The other endpoint and linkage were set when this 2813 * pip was snapshotted. So we are done with both 2814 * endpoints and linkages. 2815 */ 2816 ASSERT(!(me->path_snap_state & 2817 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI))); 2818 ASSERT(!(me->path_snap_state & 2819 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK))); 2820 2821 mdi_pi_unlock(pip); 2822 continue; 2823 } 2824 2825 /* 2826 * Now that we need to snapshot this pip, check memory 2827 */ 2828 size = sizeof (struct di_path); 2829 *off_p = off = di_checkmem(st, off, size); 2830 me = DI_PATH(di_mem_addr(st, off)); 2831 me->self = off; 2832 off += size; 2833 2834 me->path_snap_state = 2835 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK; 2836 me->path_snap_state |= 2837 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI; 2838 2839 /* 2840 * Zero out fields as di_checkmem() doesn't guarantee 2841 * zero-filled memory 2842 */ 2843 me->path_client = me->path_phci = 0; 2844 me->path_c_link = me->path_p_link = 0; 2845 2846 di_path_one_endpoint(me, noff, &off_p, get_client); 2847 2848 /* 2849 * Note the existence of this pathinfo 2850 */ 2851 di_register_pip(st, pip, me->self); 2852 2853 state = mdi_pi_get_state(pip); 2854 me->path_state = path_state_convert(state); 2855 2856 me->path_instance = mdi_pi_get_path_instance(pip); 2857 2858 /* 2859 * Get intermediate addressing info. 2860 */ 2861 size = strlen(mdi_pi_get_addr(pip)) + 1; 2862 me->path_addr = off = di_checkmem(st, off, size); 2863 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip)); 2864 off += size; 2865 2866 /* 2867 * Get path properties if props are to be included in the 2868 * snapshot 2869 */ 2870 if (DINFOPROP & st->command) { 2871 me->path_prop = off; 2872 off = di_path_getprop(pip, &me->path_prop, st); 2873 } else { 2874 me->path_prop = 0; 2875 } 2876 2877 mdi_pi_unlock(pip); 2878 } 2879 2880 *off_p = 0; 2881 return (off); 2882 } 2883 2884 /* 2885 * Return driver prop_op entry point for the specified devinfo node. 2886 * 2887 * To return a non-NULL value: 2888 * - driver must be attached and held: 2889 * If driver is not attached we ignore the driver property list. 2890 * No one should rely on such properties. 2891 * - driver "cb_prop_op != ddi_prop_op": 2892 * If "cb_prop_op == ddi_prop_op", framework does not need to call driver. 2893 * XXX or parent's bus_prop_op != ddi_bus_prop_op 2894 */ 2895 static int 2896 (*di_getprop_prop_op(struct dev_info *dip)) 2897 (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *) 2898 { 2899 struct dev_ops *ops; 2900 2901 /* If driver is not attached we ignore the driver property list. */ 2902 if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip)) 2903 return (NULL); 2904 2905 /* 2906 * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev, 2907 * or even NULL. 
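 *
 * (A readability sketch, not driver code: the unusual declaration of
 * di_getprop_prop_op() above is equivalent to returning a typedef'd
 * function pointer; di_prop_op_f is a hypothetical name.)
 */
#if 0
	typedef int (*di_prop_op_f)(dev_t, dev_info_t *, ddi_prop_op_t,
	    int, char *, caddr_t, int *);
	di_prop_op_f f = di_getprop_prop_op(dip);
#endif
/*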
2908 */ 2909 ops = dip->devi_ops; 2910 if (ops && ops->devo_cb_ops && 2911 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) && 2912 (ops->devo_cb_ops->cb_prop_op != nodev) && 2913 (ops->devo_cb_ops->cb_prop_op != nulldev) && 2914 (ops->devo_cb_ops->cb_prop_op != NULL)) 2915 return (ops->devo_cb_ops->cb_prop_op); 2916 return (NULL); 2917 } 2918 2919 static di_off_t 2920 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip, 2921 int (*prop_op)(), 2922 char *name, dev_t devt, int aflags, int alen, caddr_t aval, 2923 di_off_t off, di_off_t **off_pp) 2924 { 2925 int need_free = 0; 2926 dev_t pdevt; 2927 int pflags; 2928 int rv; 2929 caddr_t val; 2930 int len; 2931 size_t size; 2932 struct di_prop *pp; 2933 2934 /* If we have prop_op function, ask driver for latest value */ 2935 if (prop_op) { 2936 ASSERT(dip); 2937 2938 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */ 2939 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt; 2940 2941 /* 2942 * We have type information in flags, but are invoking an 2943 * old non-typed prop_op(9E) interface. Since not all types are 2944 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64), 2945 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to 2946 * expand type bits beyond DDI_PROP_TYPE_ANY. This allows us 2947 * to use the legacy prop_op(9E) interface to obtain updates 2948 * non-DDI_PROP_TYPE_ANY dynamic properties. 2949 */ 2950 pflags = aflags & ~DDI_PROP_TYPE_MASK; 2951 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM | 2952 DDI_PROP_CONSUMER_TYPED; 2953 2954 /* 2955 * Hold and exit across prop_op(9E) to avoid lock order 2956 * issues between 2957 * [ndi_devi_enter() ..prop_op(9E).. driver-lock] 2958 * .vs. 2959 * [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F).. 2960 * ndi_devi_enter()] 2961 * ordering. 2962 */ 2963 ndi_hold_devi((dev_info_t *)dip); 2964 ndi_devi_exit((dev_info_t *)dip, dip->devi_circular); 2965 rv = (*prop_op)(pdevt, (dev_info_t *)dip, 2966 PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len); 2967 ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular); 2968 ndi_rele_devi((dev_info_t *)dip); 2969 2970 if (rv == DDI_PROP_SUCCESS) { 2971 need_free = 1; /* dynamic prop obtained */ 2972 } else if (dyn) { 2973 /* 2974 * A dynamic property must succeed prop_op(9E) to show 2975 * up in the snapshot - that is the only source of its 2976 * value. 2977 */ 2978 return (off); /* dynamic prop not supported */ 2979 } else { 2980 /* 2981 * In case calling the driver caused an update off 2982 * prop_op(9E) of a non-dynamic property (code leading 2983 * to ddi_prop_change), we defer picking up val and 2984 * len informatiojn until after prop_op(9E) to ensure 2985 * that we snapshot the latest value. 2986 */ 2987 val = aval; 2988 len = alen; 2989 2990 } 2991 } else { 2992 val = aval; 2993 len = alen; 2994 } 2995 2996 dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n", 2997 list, name ? 
name : "NULL", len, (void *)val)); 2998 2999 size = sizeof (struct di_prop); 3000 **off_pp = off = di_checkmem(st, off, size); 3001 pp = DI_PROP(di_mem_addr(st, off)); 3002 pp->self = off; 3003 off += size; 3004 3005 pp->dev_major = getmajor(devt); 3006 pp->dev_minor = getminor(devt); 3007 pp->prop_flags = aflags; 3008 pp->prop_list = list; 3009 3010 /* property name */ 3011 if (name) { 3012 size = strlen(name) + 1; 3013 pp->prop_name = off = di_checkmem(st, off, size); 3014 (void) strcpy(di_mem_addr(st, off), name); 3015 off += size; 3016 } else { 3017 pp->prop_name = -1; 3018 } 3019 3020 pp->prop_len = len; 3021 if (val == NULL) { 3022 pp->prop_data = -1; 3023 } else if (len != 0) { 3024 size = len; 3025 pp->prop_data = off = di_checkmem(st, off, size); 3026 bcopy(val, di_mem_addr(st, off), size); 3027 off += size; 3028 } 3029 3030 pp->next = 0; /* assume tail for now */ 3031 *off_pp = &pp->next; /* return pointer to our next */ 3032 3033 if (need_free) /* free PROP_LEN_AND_VAL_ALLOC alloc */ 3034 kmem_free(val, len); 3035 return (off); 3036 } 3037 3038 3039 /* 3040 * Copy a list of properties attached to a devinfo node. Called from 3041 * di_copynode with active ndi_devi_enter. The major number is passed in case 3042 * we need to call driver's prop_op entry. The value of list indicates 3043 * which list we are copying. Possible values are: 3044 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST 3045 */ 3046 static di_off_t 3047 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p, 3048 struct di_state *st, struct dev_info *dip) 3049 { 3050 struct ddi_prop *prop; 3051 int (*prop_op)(); 3052 int off; 3053 struct ddi_minor_data *mn; 3054 i_ddi_prop_dyn_t *dp; 3055 struct plist { 3056 struct plist *pl_next; 3057 char *pl_name; 3058 int pl_flags; 3059 dev_t pl_dev; 3060 int pl_len; 3061 caddr_t pl_val; 3062 } *pl, *pl0, **plp; 3063 3064 ASSERT(st != NULL); 3065 3066 off = *off_p; 3067 *off_p = 0; 3068 dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n", 3069 list, (void *)*pprop)); 3070 3071 /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */ 3072 prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL; 3073 3074 /* 3075 * Form private list of properties, holding devi_lock for properties 3076 * that hang off the dip. 3077 */ 3078 if (dip) 3079 mutex_enter(&(dip->devi_lock)); 3080 for (pl0 = NULL, plp = &pl0, prop = *pprop; 3081 prop; plp = &pl->pl_next, prop = prop->prop_next) { 3082 pl = kmem_alloc(sizeof (*pl), KM_SLEEP); 3083 *plp = pl; 3084 pl->pl_next = NULL; 3085 if (prop->prop_name) 3086 pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP); 3087 else 3088 pl->pl_name = NULL; 3089 pl->pl_flags = prop->prop_flags; 3090 pl->pl_dev = prop->prop_dev; 3091 if (prop->prop_len) { 3092 pl->pl_len = prop->prop_len; 3093 pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP); 3094 bcopy(prop->prop_val, pl->pl_val, pl->pl_len); 3095 } else { 3096 pl->pl_len = 0; 3097 pl->pl_val = NULL; 3098 } 3099 } 3100 if (dip) 3101 mutex_exit(&(dip->devi_lock)); 3102 3103 /* 3104 * Now that we have dropped devi_lock, perform a second-pass to 3105 * add properties to the snapshot. We do this as a second pass 3106 * because we may need to call prop_op(9E) and we can't hold 3107 * devi_lock across that call. 
3108 */ 3109 for (pl = pl0; pl; pl = pl0) { 3110 pl0 = pl->pl_next; 3111 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name, 3112 pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val, 3113 off, &off_p); 3114 if (pl->pl_val) 3115 kmem_free(pl->pl_val, pl->pl_len); 3116 if (pl->pl_name) 3117 kmem_free(pl->pl_name, strlen(pl->pl_name) + 1); 3118 kmem_free(pl, sizeof (*pl)); 3119 } 3120 3121 /* 3122 * If there is no prop_op or dynamic property support has been 3123 * disabled, we are done. 3124 */ 3125 if ((prop_op == NULL) || (di_prop_dyn == 0)) { 3126 *off_p = 0; 3127 return (off); 3128 } 3129 3130 /* Add dynamic driver properties to snapshot */ 3131 for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip); 3132 dp && dp->dp_name; dp++) { 3133 if (dp->dp_spec_type) { 3134 /* if spec_type, property of matching minor */ 3135 ASSERT(DEVI_BUSY_OWNED(dip)); 3136 for (mn = dip->devi_minor; mn; mn = mn->next) { 3137 if (mn->ddm_spec_type != dp->dp_spec_type) 3138 continue; 3139 off = di_getprop_add(list, 1, st, dip, prop_op, 3140 dp->dp_name, mn->ddm_dev, dp->dp_type, 3141 0, NULL, off, &off_p); 3142 } 3143 } else { 3144 /* property of devinfo node */ 3145 off = di_getprop_add(list, 1, st, dip, prop_op, 3146 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type, 3147 0, NULL, off, &off_p); 3148 } 3149 } 3150 3151 /* Add dynamic parent properties to snapshot */ 3152 for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip); 3153 dp && dp->dp_name; dp++) { 3154 if (dp->dp_spec_type) { 3155 /* if spec_type, property of matching minor */ 3156 ASSERT(DEVI_BUSY_OWNED(dip)); 3157 for (mn = dip->devi_minor; mn; mn = mn->next) { 3158 if (mn->ddm_spec_type != dp->dp_spec_type) 3159 continue; 3160 off = di_getprop_add(list, 1, st, dip, prop_op, 3161 dp->dp_name, mn->ddm_dev, dp->dp_type, 3162 0, NULL, off, &off_p); 3163 } 3164 } else { 3165 /* property of devinfo node */ 3166 off = di_getprop_add(list, 1, st, dip, prop_op, 3167 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type, 3168 0, NULL, off, &off_p); 3169 } 3170 } 3171 3172 *off_p = 0; 3173 return (off); 3174 } 3175 3176 /* 3177 * find private data format attached to a dip 3178 * parent = 1 to match driver name of parent dip (for parent private data) 3179 * 0 to match driver name of current dip (for driver private data) 3180 */ 3181 #define DI_MATCH_DRIVER 0 3182 #define DI_MATCH_PARENT 1 3183 3184 struct di_priv_format * 3185 di_match_drv_name(struct dev_info *node, struct di_state *st, int match) 3186 { 3187 int i, count, len; 3188 char *drv_name; 3189 major_t major; 3190 struct di_all *all; 3191 struct di_priv_format *form; 3192 3193 dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n", 3194 node->devi_node_name, match)); 3195 3196 if (match == DI_MATCH_PARENT) { 3197 node = DEVI(node->devi_parent); 3198 } 3199 3200 if (node == NULL) { 3201 return (NULL); 3202 } 3203 3204 major = node->devi_major; 3205 if (major == (major_t)(-1)) { 3206 return (NULL); 3207 } 3208 3209 /* 3210 * Match the driver name. 
3211 */ 3212 drv_name = ddi_major_to_name(major); 3213 if ((drv_name == NULL) || *drv_name == '\0') { 3214 return (NULL); 3215 } 3216 3217 /* Now get the di_priv_format array */ 3218 all = DI_ALL_PTR(st); 3219 if (match == DI_MATCH_PARENT) { 3220 count = all->n_ppdata; 3221 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format)); 3222 } else { 3223 count = all->n_dpdata; 3224 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format)); 3225 } 3226 3227 len = strlen(drv_name); 3228 for (i = 0; i < count; i++) { 3229 char *tmp; 3230 3231 tmp = form[i].drv_name; 3232 while (tmp && (*tmp != '\0')) { 3233 if (strncmp(drv_name, tmp, len) == 0) { 3234 return (&form[i]); 3235 } 3236 /* 3237 * Move to next driver name, skipping a white space 3238 */ 3239 if (tmp = strchr(tmp, ' ')) { 3240 tmp++; 3241 } 3242 } 3243 } 3244 3245 return (NULL); 3246 } 3247 3248 /* 3249 * The following functions copy data as specified by the format passed in. 3250 * To prevent invalid format from panicing the system, we call on_fault(). 3251 * A return value of 0 indicates an error. Otherwise, the total offset 3252 * is returned. 3253 */ 3254 #define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */ 3255 3256 static di_off_t 3257 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node, 3258 void *data, di_off_t *off_p, struct di_state *st) 3259 { 3260 caddr_t pa; 3261 void *ptr; 3262 int i, size, repeat; 3263 di_off_t off, off0, *tmp; 3264 char *path; 3265 label_t ljb; 3266 3267 dcmn_err2((CE_CONT, "di_getprvdata:\n")); 3268 3269 /* 3270 * check memory availability. Private data size is 3271 * limited to DI_MAX_PRIVDATA. 3272 */ 3273 off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA); 3274 *off_p = off; 3275 3276 if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) { 3277 goto failure; 3278 } 3279 3280 if (!on_fault(&ljb)) { 3281 /* copy the struct */ 3282 bcopy(data, di_mem_addr(st, off), pdp->bytes); 3283 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */ 3284 3285 /* dereferencing pointers */ 3286 for (i = 0; i < MAX_PTR_IN_PRV; i++) { 3287 3288 if (pdp->ptr[i].size == 0) { 3289 goto success; /* no more ptrs */ 3290 } 3291 3292 /* 3293 * first, get the pointer content 3294 */ 3295 if ((pdp->ptr[i].offset < 0) || 3296 (pdp->ptr[i].offset > pdp->bytes - sizeof (char *))) 3297 goto failure; /* wrong offset */ 3298 3299 pa = di_mem_addr(st, off + pdp->ptr[i].offset); 3300 3301 /* save a tmp ptr to store off_t later */ 3302 tmp = (di_off_t *)(intptr_t)pa; 3303 3304 /* get pointer value, if NULL continue */ 3305 ptr = *((void **) (intptr_t)pa); 3306 if (ptr == NULL) { 3307 continue; 3308 } 3309 3310 /* 3311 * next, find the repeat count (array dimension) 3312 */ 3313 repeat = pdp->ptr[i].len_offset; 3314 3315 /* 3316 * Positive value indicates a fixed sized array. 3317 * 0 or negative value indicates variable sized array. 3318 * 3319 * For variable sized array, the variable must be 3320 * an int member of the structure, with an offset 3321 * equal to the absolution value of struct member. 3322 */ 3323 if (repeat > pdp->bytes - sizeof (int)) { 3324 goto failure; /* wrong offset */ 3325 } 3326 3327 if (repeat >= 0) { 3328 repeat = *((int *) 3329 (intptr_t)((caddr_t)data + repeat)); 3330 } else { 3331 repeat = -repeat; 3332 } 3333 3334 /* 3335 * next, get the size of the object to be copied 3336 */ 3337 size = pdp->ptr[i].size * repeat; 3338 3339 /* 3340 * Arbitrarily limit the total size of object to be 3341 * copied (1 byte to 1/4 page). 
3342 */ 3343 if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) { 3344 goto failure; /* wrong size or too big */ 3345 } 3346 3347 /* 3348 * Now copy the data 3349 */ 3350 *tmp = off0; 3351 bcopy(ptr, di_mem_addr(st, off + off0), size); 3352 off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */ 3353 } 3354 } else { 3355 goto failure; 3356 } 3357 3358 success: 3359 /* 3360 * success if reached here 3361 */ 3362 no_fault(); 3363 return (off + off0); 3364 /*NOTREACHED*/ 3365 3366 failure: 3367 /* 3368 * fault occurred 3369 */ 3370 no_fault(); 3371 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 3372 cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p", 3373 ddi_pathname((dev_info_t *)node, path), data); 3374 kmem_free(path, MAXPATHLEN); 3375 *off_p = -1; /* set private data to indicate error */ 3376 3377 return (off); 3378 } 3379 3380 /* 3381 * get parent private data; on error, returns original offset 3382 */ 3383 static di_off_t 3384 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st) 3385 { 3386 int off; 3387 struct di_priv_format *ppdp; 3388 3389 dcmn_err2((CE_CONT, "di_getppdata:\n")); 3390 3391 /* find the parent data format */ 3392 if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) { 3393 off = *off_p; 3394 *off_p = 0; /* set parent data to none */ 3395 return (off); 3396 } 3397 3398 return (di_getprvdata(ppdp, node, 3399 ddi_get_parent_data((dev_info_t *)node), off_p, st)); 3400 } 3401 3402 /* 3403 * get parent private data; returns original offset 3404 */ 3405 static di_off_t 3406 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st) 3407 { 3408 int off; 3409 struct di_priv_format *dpdp; 3410 3411 dcmn_err2((CE_CONT, "di_getdpdata:")); 3412 3413 /* find the parent data format */ 3414 if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) { 3415 off = *off_p; 3416 *off_p = 0; /* set driver data to none */ 3417 return (off); 3418 } 3419 3420 return (di_getprvdata(dpdp, node, 3421 ddi_get_driver_private((dev_info_t *)node), off_p, st)); 3422 } 3423 3424 /* 3425 * The driver is stateful across DINFOCPYALL and DINFOUSRLD. 3426 * This function encapsulates the state machine: 3427 * 3428 * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY -> 3429 * | SNAPSHOT USRLD | 3430 * -------------------------------------------------- 3431 * 3432 * Returns 0 on success and -1 on failure 3433 */ 3434 static int 3435 di_setstate(struct di_state *st, int new_state) 3436 { 3437 int ret = 0; 3438 3439 mutex_enter(&di_lock); 3440 switch (new_state) { 3441 case IOC_IDLE: 3442 case IOC_DONE: 3443 break; 3444 case IOC_SNAP: 3445 if (st->di_iocstate != IOC_IDLE) 3446 ret = -1; 3447 break; 3448 case IOC_COPY: 3449 if (st->di_iocstate != IOC_DONE) 3450 ret = -1; 3451 break; 3452 default: 3453 ret = -1; 3454 } 3455 3456 if (ret == 0) 3457 st->di_iocstate = new_state; 3458 else 3459 cmn_err(CE_NOTE, "incorrect state transition from %d to %d", 3460 st->di_iocstate, new_state); 3461 mutex_exit(&di_lock); 3462 return (ret); 3463 } 3464 3465 /* 3466 * We cannot assume the presence of the entire 3467 * snapshot in this routine. 
All we are guaranteed 3468 * is the di_all struct + 1 byte (for root_path) 3469 */ 3470 static int 3471 header_plus_one_ok(struct di_all *all) 3472 { 3473 /* 3474 * Refuse to read old versions 3475 */ 3476 if (all->version != DI_SNAPSHOT_VERSION) { 3477 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); 3478 return (0); 3479 } 3480 3481 if (all->cache_magic != DI_CACHE_MAGIC) { 3482 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); 3483 return (0); 3484 } 3485 3486 if (all->snapshot_time == 0) { 3487 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); 3488 return (0); 3489 } 3490 3491 if (all->top_devinfo == 0) { 3492 CACHE_DEBUG((DI_ERR, "NULL top devinfo")); 3493 return (0); 3494 } 3495 3496 if (all->map_size < sizeof (*all) + 1) { 3497 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); 3498 return (0); 3499 } 3500 3501 if (all->root_path[0] != '/' || all->root_path[1] != '\0') { 3502 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", 3503 all->root_path[0], all->root_path[1])); 3504 return (0); 3505 } 3506 3507 /* 3508 * We can't check checksum here as we just have the header 3509 */ 3510 3511 return (1); 3512 } 3513 3514 static int 3515 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) 3516 { 3517 rlim64_t rlimit; 3518 ssize_t resid; 3519 int error = 0; 3520 3521 3522 rlimit = RLIM64_INFINITY; 3523 3524 while (len) { 3525 resid = 0; 3526 error = vn_rdwr(UIO_WRITE, vp, buf, len, off, 3527 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 3528 3529 if (error || resid < 0) { 3530 error = error ? error : EIO; 3531 CACHE_DEBUG((DI_ERR, "write error: %d", error)); 3532 break; 3533 } 3534 3535 /* 3536 * Check if we are making progress 3537 */ 3538 if (resid >= len) { 3539 error = ENOSPC; 3540 break; 3541 } 3542 buf += len - resid; 3543 off += len - resid; 3544 len = resid; 3545 } 3546 3547 return (error); 3548 } 3549 3550 static void 3551 di_cache_write(struct di_cache *cache) 3552 { 3553 struct di_all *all; 3554 struct vnode *vp; 3555 int oflags; 3556 size_t map_size; 3557 size_t chunk; 3558 offset_t off; 3559 int error; 3560 char *buf; 3561 3562 ASSERT(DI_CACHE_LOCKED(*cache)); 3563 ASSERT(!servicing_interrupt()); 3564 3565 if (cache->cache_size == 0) { 3566 ASSERT(cache->cache_data == NULL); 3567 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); 3568 return; 3569 } 3570 3571 ASSERT(cache->cache_size > 0); 3572 ASSERT(cache->cache_data); 3573 3574 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { 3575 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); 3576 return; 3577 } 3578 3579 all = (struct di_all *)cache->cache_data; 3580 3581 if (!header_plus_one_ok(all)) { 3582 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); 3583 return; 3584 } 3585 3586 ASSERT(strcmp(all->root_path, "/") == 0); 3587 3588 /* 3589 * The cache_size is the total allocated memory for the cache. 3590 * The map_size is the actual size of valid data in the cache. 3591 * map_size may be smaller than cache_size but cannot exceed 3592 * cache_size. 3593 */ 3594 if (all->map_size > cache->cache_size) { 3595 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." 
3596 " Skipping write", all->map_size, cache->cache_size)); 3597 return; 3598 } 3599 3600 /* 3601 * First unlink the temp file 3602 */ 3603 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3604 if (error && error != ENOENT) { 3605 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3606 DI_CACHE_TEMP, error)); 3607 } 3608 3609 if (error == EROFS) { 3610 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3611 return; 3612 } 3613 3614 vp = NULL; 3615 oflags = (FCREAT|FWRITE); 3616 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3617 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3618 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3619 DI_CACHE_TEMP, error)); 3620 return; 3621 } 3622 3623 ASSERT(vp); 3624 3625 /* 3626 * Paranoid: Check if the file is on a read-only FS 3627 */ 3628 if (vn_is_readonly(vp)) { 3629 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3630 goto fail; 3631 } 3632 3633 /* 3634 * Note that we only write map_size bytes to disk - this saves 3635 * space as the actual cache size may be larger than size of 3636 * valid data in the cache. 3637 * Another advantage is that it makes verification of size 3638 * easier when the file is read later. 3639 */ 3640 map_size = all->map_size; 3641 off = 0; 3642 buf = cache->cache_data; 3643 3644 while (map_size) { 3645 ASSERT(map_size > 0); 3646 /* 3647 * Write in chunks so that VM system 3648 * is not overwhelmed 3649 */ 3650 if (map_size > di_chunk * PAGESIZE) 3651 chunk = di_chunk * PAGESIZE; 3652 else 3653 chunk = map_size; 3654 3655 error = chunk_write(vp, off, buf, chunk); 3656 if (error) { 3657 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3658 off, error)); 3659 goto fail; 3660 } 3661 3662 off += chunk; 3663 buf += chunk; 3664 map_size -= chunk; 3665 3666 /* If low on memory, give pageout a chance to run */ 3667 if (freemem < desfree) 3668 delay(1); 3669 } 3670 3671 /* 3672 * Now sync the file and close it 3673 */ 3674 if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) { 3675 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3676 } 3677 3678 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) { 3679 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3680 VN_RELE(vp); 3681 return; 3682 } 3683 3684 VN_RELE(vp); 3685 3686 /* 3687 * Now do the rename 3688 */ 3689 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3690 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3691 return; 3692 } 3693 3694 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3695 3696 return; 3697 3698 fail: 3699 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL); 3700 VN_RELE(vp); 3701 } 3702 3703 3704 /* 3705 * Since we could be called early in boot, 3706 * use kobj_read_file() 3707 */ 3708 static void 3709 di_cache_read(struct di_cache *cache) 3710 { 3711 struct _buf *file; 3712 struct di_all *all; 3713 int n; 3714 size_t map_size, sz, chunk; 3715 offset_t off; 3716 caddr_t buf; 3717 uint32_t saved_crc, crc; 3718 3719 ASSERT(modrootloaded); 3720 ASSERT(DI_CACHE_LOCKED(*cache)); 3721 ASSERT(cache->cache_data == NULL); 3722 ASSERT(cache->cache_size == 0); 3723 ASSERT(!servicing_interrupt()); 3724 3725 file = kobj_open_file(DI_CACHE_FILE); 3726 if (file == (struct _buf *)-1) { 3727 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3728 DI_CACHE_FILE, ENOENT)); 3729 return; 3730 } 3731 3732 /* 3733 * Read in the header+root_path first. 
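 * (sizeof (*all) + 1 bytes, as validated by header_plus_one_ok().)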
The root_path must be "/" 3734 */ 3735 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); 3736 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); 3737 3738 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { 3739 kmem_free(all, sizeof (*all) + 1); 3740 kobj_close_file(file); 3741 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); 3742 return; 3743 } 3744 3745 map_size = all->map_size; 3746 3747 kmem_free(all, sizeof (*all) + 1); 3748 3749 ASSERT(map_size >= sizeof (*all) + 1); 3750 3751 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); 3752 sz = map_size; 3753 off = 0; 3754 while (sz) { 3755 /* Don't overload VM with large reads */ 3756 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; 3757 n = kobj_read_file(file, buf, chunk, off); 3758 if (n != chunk) { 3759 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", 3760 DI_CACHE_FILE, off)); 3761 goto fail; 3762 } 3763 off += chunk; 3764 buf += chunk; 3765 sz -= chunk; 3766 } 3767 3768 ASSERT(off == map_size); 3769 3770 /* 3771 * Read past expected EOF to verify size. 3772 */ 3773 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { 3774 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); 3775 goto fail; 3776 } 3777 3778 all = (struct di_all *)di_cache.cache_data; 3779 if (!header_plus_one_ok(all)) { 3780 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); 3781 goto fail; 3782 } 3783 3784 /* 3785 * Compute CRC with checksum field in the cache data set to 0 3786 */ 3787 saved_crc = all->cache_checksum; 3788 all->cache_checksum = 0; 3789 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); 3790 all->cache_checksum = saved_crc; 3791 3792 if (crc != all->cache_checksum) { 3793 CACHE_DEBUG((DI_ERR, 3794 "%s: checksum error: expected=0x%x actual=0x%x", 3795 DI_CACHE_FILE, all->cache_checksum, crc)); 3796 goto fail; 3797 } 3798 3799 if (all->map_size != map_size) { 3800 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); 3801 goto fail; 3802 } 3803 3804 kobj_close_file(file); 3805 3806 di_cache.cache_size = map_size; 3807 3808 return; 3809 3810 fail: 3811 kmem_free(di_cache.cache_data, map_size); 3812 kobj_close_file(file); 3813 di_cache.cache_data = NULL; 3814 di_cache.cache_size = 0; 3815 } 3816 3817 3818 /* 3819 * Checks if arguments are valid for using the cache. 
3820 */ 3821 static int 3822 cache_args_valid(struct di_state *st, int *error) 3823 { 3824 ASSERT(error); 3825 ASSERT(st->mem_size > 0); 3826 ASSERT(st->memlist != NULL); 3827 3828 if (!modrootloaded || !i_ddi_io_initialized()) { 3829 CACHE_DEBUG((DI_ERR, 3830 "cache lookup failure: I/O subsystem not inited")); 3831 *error = ENOTACTIVE; 3832 return (0); 3833 } 3834 3835 /* 3836 * No other flags allowed with DINFOCACHE 3837 */ 3838 if (st->command != (DINFOCACHE & DIIOC_MASK)) { 3839 CACHE_DEBUG((DI_ERR, 3840 "cache lookup failure: bad flags: 0x%x", 3841 st->command)); 3842 *error = EINVAL; 3843 return (0); 3844 } 3845 3846 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3847 CACHE_DEBUG((DI_ERR, 3848 "cache lookup failure: bad root: %s", 3849 DI_ALL_PTR(st)->root_path)); 3850 *error = EINVAL; 3851 return (0); 3852 } 3853 3854 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); 3855 3856 *error = 0; 3857 3858 return (1); 3859 } 3860 3861 static int 3862 snapshot_is_cacheable(struct di_state *st) 3863 { 3864 ASSERT(st->mem_size > 0); 3865 ASSERT(st->memlist != NULL); 3866 3867 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) != 3868 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { 3869 CACHE_DEBUG((DI_INFO, 3870 "not cacheable: incompatible flags: 0x%x", 3871 st->command)); 3872 return (0); 3873 } 3874 3875 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3876 CACHE_DEBUG((DI_INFO, 3877 "not cacheable: incompatible root path: %s", 3878 DI_ALL_PTR(st)->root_path)); 3879 return (0); 3880 } 3881 3882 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); 3883 3884 return (1); 3885 } 3886 3887 static int 3888 di_cache_lookup(struct di_state *st) 3889 { 3890 size_t rval; 3891 int cache_valid; 3892 3893 ASSERT(cache_args_valid(st, &cache_valid)); 3894 ASSERT(modrootloaded); 3895 3896 DI_CACHE_LOCK(di_cache); 3897 3898 /* 3899 * The following assignment determines the validity 3900 * of the cache as far as this snapshot is concerned. 3901 */ 3902 cache_valid = di_cache.cache_valid; 3903 3904 if (cache_valid && di_cache.cache_data == NULL) { 3905 di_cache_read(&di_cache); 3906 /* check for read or file error */ 3907 if (di_cache.cache_data == NULL) 3908 cache_valid = 0; 3909 } 3910 3911 if (cache_valid) { 3912 /* 3913 * Ok, the cache was valid as of this particular 3914 * snapshot. Copy the cached snapshot. This is safe 3915 * to do as the cache cannot be freed (we hold the 3916 * cache lock). Free the memory allocated in di_state 3917 * up until this point - we will simply copy everything 3918 * in the cache. 3919 */ 3920 3921 ASSERT(di_cache.cache_data != NULL); 3922 ASSERT(di_cache.cache_size > 0); 3923 3924 di_freemem(st); 3925 3926 rval = 0; 3927 if (di_cache2mem(&di_cache, st) > 0) { 3928 /* 3929 * map_size is size of valid data in the 3930 * cached snapshot and may be less than 3931 * size of the cache. 3932 */ 3933 ASSERT(DI_ALL_PTR(st)); 3934 rval = DI_ALL_PTR(st)->map_size; 3935 3936 ASSERT(rval >= sizeof (struct di_all)); 3937 ASSERT(rval <= di_cache.cache_size); 3938 } 3939 } else { 3940 /* 3941 * The cache isn't valid, we need to take a snapshot. 3942 * Set the command flags appropriately 3943 */ 3944 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); 3945 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); 3946 rval = di_cache_update(st); 3947 st->command = (DINFOCACHE & DIIOC_MASK); 3948 } 3949 3950 DI_CACHE_UNLOCK(di_cache); 3951 3952 /* 3953 * For cached snapshots, the devinfo driver always returns 3954 * a snapshot rooted at "/". 
3955 */ 3956 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); 3957 3958 return ((int)rval); 3959 } 3960 3961 /* 3962 * This is a forced update of the cache - the previous state of the cache 3963 * may be: 3964 * - unpopulated 3965 * - populated and invalid 3966 * - populated and valid 3967 */ 3968 static int 3969 di_cache_update(struct di_state *st) 3970 { 3971 int rval; 3972 uint32_t crc; 3973 struct di_all *all; 3974 3975 ASSERT(DI_CACHE_LOCKED(di_cache)); 3976 ASSERT(snapshot_is_cacheable(st)); 3977 3978 /* 3979 * Free the in-core cache and the on-disk file (if they exist) 3980 */ 3981 i_ddi_di_cache_free(&di_cache); 3982 3983 /* 3984 * Set valid flag before taking the snapshot, 3985 * so that any invalidations that arrive 3986 * during or after the snapshot are not 3987 * removed by us. 3988 */ 3989 atomic_or_32(&di_cache.cache_valid, 1); 3990 3991 rval = di_snapshot_and_clean(st); 3992 3993 if (rval == 0) { 3994 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); 3995 return (0); 3996 } 3997 3998 DI_ALL_PTR(st)->map_size = rval; 3999 if (di_mem2cache(st, &di_cache) == 0) { 4000 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); 4001 return (0); 4002 } 4003 4004 ASSERT(di_cache.cache_data); 4005 ASSERT(di_cache.cache_size > 0); 4006 4007 /* 4008 * Now that we have cached the snapshot, compute its checksum. 4009 * The checksum is only computed over the valid data in the 4010 * cache, not the entire cache. 4011 * Also, set all the fields (except checksum) before computing 4012 * checksum. 4013 */ 4014 all = (struct di_all *)di_cache.cache_data; 4015 all->cache_magic = DI_CACHE_MAGIC; 4016 all->map_size = rval; 4017 4018 ASSERT(all->cache_checksum == 0); 4019 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); 4020 all->cache_checksum = crc; 4021 4022 di_cache_write(&di_cache); 4023 4024 return (rval); 4025 } 4026 4027 static void 4028 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 4029 { 4030 va_list ap; 4031 4032 if (di_cache_debug <= DI_QUIET) 4033 return; 4034 4035 if (di_cache_debug < msglevel) 4036 return; 4037 4038 switch (msglevel) { 4039 case DI_ERR: 4040 msglevel = CE_WARN; 4041 break; 4042 case DI_INFO: 4043 case DI_TRACE: 4044 default: 4045 msglevel = CE_NOTE; 4046 break; 4047 } 4048 4049 va_start(ap, fmt); 4050 vcmn_err(msglevel, fmt, ap); 4051 va_end(ap); 4052 } 4053