/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Driver for accessing the kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/vmsystm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args) if (di_debug >= 1) cmn_err args
#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

static int di_prop_dyn = 1;	/* enable dynamic property support */

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move snapshot to different platforms.
 *
 * NOTE: Most callers should rely on the di_checkmem return value
 * being aligned, and reestablish *off_p with the aligned value, instead
 * of trying to align the size of their allocations: this approach will
 * minimize memory use.
 */
#define	DI_ALIGN(addr)	((addr + 7l) & ~7l)
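/*
 * Illustrative only (not part of the original source): DI_ALIGN rounds an
 * offset up to the next 8-byte boundary, so for example
 *	DI_ALIGN(0) == 0, DI_ALIGN(1) == 8, DI_ALIGN(9) == 16, DI_ALIGN(16) == 16
 * Offsets handed out by di_checkmem() below are already aligned this way.
 */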
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};

/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info *dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, off_p)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(off_p); \
	((stack)->depth)++; \
}

#define	DI_ALL_PTR(s)	DI_ALL(di_mem_addr((s), 0))

/*
 * With devfs, the device tree has no global locks.  The device tree is
 * dynamic and dips may come and go if they are not locked locally.  Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree.  This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */
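/*
 * Illustrative only: a devinfo-node key (struct di_dkey below) captures the
 * dip pointer plus the (major, instance, nodeid) triple, so that even if a
 * dev_info structure is freed and its address reused for a different device
 * during DR, the recycled pointer alone cannot collide with a stale key --
 * the comparison in di_dkey_cmp() also checks major, instance and nodeid.
 */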
enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};


struct i_lnode;

typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link w/ same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link w/ same i_lnode */
} i_link_t;

typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;

/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

/*
 * Check that the whole device tree is being configured as a pre-condition for
 * cleaning up /etc/devices files.
 */
#define	DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

typedef struct phci_walk_arg {
	di_off_t	off;
	struct di_state	*st;
} phci_walk_arg_t;

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct dev_info *, struct di_stack *,
    struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
    struct di_state *, struct dev_info *);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static void *di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
static int build_phci_list(dev_info_t *ph_devinfo, void *arg);

extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
    int (*)(dev_info_t *, void *), void *);


static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver %I%",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int	error;

	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error != 0) {
		mutex_destroy(&di_lock);
		return (error);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	error;

	error = mod_remove(&modlinkage);
	if (error != 0) {
		return (error);
	}

	mutex_destroy(&di_lock);
	return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int	error = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)di_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * All dev_t's map to the same, single instance.
		 */
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		break;
	}

	return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_ATTACH:
		di_states = kmem_zalloc(
		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
			kmem_free(di_states,
			    di_max_opens * sizeof (struct di_state *));
			ddi_remove_minor_node(dip, NULL);
			error = DDI_FAILURE;
		} else {
			di_dip = dip;
			ddi_report_dev(dip);

			error = DDI_SUCCESS;
		}
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_DETACH:
		ddi_remove_minor_node(dip, NULL);
		di_dip = NULL;
		kmem_free(di_states, di_max_opens * sizeof (struct di_state *));

		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device.  Each minor gets a separate
 * entry in the di_states[] table.  Based on the original minor number, we
 * discriminate opens of the full and read-only nodes.  If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
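/*
 * Illustrative only: an open of the full node (minor DI_FULL_PARENT == 0)
 * searches slots 0, 2, 4, ... of di_states[], while an open of the read-only
 * node (minor DI_READONLY_PARENT == 1) searches slots 1, 3, 5, ...  The
 * caller's dev_t is then rewritten to minor (m + DI_NODE_SPECIES), so for
 * example the first full open is assigned minor 2 and the first read-only
 * open minor 3.  DI_UNPRIVILEGED_NODE() recovers the species from the slot
 * parity later on.
 */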
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int	m;
	minor_t	minor_parent = getminor(*devp);

	if (minor_parent != DI_FULL_PARENT &&
	    minor_parent != DI_READONLY_PARENT)
		return (ENXIO);

	mutex_enter(&di_lock);

	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
		if (di_states[m] != NULL)
			continue;

		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
		break;	/* It's ours. */
	}

	if (m >= di_max_opens) {
		/*
		 * maximum open instance for device reached
		 */
		mutex_exit(&di_lock);
		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
		return (EAGAIN);
	}
	mutex_exit(&di_lock);

	ASSERT(m < di_max_opens);
	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));

	return (0);
}

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	/* bounds-check the slot index before indexing di_states[] */
	if (m < 0 || m >= di_max_opens) {
		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
		    m + DI_NODE_SPECIES);
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(st != NULL);

	di_freemem(st);
	kmem_free(st, sizeof (struct di_state));

	/*
	 * empty slot in state table
	 */
	mutex_enter(&di_lock);
	di_states[m] = NULL;
	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));
	mutex_exit(&di_lock);

	return (0);
}


/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int		rv, error;
	di_off_t	off;
	struct di_all	*all;
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;
	major_t		i;
	char		*drv_name;
	size_t		map_size, size;
	struct di_mem	*dcp;
	int		ndi_flags;

	if (m < 0 || m >= di_max_opens) {
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(st != NULL);

	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

	switch (cmd) {
	case DINFOIDENT:
		/*
		 * This is called from di_init to verify that the driver
		 * opened is indeed devinfo.  The purpose is to guard against
		 * sending an ioctl to an unknown driver in case of an
		 * unresolved major number conflict during bfu.
		 */
		*rvalp = DI_MAGIC;
		return (0);

	case DINFOLODRV:
		/*
		 * Hold an installed driver and return the result
		 */
		if (DI_UNPRIVILEGED_NODE(m)) {
			/*
			 * Only the fully enabled instances may issue
			 * DINFOLODRV.
			 */
			return (EACCES);
		}

		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
			kmem_free(drv_name, MAXNAMELEN);
			return (EFAULT);
		}

		/*
		 * Some third-party drivers' _init() routines walk the device
		 * tree, so we load the driver module before configuring the
		 * driver.
		 */
		i = ddi_name_to_major(drv_name);
		if (ddi_hold_driver(i) == NULL) {
			kmem_free(drv_name, MAXNAMELEN);
			return (ENXIO);
		}

		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

		/*
		 * i_ddi_load_drvconf() below will trigger a reprobe
		 * via reset_nexus_flags().  NDI_DRV_CONF_REPROBE isn't
		 * needed here.
		 */
		modunload_disable();
		(void) i_ddi_load_drvconf(i);
		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
		kmem_free(drv_name, MAXNAMELEN);
		ddi_rele_driver(i);
		rv = i_ddi_devs_attached(i);
		modunload_enable();

		i_ddi_di_cache_invalidate(KM_SLEEP);

		return ((rv == DDI_SUCCESS)? 0 : ENXIO);

	case DINFOUSRLD:
		/*
		 * The case for copying the snapshot to userland
		 */
		if (di_setstate(st, IOC_COPY) == -1)
			return (EBUSY);

		map_size = DI_ALL_PTR(st)->map_size;
		if (map_size == 0) {
			(void) di_setstate(st, IOC_DONE);
			return (EFAULT);
		}

		/*
		 * copyout the snapshot
		 */
		map_size = (map_size + PAGEOFFSET) & PAGEMASK;

		/*
		 * Return the map size, so the caller may do a sanity
		 * check against the return value of the snapshot ioctl()
		 */
		*rvalp = (int)map_size;

		/*
		 * Copy one chunk at a time
		 */
		off = 0;
		dcp = st->memlist;
		while (map_size) {
			size = dcp->buf_size;
			if (map_size <= size) {
				size = map_size;
			}

			if (ddi_copyout(di_mem_addr(st, off),
			    (void *)(arg + off), size, mode) != 0) {
				(void) di_setstate(st, IOC_DONE);
				return (EFAULT);
			}

			map_size -= size;
			off += size;
			dcp = dcp->next;
		}

		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (0);

	default:
		if ((cmd & ~DIIOC_MASK) != DIIOC) {
			/*
			 * Invalid ioctl command
			 */
			return (ENOTTY);
		}
		/*
		 * take a snapshot
		 */
		st->command = cmd & DIIOC_MASK;
		/*FALLTHROUGH*/
	}

	/*
	 * Obtain enough memory to hold header + rootpath.  We prevent kernel
	 * memory exhaustion by freeing any previously allocated snapshot and
	 * refusing the operation; otherwise we would be allowing ioctl(),
	 * ioctl(), ioctl(), ..., panic.
	 */
	if (di_setstate(st, IOC_SNAP) == -1)
		return (EBUSY);

	/*
	 * Initial memlist always holds di_all and the root_path - and
	 * is at least a page in size.
	 */
	size = sizeof (struct di_all) +
	    sizeof (((struct dinfo_io *)(NULL))->root_path);
	if (size < PAGESIZE)
		size = PAGESIZE;
	off = di_checkmem(st, 0, size);
	all = DI_ALL_PTR(st);
	off += sizeof (struct di_all);	/* real length of di_all */

	all->devcnt = devcnt;
	all->command = st->command;
	all->version = DI_SNAPSHOT_VERSION;
	all->top_vhci_devinfo = 0;	/* filled by build_vhci_list. */

	/*
	 * Note the endianness in case we need to transport the snapshot
	 * over the network.
	 */
#if defined(_LITTLE_ENDIAN)
	all->endianness = DI_LITTLE_ENDIAN;
#else
	all->endianness = DI_BIG_ENDIAN;
#endif

	/* Copyin ioctl args, store in the snapshot. */
	if (copyinstr((void *)arg, all->root_path,
	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	}
	off += size;	/* real length of root_path */

	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EINVAL);
	}

	error = 0;
	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (error);
	}

	/*
	 * Only the fully enabled version may force load drivers or read
	 * the parent private data from a driver.
	 */
	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	    DI_UNPRIVILEGED_NODE(m)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EACCES);
	}

	/* Do we need private data? */
	if (st->command & DINFOPRIVDATA) {
		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			/*
			 * Cannot copy private data from 64-bit kernel
			 * to 32-bit app
			 */
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EINVAL);
		}
		case DDI_MODEL_NONE:
			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
				di_freemem(st);
				(void) di_setstate(st, IOC_IDLE);
				return (EFAULT);
			}
			break;
		}
#else /* !_MULTI_DATAMODEL */
		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EFAULT);
		}
#endif /* _MULTI_DATAMODEL */
	}

	all->top_devinfo = DI_ALIGN(off);

	/*
	 * For cache lookups we reallocate memory from scratch,
	 * so the value of "all" is no longer valid.
	 */
	all = NULL;

	if (st->command & DINFOCACHE) {
		*rvalp = di_cache_lookup(st);
	} else if (snapshot_is_cacheable(st)) {
		DI_CACHE_LOCK(di_cache);
		*rvalp = di_cache_update(st);
		DI_CACHE_UNLOCK(di_cache);
	} else
		*rvalp = di_snapshot_and_clean(st);

	if (*rvalp) {
		DI_ALL_PTR(st)->map_size = *rvalp;
		(void) di_setstate(st, IOC_DONE);
	} else {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
	}

	return (0);
}
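/*
 * Illustrative only -- a hedged sketch (not part of this driver) of the
 * two-step protocol a userland consumer such as libdevinfo follows against
 * di_ioctl() above.  The device path and the assumption that root_path is
 * the first member of struct dinfo_io are taken from the minor node created
 * in di_attach() and <sys/devinfo_impl.h>:
 *
 *	struct dinfo_io dinfo_io;
 *	long pgsz = sysconf(_SC_PAGESIZE);
 *	char *buf;
 *	int fd, map_size;
 *
 *	fd = open("/devices/pseudo/devinfo@0:devinfo", O_RDONLY);
 *	if (ioctl(fd, DINFOIDENT, NULL) != DI_MAGIC)
 *		return;				--- not the devinfo driver
 *	(void) strcpy(dinfo_io.root_path, "/");
 *	map_size = ioctl(fd, DINFOSUBTREE | DINFOPROP, &dinfo_io);
 *	if (map_size > 0) {
 *		--- DINFOUSRLD copies out a page-rounded number of bytes
 *		buf = malloc((map_size + pgsz - 1) & ~(pgsz - 1));
 *		(void) ioctl(fd, DINFOUSRLD, buf);
 *	}
 */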
/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
	struct di_mem	*mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);

	/*
	 * Round up size to nearest power of 2.  If it is less
	 * than st->mem_size, set it to st->mem_size (i.e.,
	 * the mem_size is doubled every time) to reduce the
	 * number of memory allocations.
	 */
	size_t tmp = 1;
	while (tmp < size) {
		tmp <<= 1;
	}
	size = (tmp > st->mem_size) ? tmp : st->mem_size;

	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
	mem->buf_size = size;

	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

	if (st->mem_size == 0) {	/* first chunk */
		st->memlist = mem;
	} else {
		/*
		 * locate end of linked list and add a chunk at the end
		 */
		struct di_mem *dcp = st->memlist;
		while (dcp->next != NULL) {
			dcp = dcp->next;
		}

		dcp->next = mem;
	}

	st->mem_size += size;
}

/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
	struct di_mem	*dcp;
	size_t		copysz;

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		return;
	}

	copysz = 0;
	for (dcp = st->memlist; dcp; dcp = dcp->next) {

		ASSERT(bufsiz > 0);

		if (bufsiz <= dcp->buf_size)
			copysz = bufsiz;
		else
			copysz = dcp->buf_size;

		bcopy(dcp->buf, buf, copysz);

		buf += copysz;
		bufsiz -= copysz;

		if (bufsiz == 0)
			break;
	}
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
	struct di_mem	*dcp, *tmp;

	dcmn_err2((CE_CONT, "di_freemem\n"));

	if (st->mem_size) {
		dcp = st->memlist;
		while (dcp) {	/* traverse the linked list */
			tmp = dcp;
			dcp = dcp->next;
			ddi_umem_free(tmp->cook);
			kmem_free(tmp, sizeof (struct di_mem));
		}
		st->mem_size = 0;
		st->memlist = NULL;
	}

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
}

/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
	caddr_t	pa;

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
		return (0);
	}

	ASSERT(cache->cache_data);

	di_allocmem(st, cache->cache_size);

	pa = di_mem_addr(st, 0);

	ASSERT(pa);

	/*
	 * Verify that di_allocmem() allocates contiguous memory,
	 * so that it is safe to do straight bcopy()
	 */
	ASSERT(st->memlist != NULL);
	ASSERT(st->memlist->next == NULL);
	bcopy(cache->cache_data, pa, cache->cache_size);

	return (cache->cache_size);
}

/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *	- 0 on failure
 *	- size of copied data on success
 */
static size_t
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
	size_t	map_size;

	ASSERT(cache->cache_size == 0);
	ASSERT(cache->cache_data == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
		return (0);
	}

	ASSERT(st->memlist);

	/*
	 * The size of the memory list may be much larger than the
	 * size of valid data (map_size).  Cache only the valid data.
	 */
	map_size = DI_ALL_PTR(st)->map_size;
	if (map_size == 0 || map_size < sizeof (struct di_all) ||
	    map_size > st->mem_size) {
		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
		return (0);
	}

	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
	cache->cache_size = map_size;
	di_copymem(st, cache->cache_data, cache->cache_size);

	return (map_size);
}

/*
 * Make sure there is at least "size" bytes of memory left before
 * going on.  Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
	    off, (int)size));

	/*
	 * di_checkmem() shouldn't be called with a size of zero.
	 * But in case it is, we want to make sure we return a valid
	 * offset within the memlist and not an offset that points us
	 * at the end of the memlist.
	 */
	if (size == 0) {
		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
		size = 1;
	}

	off = DI_ALIGN(off);
	if ((st->mem_size - off) < size) {
		off = st->mem_size;
		di_allocmem(st, size);
	}

	/* verify that return value is aligned */
	ASSERT(off == DI_ALIGN(off));
	return (off);
}
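/*
 * Illustrative only: the idiom used throughout this file to append an
 * object to the snapshot is
 *
 *	off = di_checkmem(st, off, size);	--- aligned offset with room
 *	bcopy(src, di_mem_addr(st, off), size);
 *	off += size;				--- may end unaligned; the
 *						    next di_checkmem realigns
 *
 * di_checkmem() guarantees the returned offset is DI_ALIGN()ed and that
 * "size" bytes are available there, allocating a new chunk if needed.
 */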
/*
 * Copy the private data format from ioctl arg.
 * On success, the ending offset is returned.  On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
	di_off_t		size;
	struct di_priv_data	*priv;
	struct di_all		*all = DI_ALL_PTR(st);

	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	    off, (void *)arg, mode));

	/*
	 * Copyin data and check version.
	 * We only handle private data version 0.
	 */
	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
		kmem_free(priv, sizeof (struct di_priv_data));
		return (0);
	}

	/*
	 * Save di_priv_data copied from userland in snapshot.
	 */
	all->pd_version = priv->version;
	all->n_ppdata = priv->n_parent;
	all->n_dpdata = priv->n_driver;

	/*
	 * copyin private data format, modify offset accordingly
	 */
	if (all->n_ppdata) {	/* parent private data format */
		/*
		 * check memory
		 */
		size = all->n_ppdata * sizeof (struct di_priv_format);
		all->ppdata_format = off = di_checkmem(st, off, size);
		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	if (all->n_dpdata) {	/* driver private data format */
		/*
		 * check memory
		 */
		size = all->n_dpdata * sizeof (struct di_priv_format);
		all->dpdata_format = off = di_checkmem(st, off, size);
		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	kmem_free(priv, sizeof (struct di_priv_data));
	return (off);
}

/*
 * Return the real address based on the offset (off) within snapshot
 */
static void *
di_mem_addr(struct di_state *st, di_off_t off)
{
	struct di_mem	*dcp = st->memlist;

	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	    (void *)dcp, off));

	ASSERT(off < st->mem_size);

	while (off >= dcp->buf_size) {
		off -= dcp->buf_size;
		dcp = dcp->next;
	}

	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	    off, (void *)(dcp->buf + off)));

	return (dcp->buf + off);
}

/*
 * Ideally we would use the whole key to derive the hash
 * value.  However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
	struct di_key	*dik = key;
	size_t		rshift;
	void		*ptr;

	ASSERT(arg == NULL);

	switch (dik->k_type) {
	case DI_DKEY:
		ptr = dik->k_u.dkey.dk_dip;
		rshift = highbit(sizeof (struct dev_info));
		break;
	case DI_PKEY:
		ptr = dik->k_u.pkey.pk_pip;
		rshift = highbit(sizeof (struct mdi_pathinfo));
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
	return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
	char		*path_addr;
	struct di_key	*dik = key;

	switch (dik->k_type) {
	case DI_DKEY:
		break;
	case DI_PKEY:
		path_addr = dik->k_u.pkey.pk_path_addr;
		if (path_addr)
			kmem_free(path_addr, strlen(path_addr) + 1);
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}

	kmem_free(dik, sizeof (struct di_key));
}

static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
	if (dk1->dk_dip != dk2->dk_dip)
		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

	if (dk1->dk_major != DDI_MAJOR_T_NONE &&
	    dk2->dk_major != DDI_MAJOR_T_NONE) {
		if (dk1->dk_major != dk2->dk_major)
			return (dk1->dk_major > dk2->dk_major ? 1 : -1);

		if (dk1->dk_inst != dk2->dk_inst)
			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
	}

	if (dk1->dk_nodeid != dk2->dk_nodeid)
		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);
	return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
	char	*p1, *p2;
	int	rv;

	if (pk1->pk_pip != pk2->pk_pip)
		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

	p1 = pk1->pk_path_addr;
	p2 = pk2->pk_path_addr;

	p1 = p1 ? p1 : "";
	p2 = p2 ? p2 : "";

	rv = strcmp(p1, p2);
	if (rv)
		return (rv > 0 ? 1 : -1);

	if (pk1->pk_client != pk2->pk_client)
		return (pk1->pk_client > pk2->pk_client ? 1 : -1);

	if (pk1->pk_phci != pk2->pk_phci)
		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

	return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	struct di_key	*dik1, *dik2;

	dik1 = key1;
	dik2 = key2;

	if (dik1->k_type != dik2->k_type) {
		panic("devinfo: mismatched keys");
		/*NOTREACHED*/
	}

	switch (dik1->k_type) {
	case DI_DKEY:
		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
	case DI_PKEY:
		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
}

/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
	di_off_t	off;
	struct di_all	*all;
	dev_info_t	*rootnode;
	char		buf[80];
	int		plen;
	char		*path;
	vnode_t		*vp;

	all = DI_ALL_PTR(st);
	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

	/*
	 * Verify the path before entrusting it to e_ddi_hold_devi_by_path
	 * because some platforms have OBP bugs where executing the
	 * NDI_PROMNAME code path against an invalid path results in a panic.
	 * The lookupnameat is done relative to rootdir without a leading '/'
	 * on "devices/" to force the lookup to occur in the global zone.
	 */
	plen = strlen("devices/") + strlen(all->root_path) + 1;
	path = kmem_alloc(plen, KM_SLEEP);
	(void) snprintf(path, plen, "devices/%s", all->root_path);
	if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		kmem_free(path, plen);
		return (0);
	}
	kmem_free(path, plen);
	VN_RELE(vp);

	/*
	 * Hold the devinfo node referred to by the path.
	 */
	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
	if (rootnode == NULL) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		return (0);
	}

	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered dips (statep=%p)", (void *)st);

	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);


	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered pips (statep=%p)", (void *)st);

	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);

	/*
	 * copy the device tree
	 */
	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

	if (DINFOPATH & st->command) {
		mdi_walk_vhcis(build_vhci_list, st);
	}

	ddi_release_devi(rootnode);

	/*
	 * copy the devnames array
	 */
	all->devnames = off;
	off = di_copydevnm(&all->devnames, st);


	/* initialize the hash tables */
	st->lnode_count = 0;
	st->link_count = 0;

	if (DINFOLYR & st->command) {
		off = di_getlink_data(off, st);
	}

	/*
	 * Free up hash tables
	 */
	mod_hash_destroy_hash(st->reg_dip_hash);
	mod_hash_destroy_hash(st->reg_pip_hash);

	/*
	 * Record the timestamp now that we are done with the snapshot.
	 *
	 * We compute the checksum later and then only if we cache
	 * the snapshot, since checksumming adds some overhead.
	 * The checksum is checked later when the cache file is read
	 * from disk.
	 *
	 * Set the checksum field to 0 as the CRC is calculated with that
	 * field set to 0.
	 */
	all->snapshot_time = ddi_get_time();
	all->cache_checksum = 0;

	ASSERT(all->snapshot_time != 0);

	return (off);
}

/*
 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
 */
static di_off_t
di_snapshot_and_clean(struct di_state *st)
{
	di_off_t	off;

	modunload_disable();
	off = di_snapshot(st);
	if (off != 0 && (st->command & DINFOCLEANUP)) {
		ASSERT(DEVICES_FILES_CLEANABLE(st));
		/*
		 * Cleanup /etc/devices files:
		 * In order to accurately account for the system configuration
		 * in /etc/devices files, the appropriate drivers must be
		 * fully configured before the cleanup starts.
		 * So enable modunload only after the cleanup.
		 */
		i_ddi_clean_devices_files();
		/*
		 * Remove backing store nodes for unused devices,
		 * which retain past permissions customizations
		 * and may be undesired for newly configured devices.
		 */
		dev_devices_cleanup();
	}
	modunload_enable();

	return (off);
}
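/*
 * Illustrative only: build_vhci_list() and build_phci_list() below thread
 * the multipath nodes already placed in the snapshot into singly linked
 * lists of snapshot offsets, e.g. for two vHCIs with two pHCIs each:
 *
 *	di_all.top_vhci_devinfo -> vhci0 { next_vhci -> vhci1 }
 *	vhci0.top_phci -> phci00 { next_phci -> phci01 }
 *	vhci1.top_phci -> phci10 { next_phci -> phci11 }
 *
 * The offsets come from di_dip_find(), i.e. only nodes that made it into
 * the snapshot are linked.
 */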
/*
 * construct vhci linkage in the snapshot.
 */
static int
build_vhci_list(dev_info_t *vh_devinfo, void *arg)
{
	struct di_all	*all;
	struct di_node	*me;
	struct di_state	*st;
	di_off_t	off;
	phci_walk_arg_t	pwa;

	dcmn_err3((CE_CONT, "build_vhci list\n"));

	dcmn_err3((CE_CONT, "vhci node %s%d\n",
	    ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));

	st = (struct di_state *)arg;
	if (di_dip_find(st, vh_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
	    st->mem_size, off));

	all = DI_ALL_PTR(st);
	if (all->top_vhci_devinfo == 0) {
		all->top_vhci_devinfo = off;
	} else {
		me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));

		while (me->next_vhci != 0) {
			me = DI_NODE(di_mem_addr(st, me->next_vhci));
		}

		me->next_vhci = off;
	}

	pwa.off = off;
	pwa.st = st;
	mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);

	return (DDI_WALK_CONTINUE);
}

/*
 * construct phci linkage for the given vhci in the snapshot.
 */
static int
build_phci_list(dev_info_t *ph_devinfo, void *arg)
{
	struct di_node	*vh_di_node;
	struct di_node	*me;
	phci_walk_arg_t	*pwa;
	di_off_t	off;

	pwa = (phci_walk_arg_t *)arg;

	dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
	    pwa->off));

	vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
	if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
	    ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));

	if (vh_di_node->top_phci == 0) {
		vh_di_node->top_phci = off;
		return (DDI_WALK_CONTINUE);
	}

	me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));

	while (me->next_phci != 0) {
		me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
	}
	me->next_phci = off;

	return (DDI_WALK_CONTINUE);
}

/*
 * Assumes all devinfo nodes in device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
{
	struct dev_info	*node;
	struct di_node	*me;
	di_off_t	off;

	ASSERT(mutex_owned(&dnp->dn_lock));

	node = DEVI(dnp->dn_head);
	for (; node; node = node->devi_next) {
		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
			continue;

		ASSERT(off > 0);
		me = DI_NODE(di_mem_addr(st, off));
		ASSERT(me->next == 0 || me->next == -1);
		/*
		 * Only nodes which were BOUND when they were
		 * snapshotted will be added to per-driver list.
		 */
		if (me->next != -1)
			continue;

		*off_p = off;
		off_p = &me->next;
	}

	*off_p = 0;
}

/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * Also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
	int		i;
	di_off_t	off;
	size_t		size;
	struct di_devnm	*dnp;

	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

	/*
	 * make sure there is some allocated memory
	 */
	size = devcnt * sizeof (struct di_devnm);
	*off_p = off = di_checkmem(st, *off_p, size);
	dnp = DI_DEVNM(di_mem_addr(st, off));
	off += size;

	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	    devcnt, off));

	for (i = 0; i < devcnt; i++) {
		if (devnamesp[i].dn_name == NULL) {
			continue;
		}

		/*
		 * dn_name is not freed during driver unload or removal.
		 *
		 * There is a race condition when make_devname() changes
		 * dn_name during our strcpy.  This should be rare since
		 * only add_drv does this.  At any rate, we never had a
		 * problem with ddi_name_to_major(), which is exposed to
		 * the same race.
		 */
		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
		    devnamesp[i].dn_name, devnamesp[i].dn_instance, off));

		size = strlen(devnamesp[i].dn_name) + 1;
		dnp[i].name = off = di_checkmem(st, off, size);
		(void) strcpy((char *)di_mem_addr(st, off),
		    devnamesp[i].dn_name);
		off += size;

		mutex_enter(&devnamesp[i].dn_lock);

		/*
		 * Snapshot per-driver node list
		 */
		snap_driver_list(st, &devnamesp[i], &dnp[i].head);

		/*
		 * This is not used by libdevinfo, leave it for now
		 */
		dnp[i].flags = devnamesp[i].dn_flags;
		dnp[i].instance = devnamesp[i].dn_instance;

		/*
		 * Bit-encode driver ops: bus_ops, cb_ops, and cb_ops->cb_str
		 */
		if (CB_DRV_INSTALLED(devopsp[i])) {
			if (devopsp[i]->devo_cb_ops) {
				dnp[i].ops |= DI_CB_OPS;
				if (devopsp[i]->devo_cb_ops->cb_str)
					dnp[i].ops |= DI_STREAM_OPS;
			}
			if (NEXUS_DRV(devopsp[i])) {
				dnp[i].ops |= DI_BUS_OPS;
			}
		}

		mutex_exit(&devnamesp[i].dn_lock);
	}

	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

	return (off);
}

/*
 * Copy the kernel devinfo tree.  The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t	off;
	struct dev_info	*node;
	struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/* force attach drivers */
	if (i_ddi_devi_attached((dev_info_t *)root) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.
	 */
	while (!EMPTY_STACK(dsp)) {
		node = TOP_NODE(dsp);
		off = di_copynode(node, dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}

/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot.  The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
{
	di_off_t	off;
	struct di_node	*me;
	size_t		size;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
	ASSERT((node != NULL) && (node == TOP_NODE(dsp)));

	/*
	 * check memory usage, and fix offsets accordingly.
	 */
	size = sizeof (struct di_node);
	*(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
	me = DI_NODE(di_mem_addr(st, off));
	me->self = off;
	off += size;

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state
	 */
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->flags = node->devi_flags;
	me->node_state = node->devi_node_state;
	me->next_vhci = 0;		/* Filled in by build_vhci_list. */
	me->top_phci = 0;		/* Filled in by build_phci_list. */
	me->next_phci = 0;		/* Filled in by build_phci_list. */
	me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
	me->user_private_data = NULL;

	/*
	 * Get parent's offset in snapshot from the stack
	 * and store it in the current node
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);

#ifdef	DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid_str) {
		ddi_devid_t	devid;

		/*
		 * The devid is now represented as a property.  For
		 * compatibility with the di_devid() interface in libdevinfo
		 * we must return it as a binary structure in the snapshot.
		 * When (if) di_devid() is removed from libdevinfo then the
		 * code related to DEVID_COMPATIBILITY can be removed.
		 */
		if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
		    DDI_SUCCESS) {
			size = ddi_devid_sizeof(devid);
			off = di_checkmem(st, off, size);
			me->devid = off;
			bcopy(devid, di_mem_addr(st, off), size);
			off += size;
			ddi_devid_free(devid);
		}
	}
#endif	/* DEVID_COMPATIBILITY */

	if (node->devi_node_name) {
		size = strlen(node->devi_node_name) + 1;
		me->node_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += size;
	}

	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		size = node->devi_compat_length;
		me->compat_names = off = di_checkmem(st, off, size);
		me->compat_length = (int)size;
		bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
		off += size;
	}

	if (node->devi_addr) {
		size = strlen(node->devi_addr) + 1;
		me->address = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += size;
	}

	if (node->devi_binding_name) {
		size = strlen(node->devi_binding_name) + 1;
		me->bind_name = off = di_checkmem(st, off, size);
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += size;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-instance list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
		goto priv_data;
	}

	/*
	 * LOCKING: We already have an active ndi_devi_enter to gather the
	 * minor data, and we will take devi_lock to gather properties as
	 * needed off di_getprop.
	 */
	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	ASSERT(DEVI_BUSY_OWNED(node));
	if (node->devi_minor) {		/* minor data */
		me->minor_data = off;
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_VHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_VHCI;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
		me->multipath_client = off;
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_client, me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}

	if (MDI_PHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_PHCI;
		me->multipath_phci = off;
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_phci, me->self, st, 0);
		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_phci,
		    (void *)node, node->devi_mdi_component, off));
	}
off=%d", 1871 me->multipath_phci, 1872 (void *)node, node->devi_mdi_component, off)); 1873 } 1874 1875 property: 1876 if (!(DINFOPROP & st->command)) { 1877 goto priv_data; 1878 } 1879 1880 if (node->devi_drv_prop_ptr) { /* driver property list */ 1881 me->drv_prop = off; 1882 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr, 1883 &me->drv_prop, st, node); 1884 } 1885 1886 if (node->devi_sys_prop_ptr) { /* system property list */ 1887 me->sys_prop = off; 1888 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr, 1889 &me->sys_prop, st, node); 1890 } 1891 1892 if (node->devi_hw_prop_ptr) { /* hardware property list */ 1893 me->hw_prop = off; 1894 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr, 1895 &me->hw_prop, st, node); 1896 } 1897 1898 if (node->devi_global_prop_list == NULL) { 1899 me->glob_prop = (di_off_t)-1; /* not global property */ 1900 } else { 1901 /* 1902 * Make copy of global property list if this devinfo refers 1903 * global properties different from what's on the devnames 1904 * array. It can happen if there has been a forced 1905 * driver.conf update. See mod_drv(1M). 1906 */ 1907 ASSERT(me->drv_major != -1); 1908 if (node->devi_global_prop_list != 1909 devnamesp[me->drv_major].dn_global_prop_ptr) { 1910 me->glob_prop = off; 1911 off = di_getprop(DI_PROP_GLB_LIST, 1912 &node->devi_global_prop_list->prop_list, 1913 &me->glob_prop, st, node); 1914 } 1915 } 1916 1917 priv_data: 1918 if (!(DINFOPRIVDATA & st->command)) { 1919 goto pm_info; 1920 } 1921 1922 if (ddi_get_parent_data((dev_info_t *)node) != NULL) { 1923 me->parent_data = off; 1924 off = di_getppdata(node, &me->parent_data, st); 1925 } 1926 1927 if (ddi_get_driver_private((dev_info_t *)node) != NULL) { 1928 me->driver_data = off; 1929 off = di_getdpdata(node, &me->driver_data, st); 1930 } 1931 1932 pm_info: /* NOT implemented */ 1933 1934 subtree: 1935 /* keep the stack aligned */ 1936 off = DI_ALIGN(off); 1937 1938 if (!(DINFOSUBTREE & st->command)) { 1939 POP_STACK(dsp); 1940 return (off); 1941 } 1942 1943 child: 1944 /* 1945 * If there is a child--push child onto stack. 1946 * Hold the parent busy while doing so. 
	 */
	if (node->devi_child) {
		me->child = off;
		PUSH_STACK(dsp, node->devi_child, &me->child);
		return (me->child);
	}

sibling:
	/*
	 * no child node, unroll the stack till a sibling of
	 * a parent node is found or the root node is reached
	 */
	POP_STACK(dsp);
	while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) {
		node = TOP_NODE(dsp);
		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
		POP_STACK(dsp);
	}

	if (!EMPTY_STACK(dsp)) {
		/*
		 * a sibling is found, replace top of stack by its sibling
		 */
		me->sibling = off;
		PUSH_STACK(dsp, node->devi_sibling, &me->sibling);
		return (me->sibling);
	}

	/*
	 * DONE with all nodes
	 */
	return (off);
}

static i_lnode_t *
i_lnode_alloc(int modid)
{
	i_lnode_t	*i_lnode;

	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);

	ASSERT(modid != -1);
	i_lnode->modid = modid;

	return (i_lnode);
}

static void
i_lnode_free(i_lnode_t *i_lnode)
{
	kmem_free(i_lnode, sizeof (i_lnode_t));
}

static void
i_lnode_check_free(i_lnode_t *i_lnode)
{
	/* This lnode and its dip must have been snapshotted */
	ASSERT(i_lnode->self > 0);
	ASSERT(i_lnode->di_node->self > 0);

	/* at least 1 link (in or out) must exist for this lnode */
	ASSERT(i_lnode->link_in || i_lnode->link_out);

	i_lnode_free(i_lnode);
}

static i_link_t *
i_link_alloc(int spec_type)
{
	i_link_t	*i_link;

	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
	i_link->spec_type = spec_type;

	return (i_link);
}

static void
i_link_check_free(i_link_t *i_link)
{
	/* This link must have been snapshotted */
	ASSERT(i_link->self > 0);

	/* Both endpoint lnodes must exist for this link */
	ASSERT(i_link->src_lnode);
	ASSERT(i_link->tgt_lnode);

	kmem_free(i_link, sizeof (i_link_t));
}

/*ARGSUSED*/
static uint_t
i_lnode_hashfunc(void *arg, mod_hash_key_t key)
{
	i_lnode_t	*i_lnode = (i_lnode_t *)key;
	struct di_node	*ptr;
	dev_t		dev;

	dev = i_lnode->devt;
	if (dev != DDI_DEV_T_NONE)
		return (i_lnode->modid + getminor(dev) + getmajor(dev));

	ptr = i_lnode->di_node;
	if (ptr) {
		uintptr_t k = (uintptr_t)ptr;

		/* only dereference ptr after the NULL check above */
		ASSERT(ptr->self > 0);
		k >>= (int)highbit(sizeof (struct di_node));
		return ((uint_t)k);
	}

	return (i_lnode->modid);
}

static int
i_lnode_cmp(void *arg1, void *arg2)
{
	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;

	if (i_lnode1->modid != i_lnode2->modid) {
		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
	}

	if (i_lnode1->di_node != i_lnode2->di_node)
		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);

	if (i_lnode1->devt != i_lnode2->devt)
		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);

	return (0);
}

/*
 * An lnode represents a {dip, dev_t} tuple.  A link represents a
 * {src_lnode, tgt_lnode, spec_type} tuple.
 * The following callback assumes that the LDI framework ref-counts the
 * src_dip and tgt_dip while invoking this callback.
 */
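/*
 * Illustrative only: for an LDI usage where, say, a volume manager with
 * module id M opens a disk device, di_ldi_callback() below produces
 *
 *	src lnode = { M,  vm_dip,   vm_devt }	(the consumer)
 *	tgt lnode = { M', disk_dip, disk_devt }	(the provider)
 *	link	  = { src, tgt, spec_type }	(S_IFCHR or S_IFBLK access)
 *
 * where vm_dip/disk_dip etc. are hypothetical names.  Lnodes are
 * deduplicated through lnode_hash, so a node that participates in many
 * layered opens appears once, with its link_in/link_out chains threading
 * every link that targets or originates from it.
 */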
2085 static int 2086 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg) 2087 { 2088 struct di_state *st = (struct di_state *)arg; 2089 i_lnode_t *src_lnode, *tgt_lnode, *i_lnode; 2090 i_link_t **i_link_next, *i_link; 2091 di_off_t soff, toff; 2092 mod_hash_val_t nodep = NULL; 2093 int res; 2094 2095 /* 2096 * if the source or target of this device usage information doesn't 2097 * correspond to a device node, then we don't report it via 2098 * libdevinfo, so return. 2099 */ 2100 if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL)) 2101 return (LDI_USAGE_CONTINUE); 2102 2103 ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip)); 2104 ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip)); 2105 2106 /* 2107 * Skip the ldi_usage if either src or tgt dip is not in the 2108 * snapshot. This saves us from pruning bad lnodes/links later. 2109 */ 2110 if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0) 2111 return (LDI_USAGE_CONTINUE); 2112 if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0) 2113 return (LDI_USAGE_CONTINUE); 2114 2115 ASSERT(soff > 0); 2116 ASSERT(toff > 0); 2117 2118 /* 2119 * allocate an i_lnode and add it to the lnode hash 2120 * if it is not already present. For this particular 2121 * link the lnode is a source, but it may 2122 * participate as tgt or src in any number of layered 2123 * operations - so it may already be in the hash. 2124 */ 2125 i_lnode = i_lnode_alloc(ldi_usage->src_modid); 2126 i_lnode->di_node = DI_NODE(di_mem_addr(st, soff)); 2127 i_lnode->devt = ldi_usage->src_devt; 2128 2129 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2130 if (res == MH_ERR_NOTFOUND) { 2131 /* 2132 * new i_lnode 2133 * add it to the hash and increment the lnode count 2134 */ 2135 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2136 ASSERT(res == 0); 2137 st->lnode_count++; 2138 src_lnode = i_lnode; 2139 } else { 2140 /* this i_lnode already exists in the lnode_hash */ 2141 i_lnode_free(i_lnode); 2142 src_lnode = (i_lnode_t *)nodep; 2143 } 2144 2145 /* 2146 * allocate a tgt i_lnode and add it to the lnode hash 2147 */ 2148 i_lnode = i_lnode_alloc(ldi_usage->tgt_modid); 2149 i_lnode->di_node = DI_NODE(di_mem_addr(st, toff)); 2150 i_lnode->devt = ldi_usage->tgt_devt; 2151 2152 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2153 if (res == MH_ERR_NOTFOUND) { 2154 /* 2155 * new i_lnode 2156 * add it to the hash and increment the lnode count 2157 */ 2158 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2159 ASSERT(res == 0); 2160 st->lnode_count++; 2161 tgt_lnode = i_lnode; 2162 } else { 2163 /* this i_lnode already exists in the lnode_hash */ 2164 i_lnode_free(i_lnode); 2165 tgt_lnode = (i_lnode_t *)nodep; 2166 } 2167 2168 /* 2169 * allocate an i_link 2170 */ 2171 i_link = i_link_alloc(ldi_usage->tgt_spec_type); 2172 i_link->src_lnode = src_lnode; 2173 i_link->tgt_lnode = tgt_lnode; 2174 2175 /* 2176 * add this link onto the src i_lnode's outbound i_link list 2177 */ 2178 i_link_next = &(src_lnode->link_out); 2179 while (*i_link_next != NULL) { 2180 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) && 2181 (i_link->spec_type == (*i_link_next)->spec_type)) { 2182 /* this link already exists */ 2183 kmem_free(i_link, sizeof (i_link_t)); 2184 return (LDI_USAGE_CONTINUE); 2185 } 2186 i_link_next = &((*i_link_next)->src_link_next); 2187 } 2188 *i_link_next = i_link; 2189 2190 /* 2191 * add this link onto the tgt i_lnode's inbound i_link list 2192 */ 2193 i_link_next = &(tgt_lnode->link_in); 2194 while (*i_link_next != NULL) { 2195
ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); 2196 i_link_next = &((*i_link_next)->tgt_link_next); 2197 } 2198 *i_link_next = i_link; 2199 2200 /* 2201 * add this i_link to the link hash 2202 */ 2203 res = mod_hash_insert(st->link_hash, i_link, i_link); 2204 ASSERT(res == 0); 2205 st->link_count++; 2206 2207 return (LDI_USAGE_CONTINUE); 2208 } 2209 2210 struct i_layer_data { 2211 struct di_state *st; 2212 int lnode_count; 2213 int link_count; 2214 di_off_t lnode_off; 2215 di_off_t link_off; 2216 }; 2217 2218 /*ARGSUSED*/ 2219 static uint_t 2220 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2221 { 2222 i_link_t *i_link = (i_link_t *)key; 2223 struct i_layer_data *data = arg; 2224 struct di_link *me; 2225 struct di_lnode *melnode; 2226 struct di_node *medinode; 2227 2228 ASSERT(i_link->self == 0); 2229 2230 i_link->self = data->link_off + 2231 (data->link_count * sizeof (struct di_link)); 2232 data->link_count++; 2233 2234 ASSERT(data->link_off > 0 && data->link_count > 0); 2235 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ 2236 ASSERT(data->link_count <= data->st->link_count); 2237 2238 /* fill in fields for the di_link snapshot */ 2239 me = DI_LINK(di_mem_addr(data->st, i_link->self)); 2240 me->self = i_link->self; 2241 me->spec_type = i_link->spec_type; 2242 2243 /* 2244 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t 2245 * are created during the LDI table walk. Since we are 2246 * walking the link hash, the lnode hash has already been 2247 * walked and the lnodes have been snapshotted. Save lnode 2248 * offsets. 2249 */ 2250 me->src_lnode = i_link->src_lnode->self; 2251 me->tgt_lnode = i_link->tgt_lnode->self; 2252 2253 /* 2254 * Save this link's offset in the src_lnode snapshot's link_out 2255 * field 2256 */ 2257 melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode)); 2258 me->src_link_next = melnode->link_out; 2259 melnode->link_out = me->self; 2260 2261 /* 2262 * Put this link on the tgt_lnode's link_in field 2263 */ 2264 melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode)); 2265 me->tgt_link_next = melnode->link_in; 2266 melnode->link_in = me->self; 2267 2268 /* 2269 * An i_lnode_t is only created if the corresponding dip exists 2270 * in the snapshot. A pointer to the di_node is saved in the 2271 * i_lnode_t when it is allocated. For this link, get the di_node 2272 * for the source lnode. Then put the link on the di_node's list 2273 * of src links 2274 */ 2275 medinode = i_link->src_lnode->di_node; 2276 me->src_node_next = medinode->src_links; 2277 medinode->src_links = me->self; 2278 2279 /* 2280 * Put this link on the tgt_links list of the target 2281 * dip. 
2282 */ 2283 medinode = i_link->tgt_lnode->di_node; 2284 me->tgt_node_next = medinode->tgt_links; 2285 medinode->tgt_links = me->self; 2286 2287 return (MH_WALK_CONTINUE); 2288 } 2289 2290 /*ARGSUSED*/ 2291 static uint_t 2292 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2293 { 2294 i_lnode_t *i_lnode = (i_lnode_t *)key; 2295 struct i_layer_data *data = arg; 2296 struct di_lnode *me; 2297 struct di_node *medinode; 2298 2299 ASSERT(i_lnode->self == 0); 2300 2301 i_lnode->self = data->lnode_off + 2302 (data->lnode_count * sizeof (struct di_lnode)); 2303 data->lnode_count++; 2304 2305 ASSERT(data->lnode_off > 0 && data->lnode_count > 0); 2306 ASSERT(data->link_count == 0); /* links not done yet */ 2307 ASSERT(data->lnode_count <= data->st->lnode_count); 2308 2309 /* fill in fields for the di_lnode snapshot */ 2310 me = DI_LNODE(di_mem_addr(data->st, i_lnode->self)); 2311 me->self = i_lnode->self; 2312 2313 if (i_lnode->devt == DDI_DEV_T_NONE) { 2314 me->dev_major = DDI_MAJOR_T_NONE; 2315 me->dev_minor = DDI_MAJOR_T_NONE; 2316 } else { 2317 me->dev_major = getmajor(i_lnode->devt); 2318 me->dev_minor = getminor(i_lnode->devt); 2319 } 2320 2321 /* 2322 * The dip corresponding to this lnode must exist in 2323 * the snapshot or we wouldn't have created the i_lnode_t 2324 * during LDI walk. Save the offset of the dip. 2325 */ 2326 ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0); 2327 me->node = i_lnode->di_node->self; 2328 2329 /* 2330 * There must be at least one link in or out of this lnode 2331 * or we wouldn't have created it. These fields will be set 2332 * during the link hash walk. 2333 */ 2334 ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL)); 2335 2336 /* 2337 * set the offset of the devinfo node associated with this 2338 * lnode. Also update the node_next pointer; this pointer 2339 * is set if there are multiple lnodes associated with the same 2340 * devinfo node. (could occur when multiple minor nodes 2341 * are open for one device, etc.)
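 * For example, a device with both its block and character minor nodes open contributes two {dip, dev_t} lnodes, and the assignments below chain both off the same di_node through node_next.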
2342 */ 2343 medinode = i_lnode->di_node; 2344 me->node_next = medinode->lnodes; 2345 medinode->lnodes = me->self; 2346 2347 return (MH_WALK_CONTINUE); 2348 } 2349 2350 static di_off_t 2351 di_getlink_data(di_off_t off, struct di_state *st) 2352 { 2353 struct i_layer_data data = {0}; 2354 size_t size; 2355 2356 dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); 2357 2358 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, 2359 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, 2360 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); 2361 2362 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, 2363 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); 2364 2365 /* get driver layering information */ 2366 (void) ldi_usage_walker(st, di_ldi_callback); 2367 2368 /* check if there is any link data to include in the snapshot */ 2369 if (st->lnode_count == 0) { 2370 ASSERT(st->link_count == 0); 2371 goto out; 2372 } 2373 2374 ASSERT(st->link_count != 0); 2375 2376 /* get a pointer to snapshot memory for all the di_lnodes */ 2377 size = sizeof (struct di_lnode) * st->lnode_count; 2378 data.lnode_off = off = di_checkmem(st, off, size); 2379 off += size; 2380 2381 /* get a pointer to snapshot memory for all the di_links */ 2382 size = sizeof (struct di_link) * st->link_count; 2383 data.link_off = off = di_checkmem(st, off, size); 2384 off += size; 2385 2386 data.lnode_count = data.link_count = 0; 2387 data.st = st; 2388 2389 /* 2390 * We have lnodes and links that will go into the 2391 * snapshot, so let's walk the respective hashes 2392 * and snapshot them. The various linkages are 2393 * also set up during the walk. 2394 */ 2395 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); 2396 ASSERT(data.lnode_count == st->lnode_count); 2397 2398 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); 2399 ASSERT(data.link_count == st->link_count); 2400 2401 out: 2402 /* free up the i_lnodes and i_links used to create the snapshot */ 2403 mod_hash_destroy_hash(st->lnode_hash); 2404 mod_hash_destroy_hash(st->link_hash); 2405 st->lnode_count = 0; 2406 st->link_count = 0; 2407 2408 return (off); 2409 } 2410 2411 2412 /* 2413 * Copy all minor data nodes attached to a devinfo node into the snapshot. 2414 * It is called from di_copynode with active ndi_devi_enter to protect 2415 * the list of minor nodes. 
2416 */ 2417 static di_off_t 2418 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, 2419 struct di_state *st) 2420 { 2421 di_off_t off; 2422 struct di_minor *me; 2423 size_t size; 2424 2425 dcmn_err2((CE_CONT, "di_getmdata:\n")); 2426 2427 /* 2428 * check memory first 2429 */ 2430 off = di_checkmem(st, *off_p, sizeof (struct di_minor)); 2431 *off_p = off; 2432 2433 do { 2434 me = DI_MINOR(di_mem_addr(st, off)); 2435 me->self = off; 2436 me->type = mnode->type; 2437 me->node = node; 2438 me->user_private_data = NULL; 2439 2440 off += sizeof (struct di_minor); 2441 2442 /* 2443 * Split dev_t to major/minor, so it works for 2444 * both ILP32 and LP64 models 2445 */ 2446 me->dev_major = getmajor(mnode->ddm_dev); 2447 me->dev_minor = getminor(mnode->ddm_dev); 2448 me->spec_type = mnode->ddm_spec_type; 2449 2450 if (mnode->ddm_name) { 2451 size = strlen(mnode->ddm_name) + 1; 2452 me->name = off = di_checkmem(st, off, size); 2453 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name); 2454 off += size; 2455 } 2456 2457 if (mnode->ddm_node_type) { 2458 size = strlen(mnode->ddm_node_type) + 1; 2459 me->node_type = off = di_checkmem(st, off, size); 2460 (void) strcpy(di_mem_addr(st, off), 2461 mnode->ddm_node_type); 2462 off += size; 2463 } 2464 2465 off = di_checkmem(st, off, sizeof (struct di_minor)); 2466 me->next = off; 2467 mnode = mnode->next; 2468 } while (mnode); 2469 2470 me->next = 0; 2471 2472 return (off); 2473 } 2474 2475 /* 2476 * di_register_dip(), di_dip_find(): The dip must be protected 2477 * from deallocation when using these routines - this can either 2478 * be a reference count, a busy hold or a per-driver lock. 2479 */ 2480 2481 static void 2482 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) 2483 { 2484 struct dev_info *node = DEVI(dip); 2485 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2486 struct di_dkey *dk; 2487 2488 ASSERT(dip); 2489 ASSERT(off > 0); 2490 2491 key->k_type = DI_DKEY; 2492 dk = &(key->k_u.dkey); 2493 2494 dk->dk_dip = dip; 2495 dk->dk_major = node->devi_major; 2496 dk->dk_inst = node->devi_instance; 2497 dk->dk_nodeid = node->devi_nodeid; 2498 2499 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, 2500 (mod_hash_val_t)(uintptr_t)off) != 0) { 2501 panic( 2502 "duplicate devinfo (%p) registered during device " 2503 "tree walk", (void *)dip); 2504 } 2505 } 2506 2507 2508 static int 2509 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) 2510 { 2511 /* 2512 * uintptr_t must be used because it matches the size of void *; 2513 * mod_hash expects clients to place results into pointer-size 2514 * containers; since di_off_t is always a 32-bit offset, alignment 2515 * would otherwise be broken on 64-bit kernels. 2516 */ 2517 uintptr_t offset; 2518 struct di_key key = {0}; 2519 struct di_dkey *dk; 2520 2521 ASSERT(st->reg_dip_hash); 2522 ASSERT(dip); 2523 ASSERT(off_p); 2524 2525 2526 key.k_type = DI_DKEY; 2527 dk = &(key.k_u.dkey); 2528 2529 dk->dk_dip = dip; 2530 dk->dk_major = DEVI(dip)->devi_major; 2531 dk->dk_inst = DEVI(dip)->devi_instance; 2532 dk->dk_nodeid = DEVI(dip)->devi_nodeid; 2533 2534 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, 2535 (mod_hash_val_t *)&offset) == 0) { 2536 *off_p = (di_off_t)offset; 2537 return (0); 2538 } else { 2539 return (-1); 2540 } 2541 } 2542 2543 /* 2544 * di_register_pip(), di_pip_find(): The pip must be protected from deallocation 2545 * when using these routines. The caller must do this by protecting the 2546 * client (or phci)<->pip linkage while traversing the list and then holding the 2547 * pip when it is found in the list. 2548 */ 2549 2550 static void 2551 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) 2552 { 2553 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2554 char *path_addr; 2555 struct di_pkey *pk; 2556 2557 ASSERT(pip); 2558 ASSERT(off > 0); 2559 2560 key->k_type = DI_PKEY; 2561 pk = &(key->k_u.pkey); 2562 2563 pk->pk_pip = pip; 2564 path_addr = mdi_pi_get_addr(pip); 2565 if (path_addr) 2566 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); 2567 pk->pk_client = mdi_pi_get_client(pip); 2568 pk->pk_phci = mdi_pi_get_phci(pip); 2569 2570 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, 2571 (mod_hash_val_t)(uintptr_t)off) != 0) { 2572 panic( 2573 "duplicate pathinfo (%p) registered during device " 2574 "tree walk", (void *)pip); 2575 } 2576 } 2577 2578 /* 2579 * As with di_register_pip, the caller must hold or lock the pip 2580 */ 2581 static int 2582 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) 2583 { 2584 /* 2585 * uintptr_t must be used because it matches the size of void *; 2586 * mod_hash expects clients to place results into pointer-size 2587 * containers; since di_off_t is always a 32-bit offset, alignment 2588 * would otherwise be broken on 64-bit kernels. 2589 */ 2590 uintptr_t offset; 2591 struct di_key key = {0}; 2592 struct di_pkey *pk; 2593 2594 ASSERT(st->reg_pip_hash); 2595 ASSERT(off_p); 2596 2597 if (pip == NULL) { 2598 *off_p = 0; 2599 return (0); 2600 } 2601 2602 key.k_type = DI_PKEY; 2603 pk = &(key.k_u.pkey); 2604 2605 pk->pk_pip = pip; 2606 pk->pk_path_addr = mdi_pi_get_addr(pip); 2607 pk->pk_client = mdi_pi_get_client(pip); 2608 pk->pk_phci = mdi_pi_get_phci(pip); 2609 2610 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, 2611 (mod_hash_val_t *)&offset) == 0) { 2612 *off_p = (di_off_t)offset; 2613 return (0); 2614 } else { 2615 return (-1); 2616 } 2617 } 2618 2619 static di_path_state_t 2620 path_state_convert(mdi_pathinfo_state_t st) 2621 { 2622 switch (st) { 2623 case MDI_PATHINFO_STATE_ONLINE: 2624 return (DI_PATH_STATE_ONLINE); 2625 case MDI_PATHINFO_STATE_STANDBY: 2626 return (DI_PATH_STATE_STANDBY); 2627 case MDI_PATHINFO_STATE_OFFLINE: 2628 return (DI_PATH_STATE_OFFLINE); 2629 case MDI_PATHINFO_STATE_FAULT: 2630 return (DI_PATH_STATE_FAULT); 2631 default: 2632 return (DI_PATH_STATE_UNKNOWN); 2633 } 2634 } 2635 2636 2637 static di_off_t 2638 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p, 2639 struct di_state *st) 2640 { 2641 nvpair_t *prop = NULL; 2642 struct di_path_prop *me; 2643 int off; 2644 size_t size; 2645 char *str; 2646 uchar_t *buf; 2647 uint_t nelems; 2648 2649 off = *off_p; 2650 if (mdi_pi_get_next_prop(pip, NULL) == NULL) { 2651 *off_p = 0; 2652 return (off); 2653 } 2654 2655 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2656 *off_p = off; 2657 2658 while (prop = mdi_pi_get_next_prop(pip, prop)) { 2659 me = DI_PATHPROP(di_mem_addr(st, off)); 2660 me->self = off; 2661 off += sizeof (struct di_path_prop); 2662 2663 /* 2664 * property name 2665 */ 2666 size = strlen(nvpair_name(prop)) + 1; 2667 me->prop_name = off = di_checkmem(st, off, size); 2668 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); 2669 off += size; 2670 2671 switch (nvpair_type(prop)) { 2672 case DATA_TYPE_BYTE: 2673 case DATA_TYPE_INT16: 2674 case DATA_TYPE_UINT16: 2675 case DATA_TYPE_INT32: 2676 case DATA_TYPE_UINT32: 2677 me->prop_type = DDI_PROP_TYPE_INT; 2678 size = sizeof (int32_t); 2679 off = di_checkmem(st, off, size); 2680 (void) nvpair_value_int32(prop, 2681 (int32_t *)di_mem_addr(st, off)); 2682 break; 2683 2684 case DATA_TYPE_INT64: 2685 case DATA_TYPE_UINT64: 2686 me->prop_type = DDI_PROP_TYPE_INT64; 2687 size = sizeof (int64_t); 2688 off = di_checkmem(st, off, size); 2689 (void) nvpair_value_int64(prop, 2690 (int64_t *)di_mem_addr(st, off)); 2691 break; 2692 2693 case DATA_TYPE_STRING: 2694 me->prop_type = DDI_PROP_TYPE_STRING; 2695 (void) nvpair_value_string(prop, &str); 2696 size = strlen(str) + 1; 2697 off = di_checkmem(st, off, size); 2698 (void) strcpy(di_mem_addr(st, off), str); 2699 break; 2700 2701 case DATA_TYPE_BYTE_ARRAY: 2702 case DATA_TYPE_INT16_ARRAY: 2703 case DATA_TYPE_UINT16_ARRAY: 2704 case DATA_TYPE_INT32_ARRAY: 2705 case DATA_TYPE_UINT32_ARRAY: 2706 case DATA_TYPE_INT64_ARRAY: 2707 case DATA_TYPE_UINT64_ARRAY: 2708 me->prop_type = DDI_PROP_TYPE_BYTE; 2709 (void) nvpair_value_byte_array(prop, &buf, &nelems); 2710 size = nelems; 2711 if (nelems != 0) { 2712 off = di_checkmem(st, off, size); 2713 bcopy(buf, di_mem_addr(st, off), size); 2714 } 2715 break; 2716 2717 default: /* Unknown or unhandled type; skip it */ 2718 size = 0; 2719 break; 2720 } 2721 2722 if (size > 0) { 2723 me->prop_data = off; 2724 } 2725 2726 me->prop_len = (int)size; 2727 off += size; 2728 2729 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2730 me->prop_next = off; 2731 } 2732 2733 me->prop_next = 0; 2734 return (off); 2735 } 2736 2737 2738 static void 2739 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp, 2740 int get_client) 2741 { 2742 if (get_client) { 2743 ASSERT(me->path_client == 0); 2744 me->path_client = noff; 2745 ASSERT(me->path_c_link == 0); 2746 *off_pp = &me->path_c_link; 2747 me->path_snap_state &= 2748 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK); 2749 } else { 2750 ASSERT(me->path_phci == 0); 2751 me->path_phci = noff; 2752 ASSERT(me->path_p_link == 0); 2753 *off_pp = &me->path_p_link; 2754 me->path_snap_state &= 2755 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK); 2756 } 2757 } 2758 2759 /* 2760 * off_p: pointer to the linkage field. This links pips along the client|phci 2761 * linkage list. 2762 * noff : Offset for the endpoint dip snapshot. 2763 */ 2764 static di_off_t 2765 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff, 2766 struct di_state *st, int get_client) 2767 { 2768 di_off_t off; 2769 mdi_pathinfo_t *pip; 2770 struct di_path *me; 2771 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *); 2772 size_t size; 2773 2774 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client)); 2775 2776 /* 2777 * The naming of the following mdi_xyz() is unfortunately 2778 * non-intuitive. mdi_get_next_phci_path() follows the 2779 * client_link i.e. the list of pips belonging to the 2780 * given client dip. 2781 */ 2782 if (get_client) 2783 next_pip = &mdi_get_next_phci_path; 2784 else 2785 next_pip = &mdi_get_next_client_path; 2786 2787 off = *off_p; 2788 2789 pip = NULL; 2790 while (pip = (*next_pip)(dip, pip)) { 2791 mdi_pathinfo_state_t state; 2792 di_off_t stored_offset; 2793 2794 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip)); 2795 2796 mdi_pi_lock(pip); 2797 2798 if (di_pip_find(st, pip, &stored_offset) != -1) { 2799 /* 2800 * We've already seen this pathinfo node so we need to 2801 * take care not to snap it again; however, one endpoint 2802 * and linkage will be set here. The other endpoint 2803 * and linkage have already been set when the pip was 2804 * first snapshotted i.e. when the other endpoint dip 2805 * was snapshotted. 2806 */ 2807 me = DI_PATH(di_mem_addr(st, stored_offset)); 2808 *off_p = stored_offset; 2809 2810 di_path_one_endpoint(me, noff, &off_p, get_client); 2811 2812 /* 2813 * The other endpoint and linkage were set when this 2814 * pip was snapshotted. So we are done with both 2815 * endpoints and linkages. 2816 */ 2817 ASSERT(!(me->path_snap_state & 2818 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI))); 2819 ASSERT(!(me->path_snap_state & 2820 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK))); 2821 2822 mdi_pi_unlock(pip); 2823 continue; 2824 } 2825 2826 /* 2827 * Now that we need to snapshot this pip, check memory 2828 */ 2829 size = sizeof (struct di_path); 2830 *off_p = off = di_checkmem(st, off, size); 2831 me = DI_PATH(di_mem_addr(st, off)); 2832 me->self = off; 2833 off += size; 2834 2835 me->path_snap_state = 2836 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK; 2837 me->path_snap_state |= 2838 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI; 2839 2840 /* 2841 * Zero out fields as di_checkmem() doesn't guarantee 2842 * zero-filled memory 2843 */ 2844 me->path_client = me->path_phci = 0; 2845 me->path_c_link = me->path_p_link = 0; 2846 2847 di_path_one_endpoint(me, noff, &off_p, get_client); 2848 2849 /* 2850 * Note the existence of this pathinfo 2851 */ 2852 di_register_pip(st, pip, me->self); 2853 2854 state = mdi_pi_get_state(pip); 2855 me->path_state = path_state_convert(state); 2856 2857 me->path_instance = mdi_pi_get_path_instance(pip); 2858 2859 /* 2860 * Get intermediate addressing info. 2861 */ 2862 size = strlen(mdi_pi_get_addr(pip)) + 1; 2863 me->path_addr = off = di_checkmem(st, off, size); 2864 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip)); 2865 off += size; 2866 2867 /* 2868 * Get path properties if props are to be included in the 2869 * snapshot 2870 */ 2871 if (DINFOPROP & st->command) { 2872 me->path_prop = off; 2873 off = di_path_getprop(pip, &me->path_prop, st); 2874 } else { 2875 me->path_prop = 0; 2876 } 2877 2878 mdi_pi_unlock(pip); 2879 } 2880 2881 *off_p = 0; 2882 return (off); 2883 } 2884 2885 /* 2886 * Return driver prop_op entry point for the specified devinfo node. 2887 * 2888 * To return a non-NULL value: 2889 * - driver must be attached and held: 2890 * If driver is not attached we ignore the driver property list. 2891 * No one should rely on such properties. 2892 * - driver "cb_prop_op != ddi_prop_op": 2893 * If "cb_prop_op == ddi_prop_op", framework does not need to call driver. 2894 * XXX or parent's bus_prop_op != ddi_bus_prop_op 2895 */ 2896 static int 2897 (*di_getprop_prop_op(struct dev_info *dip)) 2898 (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *) 2899 { 2900 struct dev_ops *ops; 2901 2902 /* If driver is not attached we ignore the driver property list. */ 2903 if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip)) 2904 return (NULL); 2905 2906 /* 2907 * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev, 2908 * or even NULL.
2909 */ 2910 ops = dip->devi_ops; 2911 if (ops && ops->devo_cb_ops && 2912 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) && 2913 (ops->devo_cb_ops->cb_prop_op != nodev) && 2914 (ops->devo_cb_ops->cb_prop_op != nulldev) && 2915 (ops->devo_cb_ops->cb_prop_op != NULL)) 2916 return (ops->devo_cb_ops->cb_prop_op); 2917 return (NULL); 2918 } 2919 2920 static di_off_t 2921 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip, 2922 int (*prop_op)(), 2923 char *name, dev_t devt, int aflags, int alen, caddr_t aval, 2924 di_off_t off, di_off_t **off_pp) 2925 { 2926 int need_free = 0; 2927 dev_t pdevt; 2928 int pflags; 2929 int rv; 2930 caddr_t val; 2931 int len; 2932 size_t size; 2933 struct di_prop *pp; 2934 2935 /* If we have prop_op function, ask driver for latest value */ 2936 if (prop_op) { 2937 ASSERT(dip); 2938 2939 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */ 2940 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt; 2941 2942 /* 2943 * We have type information in flags, but are invoking an 2944 * old non-typed prop_op(9E) interface. Since not all types are 2945 * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64), 2946 * we set DDI_PROP_CONSUMER_TYPED - causing the framework to 2947 * expand type bits beyond DDI_PROP_TYPE_ANY. This allows us 2948 * to use the legacy prop_op(9E) interface to obtain updates of 2949 * non-DDI_PROP_TYPE_ANY dynamic properties. 2950 */ 2951 pflags = aflags & ~DDI_PROP_TYPE_MASK; 2952 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM | 2953 DDI_PROP_CONSUMER_TYPED; 2954 rv = (*prop_op)(pdevt, (dev_info_t *)dip, PROP_LEN_AND_VAL_ALLOC, 2955 pflags, name, &val, &len); 2956 2957 if (rv == DDI_PROP_SUCCESS) { 2958 need_free = 1; /* dynamic prop obtained */ 2959 } else if (dyn) { 2960 /* 2961 * A dynamic property must be obtained successfully from 2962 * prop_op(9E) to show up in the snapshot - that is the 2963 * only source of its value. 2964 */ 2965 return (off); /* dynamic prop not supported */ 2966 } else { 2967 /* 2968 * In case calling prop_op(9E) caused an update 2969 * of a non-dynamic property (code leading 2970 * to ddi_prop_change), we defer picking up val and 2971 * len information until after the prop_op(9E) call, 2972 * to ensure that we snapshot the latest value. 2973 */ 2974 val = aval; 2975 len = alen; 2976 2977 } 2978 } else { 2979 val = aval; 2980 len = alen; 2981 } 2982 2983 dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n", 2984 list, name ? name : "NULL", len, (void *)val)); 2985
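/*
 * Linkage: *off_pp points at the previous di_prop's next field (or at
 * the list head on the first call). We record our offset there, then
 * hand back &pp->next below, so successive calls string the properties
 * into a singly linked chain of snapshot offsets.
 */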
name : "NULL", len, (void *)val)); 2985 2986 size = sizeof (struct di_prop); 2987 **off_pp = off = di_checkmem(st, off, size); 2988 pp = DI_PROP(di_mem_addr(st, off)); 2989 pp->self = off; 2990 off += size; 2991 2992 pp->dev_major = getmajor(devt); 2993 pp->dev_minor = getminor(devt); 2994 pp->prop_flags = aflags; 2995 pp->prop_list = list; 2996 2997 /* property name */ 2998 if (name) { 2999 size = strlen(name) + 1; 3000 pp->prop_name = off = di_checkmem(st, off, size); 3001 (void) strcpy(di_mem_addr(st, off), name); 3002 off += size; 3003 } else { 3004 pp->prop_name = -1; 3005 } 3006 3007 pp->prop_len = len; 3008 if (val == NULL) { 3009 pp->prop_data = -1; 3010 } else if (len != 0) { 3011 size = len; 3012 pp->prop_data = off = di_checkmem(st, off, size); 3013 bcopy(val, di_mem_addr(st, off), size); 3014 off += size; 3015 } 3016 3017 pp->next = 0; /* assume tail for now */ 3018 *off_pp = &pp->next; /* return pointer to our next */ 3019 3020 if (need_free) /* free PROP_LEN_AND_VAL_ALLOC alloc */ 3021 kmem_free(val, len); 3022 return (off); 3023 } 3024 3025 3026 /* 3027 * Copy a list of properties attached to a devinfo node. Called from 3028 * di_copynode with active ndi_devi_enter. The major number is passed in case 3029 * we need to call driver's prop_op entry. The value of list indicates 3030 * which list we are copying. Possible values are: 3031 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST 3032 */ 3033 static di_off_t 3034 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p, 3035 struct di_state *st, struct dev_info *dip) 3036 { 3037 struct ddi_prop *prop; 3038 int (*prop_op)(); 3039 int off; 3040 struct ddi_minor_data *mn; 3041 i_ddi_prop_dyn_t *dp; 3042 struct plist { 3043 struct plist *pl_next; 3044 char *pl_name; 3045 int pl_flags; 3046 dev_t pl_dev; 3047 int pl_len; 3048 caddr_t pl_val; 3049 } *pl, *pl0, **plp; 3050 3051 ASSERT(st != NULL); 3052 3053 off = *off_p; 3054 *off_p = 0; 3055 dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n", 3056 list, (void *)*pprop)); 3057 3058 /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */ 3059 prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL; 3060 3061 /* 3062 * Form private list of properties, holding devi_lock for properties 3063 * than hang off the dip. 3064 */ 3065 if (dip) 3066 mutex_enter(&(dip->devi_lock)); 3067 for (plp = &pl0, prop = *pprop; 3068 prop; plp = &pl->pl_next, prop = prop->prop_next) { 3069 pl = kmem_alloc(sizeof (*pl), KM_SLEEP); 3070 *plp = pl; 3071 pl->pl_next = NULL; 3072 if (prop->prop_name) 3073 pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP); 3074 else 3075 pl->pl_name = NULL; 3076 pl->pl_flags = prop->prop_flags; 3077 pl->pl_dev = prop->prop_dev; 3078 if (prop->prop_len) { 3079 pl->pl_len = prop->prop_len; 3080 pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP); 3081 bcopy(prop->prop_val, pl->pl_val, pl->pl_len); 3082 } else { 3083 pl->pl_len = 0; 3084 pl->pl_val = NULL; 3085 } 3086 } 3087 if (dip) 3088 mutex_exit(&(dip->devi_lock)); 3089 3090 /* 3091 * Now that we have dropped devi_lock, perform a second-pass to 3092 * add properties to the snapshot. We do this as a second pass 3093 * because we may need to call prop_op(9E) and we can't hold 3094 * devi_lock across that call. 
3095 */ 3096 for (pl = pl0; pl; pl = pl0) { 3097 pl0 = pl->pl_next; 3098 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name, 3099 pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val, 3100 off, &off_p); 3101 if (pl->pl_val) 3102 kmem_free(pl->pl_val, pl->pl_len); 3103 if (pl->pl_name) 3104 kmem_free(pl->pl_name, strlen(pl->pl_name) + 1); 3105 kmem_free(pl, sizeof (*pl)); 3106 } 3107 3108 /* 3109 * If there is no prop_op or dynamic property support has been 3110 * disabled, we are done. 3111 */ 3112 if ((prop_op == NULL) || (di_prop_dyn == 0)) { 3113 *off_p = 0; 3114 return (off); 3115 } 3116 3117 /* Add dynamic driver properties to snapshot */ 3118 for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip); 3119 dp && dp->dp_name; dp++) { 3120 if (dp->dp_spec_type) { 3121 /* if spec_type, property of matching minor */ 3122 ASSERT(DEVI_BUSY_OWNED(dip)); 3123 for (mn = dip->devi_minor; mn; mn = mn->next) { 3124 if (mn->ddm_spec_type != dp->dp_spec_type) 3125 continue; 3126 off = di_getprop_add(list, 1, st, dip, prop_op, 3127 dp->dp_name, mn->ddm_dev, dp->dp_type, 3128 0, NULL, off, &off_p); 3129 } 3130 } else { 3131 /* property of devinfo node */ 3132 off = di_getprop_add(list, 1, st, dip, prop_op, 3133 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type, 3134 0, NULL, off, &off_p); 3135 } 3136 } 3137 3138 /* Add dynamic parent properties to snapshot */ 3139 for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip); 3140 dp && dp->dp_name; dp++) { 3141 if (dp->dp_spec_type) { 3142 /* if spec_type, property of matching minor */ 3143 ASSERT(DEVI_BUSY_OWNED(dip)); 3144 for (mn = dip->devi_minor; mn; mn = mn->next) { 3145 if (mn->ddm_spec_type != dp->dp_spec_type) 3146 continue; 3147 off = di_getprop_add(list, 1, st, dip, prop_op, 3148 dp->dp_name, mn->ddm_dev, dp->dp_type, 3149 0, NULL, off, &off_p); 3150 } 3151 } else { 3152 /* property of devinfo node */ 3153 off = di_getprop_add(list, 1, st, dip, prop_op, 3154 dp->dp_name, DDI_DEV_T_NONE, dp->dp_type, 3155 0, NULL, off, &off_p); 3156 } 3157 } 3158 3159 *off_p = 0; 3160 return (off); 3161 } 3162 3163 /* 3164 * find private data format attached to a dip 3165 * parent = 1 to match driver name of parent dip (for parent private data) 3166 * 0 to match driver name of current dip (for driver private data) 3167 */ 3168 #define DI_MATCH_DRIVER 0 3169 #define DI_MATCH_PARENT 1 3170 3171 struct di_priv_format * 3172 di_match_drv_name(struct dev_info *node, struct di_state *st, int match) 3173 { 3174 int i, count, len; 3175 char *drv_name; 3176 major_t major; 3177 struct di_all *all; 3178 struct di_priv_format *form; 3179 3180 dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n", 3181 node->devi_node_name, match)); 3182 3183 if (match == DI_MATCH_PARENT) { 3184 node = DEVI(node->devi_parent); 3185 } 3186 3187 if (node == NULL) { 3188 return (NULL); 3189 } 3190 3191 major = ddi_name_to_major(node->devi_binding_name); 3192 if (major == (major_t)(-1)) { 3193 return (NULL); 3194 } 3195 3196 /* 3197 * Match the driver name. 
3198 */ 3199 drv_name = ddi_major_to_name(major); 3200 if ((drv_name == NULL) || *drv_name == '\0') { 3201 return (NULL); 3202 } 3203 3204 /* Now get the di_priv_format array */ 3205 all = DI_ALL_PTR(st); 3206 if (match == DI_MATCH_PARENT) { 3207 count = all->n_ppdata; 3208 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format)); 3209 } else { 3210 count = all->n_dpdata; 3211 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format)); 3212 } 3213 3214 len = strlen(drv_name); 3215 for (i = 0; i < count; i++) { 3216 char *tmp; 3217 3218 tmp = form[i].drv_name; 3219 while (tmp && (*tmp != '\0')) { 3220 if (strncmp(drv_name, tmp, len) == 0) { 3221 return (&form[i]); 3222 } 3223 /* 3224 * Move to the next driver name, skipping the 3225 * white space 3226 */ 3227 if (tmp = strchr(tmp, ' ')) { 3228 tmp++; 3229 } 3230 } 3231 } 3232 3233 return (NULL); 3234 } 3235 3236 /* 3237 * The following functions copy data as specified by the format passed in. 3238 * To prevent an invalid format from panicking the system, we call on_fault(). 3239 * A return value of 0 indicates an error. Otherwise, the total offset 3240 * is returned. 3241 */ 3242 #define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */ 3243 3244 static di_off_t 3245 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node, 3246 void *data, di_off_t *off_p, struct di_state *st) 3247 { 3248 caddr_t pa; 3249 void *ptr; 3250 int i, size, repeat; 3251 di_off_t off, off0, *tmp; 3252 char *path; 3253 label_t ljb; 3254 3255 dcmn_err2((CE_CONT, "di_getprvdata:\n")); 3256 3257 /* 3258 * check memory availability. Private data size is 3259 * limited to DI_MAX_PRIVDATA. 3260 */ 3261 off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA); 3262 *off_p = off; 3263 3264 if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) { 3265 goto failure; 3266 } 3267 3268 if (!on_fault(&ljb)) { 3269 /* copy the struct */ 3270 bcopy(data, di_mem_addr(st, off), pdp->bytes); 3271 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */ 3272 3273 /* dereferencing pointers */ 3274 for (i = 0; i < MAX_PTR_IN_PRV; i++) { 3275 3276 if (pdp->ptr[i].size == 0) { 3277 goto success; /* no more ptrs */ 3278 } 3279 3280 /* 3281 * first, get the pointer content 3282 */ 3283 if ((pdp->ptr[i].offset < 0) || 3284 (pdp->ptr[i].offset > pdp->bytes - sizeof (char *))) 3285 goto failure; /* wrong offset */ 3286 3287 pa = di_mem_addr(st, off + pdp->ptr[i].offset); 3288 3289 /* save a tmp ptr to store the di_off_t later */ 3290 tmp = (di_off_t *)(intptr_t)pa; 3291 3292 /* get pointer value, if NULL continue */ 3293 ptr = *((void **) (intptr_t)pa); 3294 if (ptr == NULL) { 3295 continue; 3296 } 3297 3298 /* 3299 * next, find the repeat count (array dimension) 3300 */ 3301 repeat = pdp->ptr[i].len_offset; 3302 3303 /* 3304 * A negative value indicates a fixed-size array whose 3305 * dimension is the absolute value of len_offset. 3306 * 3307 * Zero or a positive value is the offset of an int 3308 * member of the structure that holds the (variable) 3309 * array dimension. 3310 */ 3311 if (repeat > pdp->bytes - sizeof (int)) { 3312 goto failure; /* wrong offset */ 3313 } 3314 3315 if (repeat >= 0) { 3316 repeat = *((int *) 3317 (intptr_t)((caddr_t)data + repeat)); 3318 } else { 3319 repeat = -repeat; 3320 } 3321 3322 /* 3323 * next, get the size of the object to be copied 3324 */ 3325 size = pdp->ptr[i].size * repeat; 3326 3327 /* 3328 * Arbitrarily limit the total size of the object to be 3329 * copied (1 byte up to DI_MAX_PRIVDATA, half a page). 3330 */ 3331 if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) { 3332 goto failure; /* wrong size or too big */ 3333 } 3334 3335 /* 3336 * Now copy the data 3337 */ 3338 *tmp = off0; 3339 bcopy(ptr, di_mem_addr(st, off + off0), size); 3340 off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */ 3341 } 3342 } else { 3343 goto failure; 3344 } 3345 3346 success: 3347 /* 3348 * success if reached here 3349 */ 3350 no_fault(); 3351 return (off + off0); 3352 /*NOTREACHED*/ 3353 3354 failure: 3355 /* 3356 * fault occurred 3357 */ 3358 no_fault(); 3359 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 3360 cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p", 3361 ddi_pathname((dev_info_t *)node, path), data); 3362 kmem_free(path, MAXPATHLEN); 3363 *off_p = -1; /* set private data to indicate error */ 3364 3365 return (off); 3366 } 3367 3368 /* 3369 * get parent private data; on error, returns original offset 3370 */ 3371 static di_off_t 3372 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st) 3373 { 3374 int off; 3375 struct di_priv_format *ppdp; 3376 3377 dcmn_err2((CE_CONT, "di_getppdata:\n")); 3378 3379 /* find the parent data format */ 3380 if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) { 3381 off = *off_p; 3382 *off_p = 0; /* set parent data to none */ 3383 return (off); 3384 } 3385 3386 return (di_getprvdata(ppdp, node, 3387 ddi_get_parent_data((dev_info_t *)node), off_p, st)); 3388 } 3389 3390 /* 3391 * get driver private data; on error, returns original offset 3392 */ 3393 static di_off_t 3394 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st) 3395 { 3396 int off; 3397 struct di_priv_format *dpdp; 3398 3399 dcmn_err2((CE_CONT, "di_getdpdata:")); 3400 3401 /* find the driver data format */ 3402 if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) { 3403 off = *off_p; 3404 *off_p = 0; /* set driver data to none */ 3405 return (off); 3406 } 3407 3408 return (di_getprvdata(dpdp, node, 3409 ddi_get_driver_private((dev_info_t *)node), off_p, st)); 3410 } 3411 3412 /* 3413 * The driver is stateful across DINFOCPYALL and DINFOUSRLD. 3414 * This function encapsulates the state machine: 3415 * 3416 * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY -> 3417 * | SNAPSHOT USRLD | 3418 * -------------------------------------------------- 3419 * 3420 * Returns 0 on success and -1 on failure 3421 */ 3422 static int 3423 di_setstate(struct di_state *st, int new_state) 3424 { 3425 int ret = 0; 3426 3427 mutex_enter(&di_lock); 3428 switch (new_state) { 3429 case IOC_IDLE: 3430 case IOC_DONE: 3431 break; 3432 case IOC_SNAP: 3433 if (st->di_iocstate != IOC_IDLE) 3434 ret = -1; 3435 break; 3436 case IOC_COPY: 3437 if (st->di_iocstate != IOC_DONE) 3438 ret = -1; 3439 break; 3440 default: 3441 ret = -1; 3442 } 3443 3444 if (ret == 0) 3445 st->di_iocstate = new_state; 3446 else 3447 cmn_err(CE_NOTE, "incorrect state transition from %d to %d", 3448 st->di_iocstate, new_state); 3449 mutex_exit(&di_lock); 3450 return (ret); 3451 } 3452 3453 /* 3454 * We cannot assume the presence of the entire 3455 * snapshot in this routine.
All we are guaranteed 3455 * is the di_all struct + 1 byte (for root_path) 3456 */ 3457 static int 3458 header_plus_one_ok(struct di_all *all) 3459 { 3460 /* 3461 * Refuse to read old versions 3462 */ 3463 if (all->version != DI_SNAPSHOT_VERSION) { 3464 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); 3465 return (0); 3466 } 3467 3468 if (all->cache_magic != DI_CACHE_MAGIC) { 3469 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); 3470 return (0); 3471 } 3472 3473 if (all->snapshot_time == 0) { 3474 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); 3475 return (0); 3476 } 3477 3478 if (all->top_devinfo == 0) { 3479 CACHE_DEBUG((DI_ERR, "NULL top devinfo")); 3480 return (0); 3481 } 3482 3483 if (all->map_size < sizeof (*all) + 1) { 3484 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); 3485 return (0); 3486 } 3487 3488 if (all->root_path[0] != '/' || all->root_path[1] != '\0') { 3489 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", 3490 all->root_path[0], all->root_path[1])); 3491 return (0); 3492 } 3493 3494 /* 3495 * We can't check checksum here as we just have the header 3496 */ 3497 3498 return (1); 3499 } 3500 3501 static int 3502 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) 3503 { 3504 rlim64_t rlimit; 3505 ssize_t resid; 3506 int error = 0; 3507 3508 3509 rlimit = RLIM64_INFINITY; 3510 3511 while (len) { 3512 resid = 0; 3513 error = vn_rdwr(UIO_WRITE, vp, buf, len, off, 3514 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 3515 3516 if (error || resid < 0) { 3517 error = error ? error : EIO; 3518 CACHE_DEBUG((DI_ERR, "write error: %d", error)); 3519 break; 3520 } 3521 3522 /* 3523 * Check if we are making progress 3524 */ 3525 if (resid >= len) { 3526 error = ENOSPC; 3527 break; 3528 } 3529 buf += len - resid; 3530 off += len - resid; 3531 len = resid; 3532 } 3533 3534 return (error); 3535 } 3536 3537 static void 3538 di_cache_write(struct di_cache *cache) 3539 { 3540 struct di_all *all; 3541 struct vnode *vp; 3542 int oflags; 3543 size_t map_size; 3544 size_t chunk; 3545 offset_t off; 3546 int error; 3547 char *buf; 3548 3549 ASSERT(DI_CACHE_LOCKED(*cache)); 3550 ASSERT(!servicing_interrupt()); 3551 3552 if (cache->cache_size == 0) { 3553 ASSERT(cache->cache_data == NULL); 3554 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); 3555 return; 3556 } 3557 3558 ASSERT(cache->cache_size > 0); 3559 ASSERT(cache->cache_data); 3560 3561 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { 3562 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); 3563 return; 3564 } 3565 3566 all = (struct di_all *)cache->cache_data; 3567 3568 if (!header_plus_one_ok(all)) { 3569 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); 3570 return; 3571 } 3572 3573 ASSERT(strcmp(all->root_path, "/") == 0); 3574 3575 /* 3576 * The cache_size is the total allocated memory for the cache. 3577 * The map_size is the actual size of valid data in the cache. 3578 * map_size may be smaller than cache_size but cannot exceed 3579 * cache_size. 3580 */ 3581 if (all->map_size > cache->cache_size) { 3582 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." 
3583 " Skipping write", all->map_size, cache->cache_size)); 3584 return; 3585 } 3586 3587 /* 3588 * First unlink the temp file 3589 */ 3590 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3591 if (error && error != ENOENT) { 3592 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3593 DI_CACHE_TEMP, error)); 3594 } 3595 3596 if (error == EROFS) { 3597 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3598 return; 3599 } 3600 3601 vp = NULL; 3602 oflags = (FCREAT|FWRITE); 3603 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3604 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3605 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3606 DI_CACHE_TEMP, error)); 3607 return; 3608 } 3609 3610 ASSERT(vp); 3611 3612 /* 3613 * Paranoid: Check if the file is on a read-only FS 3614 */ 3615 if (vn_is_readonly(vp)) { 3616 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3617 goto fail; 3618 } 3619 3620 /* 3621 * Note that we only write map_size bytes to disk - this saves 3622 * space as the actual cache size may be larger than size of 3623 * valid data in the cache. 3624 * Another advantage is that it makes verification of size 3625 * easier when the file is read later. 3626 */ 3627 map_size = all->map_size; 3628 off = 0; 3629 buf = cache->cache_data; 3630 3631 while (map_size) { 3632 ASSERT(map_size > 0); 3633 /* 3634 * Write in chunks so that VM system 3635 * is not overwhelmed 3636 */ 3637 if (map_size > di_chunk * PAGESIZE) 3638 chunk = di_chunk * PAGESIZE; 3639 else 3640 chunk = map_size; 3641 3642 error = chunk_write(vp, off, buf, chunk); 3643 if (error) { 3644 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3645 off, error)); 3646 goto fail; 3647 } 3648 3649 off += chunk; 3650 buf += chunk; 3651 map_size -= chunk; 3652 3653 /* If low on memory, give pageout a chance to run */ 3654 if (freemem < desfree) 3655 delay(1); 3656 } 3657 3658 /* 3659 * Now sync the file and close it 3660 */ 3661 if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) { 3662 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3663 } 3664 3665 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) { 3666 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3667 VN_RELE(vp); 3668 return; 3669 } 3670 3671 VN_RELE(vp); 3672 3673 /* 3674 * Now do the rename 3675 */ 3676 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3677 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3678 return; 3679 } 3680 3681 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3682 3683 return; 3684 3685 fail: 3686 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL); 3687 VN_RELE(vp); 3688 } 3689 3690 3691 /* 3692 * Since we could be called early in boot, 3693 * use kobj_read_file() 3694 */ 3695 static void 3696 di_cache_read(struct di_cache *cache) 3697 { 3698 struct _buf *file; 3699 struct di_all *all; 3700 int n; 3701 size_t map_size, sz, chunk; 3702 offset_t off; 3703 caddr_t buf; 3704 uint32_t saved_crc, crc; 3705 3706 ASSERT(modrootloaded); 3707 ASSERT(DI_CACHE_LOCKED(*cache)); 3708 ASSERT(cache->cache_data == NULL); 3709 ASSERT(cache->cache_size == 0); 3710 ASSERT(!servicing_interrupt()); 3711 3712 file = kobj_open_file(DI_CACHE_FILE); 3713 if (file == (struct _buf *)-1) { 3714 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3715 DI_CACHE_FILE, ENOENT)); 3716 return; 3717 } 3718 3719 /* 3720 * Read in the header+root_path first. 
The root_path must be "/" 3721 */ 3722 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); 3723 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); 3724 3725 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { 3726 kmem_free(all, sizeof (*all) + 1); 3727 kobj_close_file(file); 3728 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); 3729 return; 3730 } 3731 3732 map_size = all->map_size; 3733 3734 kmem_free(all, sizeof (*all) + 1); 3735 3736 ASSERT(map_size >= sizeof (*all) + 1); 3737 3738 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); 3739 sz = map_size; 3740 off = 0; 3741 while (sz) { 3742 /* Don't overload VM with large reads */ 3743 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; 3744 n = kobj_read_file(file, buf, chunk, off); 3745 if (n != chunk) { 3746 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", 3747 DI_CACHE_FILE, off)); 3748 goto fail; 3749 } 3750 off += chunk; 3751 buf += chunk; 3752 sz -= chunk; 3753 } 3754 3755 ASSERT(off == map_size); 3756 3757 /* 3758 * Read past expected EOF to verify size. 3759 */ 3760 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { 3761 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); 3762 goto fail; 3763 } 3764 3765 all = (struct di_all *)di_cache.cache_data; 3766 if (!header_plus_one_ok(all)) { 3767 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); 3768 goto fail; 3769 } 3770 3771 /* 3772 * Compute CRC with checksum field in the cache data set to 0 3773 */ 3774 saved_crc = all->cache_checksum; 3775 all->cache_checksum = 0; 3776 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); 3777 all->cache_checksum = saved_crc; 3778 3779 if (crc != all->cache_checksum) { 3780 CACHE_DEBUG((DI_ERR, 3781 "%s: checksum error: expected=0x%x actual=0x%x", 3782 DI_CACHE_FILE, all->cache_checksum, crc)); 3783 goto fail; 3784 } 3785 3786 if (all->map_size != map_size) { 3787 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); 3788 goto fail; 3789 } 3790 3791 kobj_close_file(file); 3792 3793 di_cache.cache_size = map_size; 3794 3795 return; 3796 3797 fail: 3798 kmem_free(di_cache.cache_data, map_size); 3799 kobj_close_file(file); 3800 di_cache.cache_data = NULL; 3801 di_cache.cache_size = 0; 3802 } 3803 3804 3805 /* 3806 * Checks if arguments are valid for using the cache. 
3807 */ 3808 static int 3809 cache_args_valid(struct di_state *st, int *error) 3810 { 3811 ASSERT(error); 3812 ASSERT(st->mem_size > 0); 3813 ASSERT(st->memlist != NULL); 3814 3815 if (!modrootloaded || !i_ddi_io_initialized()) { 3816 CACHE_DEBUG((DI_ERR, 3817 "cache lookup failure: I/O subsystem not inited")); 3818 *error = ENOTACTIVE; 3819 return (0); 3820 } 3821 3822 /* 3823 * No other flags allowed with DINFOCACHE 3824 */ 3825 if (st->command != (DINFOCACHE & DIIOC_MASK)) { 3826 CACHE_DEBUG((DI_ERR, 3827 "cache lookup failure: bad flags: 0x%x", 3828 st->command)); 3829 *error = EINVAL; 3830 return (0); 3831 } 3832 3833 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3834 CACHE_DEBUG((DI_ERR, 3835 "cache lookup failure: bad root: %s", 3836 DI_ALL_PTR(st)->root_path)); 3837 *error = EINVAL; 3838 return (0); 3839 } 3840 3841 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); 3842 3843 *error = 0; 3844 3845 return (1); 3846 } 3847 3848 static int 3849 snapshot_is_cacheable(struct di_state *st) 3850 { 3851 ASSERT(st->mem_size > 0); 3852 ASSERT(st->memlist != NULL); 3853 3854 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) != 3855 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { 3856 CACHE_DEBUG((DI_INFO, 3857 "not cacheable: incompatible flags: 0x%x", 3858 st->command)); 3859 return (0); 3860 } 3861 3862 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3863 CACHE_DEBUG((DI_INFO, 3864 "not cacheable: incompatible root path: %s", 3865 DI_ALL_PTR(st)->root_path)); 3866 return (0); 3867 } 3868 3869 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); 3870 3871 return (1); 3872 } 3873 3874 static int 3875 di_cache_lookup(struct di_state *st) 3876 { 3877 size_t rval; 3878 int cache_valid; 3879 3880 ASSERT(cache_args_valid(st, &cache_valid)); 3881 ASSERT(modrootloaded); 3882 3883 DI_CACHE_LOCK(di_cache); 3884 3885 /* 3886 * The following assignment determines the validity 3887 * of the cache as far as this snapshot is concerned. 3888 */ 3889 cache_valid = di_cache.cache_valid; 3890 3891 if (cache_valid && di_cache.cache_data == NULL) { 3892 di_cache_read(&di_cache); 3893 /* check for read or file error */ 3894 if (di_cache.cache_data == NULL) 3895 cache_valid = 0; 3896 } 3897 3898 if (cache_valid) { 3899 /* 3900 * Ok, the cache was valid as of this particular 3901 * snapshot. Copy the cached snapshot. This is safe 3902 * to do as the cache cannot be freed (we hold the 3903 * cache lock). Free the memory allocated in di_state 3904 * up until this point - we will simply copy everything 3905 * in the cache. 3906 */ 3907 3908 ASSERT(di_cache.cache_data != NULL); 3909 ASSERT(di_cache.cache_size > 0); 3910 3911 di_freemem(st); 3912 3913 rval = 0; 3914 if (di_cache2mem(&di_cache, st) > 0) { 3915 /* 3916 * map_size is size of valid data in the 3917 * cached snapshot and may be less than 3918 * size of the cache. 3919 */ 3920 ASSERT(DI_ALL_PTR(st)); 3921 rval = DI_ALL_PTR(st)->map_size; 3922 3923 ASSERT(rval >= sizeof (struct di_all)); 3924 ASSERT(rval <= di_cache.cache_size); 3925 } 3926 } else { 3927 /* 3928 * The cache isn't valid, we need to take a snapshot. 3929 * Set the command flags appropriately 3930 */ 3931 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); 3932 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); 3933 rval = di_cache_update(st); 3934 st->command = (DINFOCACHE & DIIOC_MASK); 3935 } 3936 3937 DI_CACHE_UNLOCK(di_cache); 3938 3939 /* 3940 * For cached snapshots, the devinfo driver always returns 3941 * a snapshot rooted at "/". 
3942 */ 3943 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); 3944 3945 return ((int)rval); 3946 } 3947 3948 /* 3949 * This is a forced update of the cache - the previous state of the cache 3950 * may be: 3951 * - unpopulated 3952 * - populated and invalid 3953 * - populated and valid 3954 */ 3955 static int 3956 di_cache_update(struct di_state *st) 3957 { 3958 int rval; 3959 uint32_t crc; 3960 struct di_all *all; 3961 3962 ASSERT(DI_CACHE_LOCKED(di_cache)); 3963 ASSERT(snapshot_is_cacheable(st)); 3964 3965 /* 3966 * Free the in-core cache and the on-disk file (if they exist) 3967 */ 3968 i_ddi_di_cache_free(&di_cache); 3969 3970 /* 3971 * Set the valid flag before taking the snapshot, so that an 3972 * invalidation arriving during or after the snapshot still 3973 * takes effect: we never assert validity again once the 3974 * snapshot has started. 3975 */ 3976 atomic_or_32(&di_cache.cache_valid, 1); 3977 3978 rval = di_snapshot_and_clean(st); 3979 3980 if (rval == 0) { 3981 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); 3982 return (0); 3983 } 3984 3985 DI_ALL_PTR(st)->map_size = rval; 3986 if (di_mem2cache(st, &di_cache) == 0) { 3987 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); 3988 return (0); 3989 } 3990 3991 ASSERT(di_cache.cache_data); 3992 ASSERT(di_cache.cache_size > 0); 3993 3994 /* 3995 * Now that we have cached the snapshot, compute its checksum. 3996 * The checksum is only computed over the valid data in the 3997 * cache, not the entire cache. 3998 * Also, set all the fields (except checksum) before computing 3999 * checksum. 4000 */ 4001 all = (struct di_all *)di_cache.cache_data; 4002 all->cache_magic = DI_CACHE_MAGIC; 4003 all->map_size = rval; 4004 4005 ASSERT(all->cache_checksum == 0); 4006 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); 4007 all->cache_checksum = crc; 4008 4009 di_cache_write(&di_cache); 4010 4011 return (rval); 4012 } 4013 4014 static void 4015 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 4016 { 4017 va_list ap; 4018 4019 if (di_cache_debug <= DI_QUIET) 4020 return; 4021 4022 if (di_cache_debug < msglevel) 4023 return; 4024 4025 switch (msglevel) { 4026 case DI_ERR: 4027 msglevel = CE_WARN; 4028 break; 4029 case DI_INFO: 4030 case DI_TRACE: 4031 default: 4032 msglevel = CE_NOTE; 4033 break; 4034 } 4035 4036 va_start(ap, fmt); 4037 vcmn_err(msglevel, fmt, ap); 4038 va_end(ap); 4039 } 4040
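/*
 * Illustrative sketch, not part of this driver: the hypothetical
 * DI_EXAMPLE guard is never defined. It verifies a snapshot buffer's
 * checksum the same way di_cache_read() does above, computing CRC32
 * over all map_size bytes with the cache_checksum field zeroed for
 * the duration of the computation.
 */
#ifdef DI_EXAMPLE
static int
example_verify_checksum(caddr_t data, size_t map_size)
{
	struct di_all	*all = (struct di_all *)data;
	uint32_t	saved, crc;

	saved = all->cache_checksum;
	all->cache_checksum = 0;
	CRC32(crc, data, map_size, -1U, crc32_table);
	all->cache_checksum = saved;

	return (crc == saved);	/* nonzero when the checksum matches */
}
#endif /* DI_EXAMPLE */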