/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident   "%Z%%M% %I% %E% SMI"

/*
 * driver for accessing kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define dcmn_err(args) if (di_debug >= 1) cmn_err args
#define dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define dcmn_err(args) /* nothing */
#define dcmn_err2(args) /* nothing */
#define dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

#define DI_FULL_PARENT          0
#define DI_READONLY_PARENT      1
#define DI_NODE_SPECIES         2
#define DI_UNPRIVILEGED_NODE(x) (((x) % 2) != 0)

#define IOC_IDLE        0       /* snapshot ioctl states */
#define IOC_SNAP        1       /* snapshot in progress */
#define IOC_DONE        2       /* snapshot done, but not copied out */
#define IOC_COPY        3       /* copyout in progress */

/*
 * Keep max alignment so we can move the snapshot to different platforms.
 */
#define DI_ALIGN(addr)  ((addr + 7l) & ~7l)
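
/*
 * Editorial worked example (not part of the original source):
 * DI_ALIGN() rounds an offset up to the next 8-byte boundary, e.g.
 * DI_ALIGN(13) == ((13 + 7) & ~7) == 16, and DI_ALIGN(16) == 16.
 * Every offset stored in the snapshot is aligned this way, so a
 * snapshot produced on one platform can be parsed on another with
 * stricter alignment requirements.
 */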
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
    struct di_mem *next;        /* link to next chunk */
    char *buf;                  /* contiguous kernel memory */
    size_t buf_size;            /* size of buf in bytes */
    devmap_cookie_t cook;       /* cookie from ddi_umem_alloc */
};

/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
    void *offset[MAX_TREE_DEPTH];
    struct dev_info *dip[MAX_TREE_DEPTH];
    int circ[MAX_TREE_DEPTH];
    int depth;  /* depth of current node to be copied */
};

#define TOP_OFFSET(stack)       \
        ((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define TOP_NODE(stack)         \
        ((stack)->dip[(stack)->depth - 1])
#define PARENT_OFFSET(stack)    \
        ((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define EMPTY_STACK(stack)      ((stack)->depth == 0)
#define POP_STACK(stack)        { \
        ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
            (stack)->circ[(stack)->depth - 1]); \
        ((stack)->depth--); \
}
#define PUSH_STACK(stack, node, offp)   { \
        ASSERT(node != NULL); \
        ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
        (stack)->dip[(stack)->depth] = (node); \
        (stack)->offset[(stack)->depth] = (void *)(offp); \
        ((stack)->depth)++; \
}

#define DI_ALL_PTR(s)   ((struct di_all *)di_mem_addr((s), 0))
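
/*
 * Editorial sketch (not part of the original source) of how these
 * macros cooperate: di_copytree() seeds the stack and di_copynode()
 * then drives the walk without recursion:
 *
 *      PUSH_STACK(dsp, root, off_p);
 *      while (!EMPTY_STACK(dsp))
 *              off = di_copynode(dsp, st);
 *
 * PUSH_STACK() holds each node via ndi_devi_enter() and POP_STACK()
 * releases it via ndi_devi_exit(), so the entire path from the root
 * down to the current node stays held while its subtree is copied.
 */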
/*
 * With devfs, the device tree has no global locks.  The device tree is
 * dynamic and dips may come and go if they are not locked locally.  Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree.  This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
    DI_DKEY = 1,
    DI_PKEY = 2
};

struct di_dkey {
    dev_info_t  *dk_dip;
    major_t     dk_major;
    int         dk_inst;
    pnode_t     dk_nodeid;
};

struct di_pkey {
    mdi_pathinfo_t      *pk_pip;
    char                *pk_path_addr;
    dev_info_t          *pk_client;
    dev_info_t          *pk_phci;
};

struct di_key {
    enum di_ktype       k_type;
    union {
        struct di_dkey dkey;
        struct di_pkey pkey;
    } k_u;
};


struct i_lnode;

typedef struct i_link {
    /*
     * If a di_link struct representing this i_link struct makes it
     * into the snapshot, then self will point to the offset of
     * the di_link struct in the snapshot
     */
    di_off_t    self;

    int         spec_type;     /* block or char access type */
    struct i_lnode      *src_lnode;     /* src i_lnode */
    struct i_lnode      *tgt_lnode;     /* tgt i_lnode */
    struct i_link       *src_link_next; /* next src i_link with same i_lnode */
    struct i_link       *tgt_link_next; /* next tgt i_link with same i_lnode */
} i_link_t;

typedef struct i_lnode {
    /*
     * If a di_lnode struct representing this i_lnode struct makes it
     * into the snapshot, then self will point to the offset of
     * the di_lnode struct in the snapshot
     */
    di_off_t    self;

    /*
     * used for hashing and comparing i_lnodes
     */
    int         modid;

    /*
     * public information describing a link endpoint
     */
    struct di_node      *di_node;       /* di_node in snapshot */
    dev_t               devt;           /* devt */

    /*
     * i_link ptr to links coming into this i_lnode node
     * (this i_lnode is the target of these i_links)
     */
    i_link_t    *link_in;

    /*
     * i_link ptr to links going out of this i_lnode node
     * (this i_lnode is the source of these i_links)
     */
    i_link_t    *link_out;
} i_lnode_t;

/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
    di_off_t    mem_size;       /* total # bytes in memlist */
    struct di_mem       *memlist;       /* head of memlist */
    uint_t      command;        /* command from ioctl */
    int         di_iocstate;    /* snapshot ioctl state */
    mod_hash_t  *reg_dip_hash;
    mod_hash_t  *reg_pip_hash;
    int         lnode_count;
    int         link_count;

    mod_hash_t  *lnode_hash;
    mod_hash_t  *link_hash;
} **di_states;

static kmutex_t di_lock;        /* serialize instance assignment */

typedef enum {
    DI_QUIET = 0,       /* DI_QUIET must always be 0 */
    DI_ERR,
    DI_INFO,
    DI_TRACE,
    DI_TRACE1,
    DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;    /* I/O chunk size in pages */

#define DI_CACHE_LOCK(c)        (mutex_enter(&(c).cache_lock))
#define DI_CACHE_UNLOCK(c)      (mutex_exit(&(c).cache_lock))
#define DI_CACHE_LOCKED(c)      (mutex_owned(&(c).cache_lock))

/*
 * Check that the whole device tree is being configured as a pre-condition
 * for cleaning up /etc/devices files.
 */
#define DEVICES_FILES_CLEANABLE(st)     \
        (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
        strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

#define CACHE_DEBUG(args)       \
        { if (di_cache_debug != DI_QUIET) di_cache_print args; }

/*
 * This was declared as a static variable named phci_walk_arg_t; it is
 * used as a type, so make it a typedef.
 */
typedef struct phci_walk_arg {
    di_off_t    off;
    struct di_state     *st;
} phci_walk_arg_t;

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct di_stack *, struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(struct ddi_prop *, di_off_t *,
    struct di_state *, struct dev_info *, int);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static caddr_t di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
int build_phci_list(dev_info_t *ph_devinfo, void *arg);

static struct cb_ops di_cb_ops = {
    di_open,            /* open */
    di_close,           /* close */
    nodev,              /* strategy */
    nodev,              /* print */
    nodev,              /* dump */
    nodev,              /* read */
    nodev,              /* write */
    di_ioctl,           /* ioctl */
    nodev,              /* devmap */
    nodev,              /* mmap */
    nodev,              /* segmap */
    nochpoll,           /* poll */
    ddi_prop_op,        /* prop_op */
    NULL,               /* streamtab */
    D_NEW | D_MP        /* Driver compatibility flag */
};

static struct dev_ops di_ops = {
    DEVO_REV,           /* devo_rev, */
    0,                  /* refcnt */
    di_info,            /* info */
    nulldev,            /* identify */
    nulldev,            /* probe */
    di_attach,          /* attach */
    di_detach,          /* detach */
    nodev,              /* reset */
    &di_cb_ops,         /* driver operations */
    NULL                /* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
    &mod_driverops,
    "DEVINFO Driver %I%",
    &di_ops
};

static struct modlinkage modlinkage = {
    MODREV_1,
    &modldrv,
    NULL
};

int
_init(void)
{
    int error;

    mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

    error = mod_install(&modlinkage);
    if (error != 0) {
        mutex_destroy(&di_lock);
        return (error);
    }

    return (0);
}

int
_info(struct modinfo *modinfop)
{
    return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
    int error;

    error = mod_remove(&modlinkage);
    if (error != 0) {
        return (error);
    }

    mutex_destroy(&di_lock);
    return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
    int error = DDI_FAILURE;

    switch (infocmd) {
    case DDI_INFO_DEVT2DEVINFO:
        *result = (void *)di_dip;
        error = DDI_SUCCESS;
        break;
    case DDI_INFO_DEVT2INSTANCE:
        /*
         * All dev_t's map to the same, single instance.
         */
        *result = (void *)0;
        error = DDI_SUCCESS;
        break;
    default:
        break;
    }

    return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
    int error = DDI_FAILURE;

    switch (cmd) {
    case DDI_ATTACH:
        di_states = kmem_zalloc(
            di_max_opens * sizeof (struct di_state *), KM_SLEEP);

        if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
            DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
            ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
            DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
            kmem_free(di_states,
                di_max_opens * sizeof (struct di_state *));
            ddi_remove_minor_node(dip, NULL);
            error = DDI_FAILURE;
        } else {
            di_dip = dip;
            ddi_report_dev(dip);

            error = DDI_SUCCESS;
        }
        break;
    default:
        error = DDI_FAILURE;
        break;
    }

    return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
    int error = DDI_FAILURE;

    switch (cmd) {
    case DDI_DETACH:
        ddi_remove_minor_node(dip, NULL);
        di_dip = NULL;
        kmem_free(di_states, di_max_opens * sizeof (struct di_state *));

        error = DDI_SUCCESS;
        break;
    default:
        error = DDI_FAILURE;
        break;
    }

    return (error);
}

/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device.  Each minor gets a separate
 * entry in the di_states[] table.  Based on the original minor number, we
 * discriminate opens of the full and read-only nodes.  If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
    int m;
    minor_t minor_parent = getminor(*devp);

    if (minor_parent != DI_FULL_PARENT &&
        minor_parent != DI_READONLY_PARENT)
        return (ENXIO);

    mutex_enter(&di_lock);

    for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
        if (di_states[m] != NULL)
            continue;

        di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
        break;  /* It's ours. */
    }

    if (m >= di_max_opens) {
        /*
         * maximum open instance for device reached
         */
        mutex_exit(&di_lock);
        dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
        return (EAGAIN);
    }
    mutex_exit(&di_lock);

    ASSERT(m < di_max_opens);
    *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

    dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
        (void *)curthread, m + DI_NODE_SPECIES));

    return (0);
}
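
/*
 * Editorial note (not part of the original source) on the minor-number
 * arithmetic above: with DI_NODE_SPECIES == 2, an open of the full node
 * (minor DI_FULL_PARENT == 0) probes slots 0, 2, 4, ... of di_states[],
 * while an open of the read-only node (minor DI_READONLY_PARENT == 1)
 * probes slots 1, 3, 5, ...; DI_UNPRIVILEGED_NODE(m) is therefore true
 * exactly for the read-only slots.  The caller is handed back minor
 * m + DI_NODE_SPECIES, which di_close() and di_ioctl() convert back to
 * the di_states[] index by subtracting DI_NODE_SPECIES.
 */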

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
    struct di_state *st;
    int m = (int)getminor(dev) - DI_NODE_SPECIES;

    if (m < 0) {
        cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
            m + DI_NODE_SPECIES);
        return (ENXIO);
    }

    ASSERT(m < di_max_opens);
    st = di_states[m];
    ASSERT(st != NULL);

    di_freemem(st);
    kmem_free(st, sizeof (struct di_state));

    /*
     * empty slot in state table
     */
    mutex_enter(&di_lock);
    di_states[m] = NULL;
    dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
        (void *)curthread, m + DI_NODE_SPECIES));
    mutex_exit(&di_lock);

    return (0);
}


/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
    int rv, error;
    di_off_t off;
    struct di_all *all;
    struct di_state *st;
    int m = (int)getminor(dev) - DI_NODE_SPECIES;

    major_t i;
    char *drv_name;
    size_t map_size, size;
    struct di_mem *dcp;
    int ndi_flags;

    if (m < 0 || m >= di_max_opens) {
        return (ENXIO);
    }

    st = di_states[m];
    ASSERT(st != NULL);

    dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

    switch (cmd) {
    case DINFOIDENT:
        /*
         * This is called from di_init to verify that the driver
         * opened is indeed devinfo.  The purpose is to guard against
         * sending an ioctl to an unknown driver in case of an
         * unresolved major number conflict during bfu.
         */
        *rvalp = DI_MAGIC;
        return (0);

    case DINFOLODRV:
        /*
         * Hold an installed driver and return the result
         */
        if (DI_UNPRIVILEGED_NODE(m)) {
            /*
             * Only the fully enabled instances may issue
             * DINFOLODRV.
             */
            return (EACCES);
        }

        drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
        if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
            kmem_free(drv_name, MAXNAMELEN);
            return (EFAULT);
        }

        /*
         * Some third-party drivers' _init() routines walk the device
         * tree, so we load the driver module before configuring it.
         */
        i = ddi_name_to_major(drv_name);
        if (ddi_hold_driver(i) == NULL) {
            kmem_free(drv_name, MAXNAMELEN);
            return (ENXIO);
        }

        ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

        /*
         * i_ddi_load_drvconf() below will trigger a reprobe
         * via reset_nexus_flags().  NDI_DRV_CONF_REPROBE isn't
         * needed here.
         */
        modunload_disable();
        (void) i_ddi_load_drvconf(i);
        (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
        kmem_free(drv_name, MAXNAMELEN);
        ddi_rele_driver(i);
        rv = i_ddi_devs_attached(i);
        modunload_enable();

        i_ddi_di_cache_invalidate(KM_SLEEP);

        return ((rv == DDI_SUCCESS) ? 0 : ENXIO);

    case DINFOUSRLD:
        /*
         * The case for copying snapshot to userland
         */
        if (di_setstate(st, IOC_COPY) == -1)
            return (EBUSY);

        map_size = ((struct di_all *)di_mem_addr(st, 0))->map_size;
        if (map_size == 0) {
            (void) di_setstate(st, IOC_DONE);
            return (EFAULT);
        }

        /*
         * copyout the snapshot
         */
        map_size = (map_size + PAGEOFFSET) & PAGEMASK;

        /*
         * Return the map size, so the caller may do a sanity
         * check against the return value of the snapshot ioctl()
         */
        *rvalp = (int)map_size;

        /*
         * Copy one chunk at a time
         */
        off = 0;
        dcp = st->memlist;
        while (map_size) {
            size = dcp->buf_size;
            if (map_size <= size) {
                size = map_size;
            }

            if (ddi_copyout(di_mem_addr(st, off),
                (void *)(arg + off), size, mode) != 0) {
                (void) di_setstate(st, IOC_DONE);
                return (EFAULT);
            }

            map_size -= size;
            off += size;
            dcp = dcp->next;
        }

        di_freemem(st);
        (void) di_setstate(st, IOC_IDLE);
        return (0);

    default:
        if ((cmd & ~DIIOC_MASK) != DIIOC) {
            /*
             * Invalid ioctl command
             */
            return (ENOTTY);
        }
        /*
         * take a snapshot
         */
        st->command = cmd & DIIOC_MASK;
        /*FALLTHROUGH*/
    }

    /*
     * Obtain enough memory to hold header + rootpath.  We prevent kernel
     * memory exhaustion by freeing any previously allocated snapshot and
     * refusing the operation; otherwise we would be allowing ioctl(),
     * ioctl(), ioctl(), ..., panic.
     */
    if (di_setstate(st, IOC_SNAP) == -1)
        return (EBUSY);

    size = sizeof (struct di_all) +
        sizeof (((struct dinfo_io *)(NULL))->root_path);
    if (size < PAGESIZE)
        size = PAGESIZE;
    di_allocmem(st, size);

    all = (struct di_all *)di_mem_addr(st, 0);
    all->devcnt = devcnt;
    all->command = st->command;
    all->version = DI_SNAPSHOT_VERSION;
    all->top_vhci_devinfo = 0;  /* filled in by build_vhci_list. */

    /*
     * Note the endianness in case we need to transport the snapshot
     * over the network.
     */
#if defined(_LITTLE_ENDIAN)
    all->endianness = DI_LITTLE_ENDIAN;
#else
    all->endianness = DI_BIG_ENDIAN;
#endif

    /* Copyin ioctl args, store in the snapshot. */
    if (copyinstr((void *)arg, all->root_path,
        sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
        di_freemem(st);
        (void) di_setstate(st, IOC_IDLE);
        return (EFAULT);
    }

    if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
        di_freemem(st);
        (void) di_setstate(st, IOC_IDLE);
        return (EINVAL);
    }

    error = 0;
    if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
        di_freemem(st);
        (void) di_setstate(st, IOC_IDLE);
        return (error);
    }

    off = DI_ALIGN(sizeof (struct di_all) + size);

    /*
     * Only the fully enabled version may force load drivers or read
     * the parent private data from a driver.
     */
    if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
        DI_UNPRIVILEGED_NODE(m)) {
        di_freemem(st);
        (void) di_setstate(st, IOC_IDLE);
        return (EACCES);
    }

    /* Do we need private data? */
    if (st->command & DINFOPRIVDATA) {
        arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
        switch (ddi_model_convert_from(mode & FMODELS)) {
        case DDI_MODEL_ILP32: {
            /*
             * Cannot copy private data from 64-bit kernel
             * to 32-bit app
             */
            di_freemem(st);
            (void) di_setstate(st, IOC_IDLE);
            return (EINVAL);
        }
        case DDI_MODEL_NONE:
            if ((off = di_copyformat(off, st, arg, mode)) == 0) {
                di_freemem(st);
                (void) di_setstate(st, IOC_IDLE);
                return (EFAULT);
            }
            break;
        }
#else /* !_MULTI_DATAMODEL */
        if ((off = di_copyformat(off, st, arg, mode)) == 0) {
            di_freemem(st);
            (void) di_setstate(st, IOC_IDLE);
            return (EFAULT);
        }
#endif /* _MULTI_DATAMODEL */
    }

    all->top_devinfo = DI_ALIGN(off);

    /*
     * For cache lookups we reallocate memory from scratch,
     * so the value of "all" is no longer valid.
     */
    all = NULL;

    if (st->command & DINFOCACHE) {
        *rvalp = di_cache_lookup(st);
    } else if (snapshot_is_cacheable(st)) {
        DI_CACHE_LOCK(di_cache);
        *rvalp = di_cache_update(st);
        DI_CACHE_UNLOCK(di_cache);
    } else
        *rvalp = di_snapshot_and_clean(st);

    if (*rvalp) {
        DI_ALL_PTR(st)->map_size = *rvalp;
        (void) di_setstate(st, IOC_DONE);
    } else {
        di_freemem(st);
        (void) di_setstate(st, IOC_IDLE);
    }

    return (0);
}
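
/*
 * Editorial sketch (not part of the original source) of the ioctl
 * protocol di_ioctl() implements, as seen from userland.  Roughly
 * (the exact command word and device path used by libdevinfo live
 * outside this file):
 *
 *      fd = open("/devices/pseudo/devinfo@0:devinfo", O_RDONLY);
 *      ioctl(fd, DINFOIDENT, NULL);    -- must return DI_MAGIC
 *      size = ioctl(fd, <DIIOC snapshot command>, root_path_arg);
 *      buf = malloc(size);             -- size is page-rounded
 *      ioctl(fd, DINFOUSRLD, buf);     -- copy the snapshot out
 *
 * DINFOUSRLD is only valid once a snapshot has reached IOC_DONE; a
 * second DINFOUSRLD without a new snapshot fails, since the first
 * one freed the snapshot memory and returned the state machine to
 * IOC_IDLE.
 */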

/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
    struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem),
        KM_SLEEP);
    /*
     * Round up size to the nearest power of 2.  If it is less
     * than st->mem_size, set it to st->mem_size (i.e., the
     * mem_size is doubled every time) to reduce the number
     * of memory allocations.
     */
    size_t tmp = 1;
    while (tmp < size) {
        tmp <<= 1;
    }
    size = (tmp > st->mem_size) ? tmp : st->mem_size;

    mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
    mem->buf_size = size;

    dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

    if (st->mem_size == 0) {    /* first chunk */
        st->memlist = mem;
    } else {
        /*
         * locate end of linked list and add a chunk at the end
         */
        struct di_mem *dcp = st->memlist;
        while (dcp->next != NULL) {
            dcp = dcp->next;
        }

        dcp->next = mem;
    }

    st->mem_size += size;
}
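
/*
 * Editorial note (not part of the original source): because each new
 * chunk is at least st->mem_size bytes, the total allocation at least
 * doubles on every call, so a snapshot of S bytes lives in O(log S)
 * chunks.  For example, on a fresh di_state, requests of 4K, 100, and
 * 9000 bytes yield chunks of 4K, 4K, and 16K, growing mem_size from
 * 4K to 8K to 24K.
 */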

/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
    struct di_mem *dcp;
    size_t copysz;

    if (st->mem_size == 0) {
        ASSERT(st->memlist == NULL);
        return;
    }

    copysz = 0;
    for (dcp = st->memlist; dcp; dcp = dcp->next) {

        ASSERT(bufsiz > 0);

        if (bufsiz <= dcp->buf_size)
            copysz = bufsiz;
        else
            copysz = dcp->buf_size;

        bcopy(dcp->buf, buf, copysz);

        buf += copysz;
        bufsiz -= copysz;

        if (bufsiz == 0)
            break;
    }
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
    struct di_mem *dcp, *tmp;

    dcmn_err2((CE_CONT, "di_freemem\n"));

    if (st->mem_size) {
        dcp = st->memlist;
        while (dcp) {   /* traverse the linked list */
            tmp = dcp;
            dcp = dcp->next;
            ddi_umem_free(tmp->cook);
            kmem_free(tmp, sizeof (struct di_mem));
        }
        st->mem_size = 0;
        st->memlist = NULL;
    }

    ASSERT(st->mem_size == 0);
    ASSERT(st->memlist == NULL);
}

/*
 * Copies cached data to the di_state structure.
 * Returns:
 *      - size of data copied, on SUCCESS
 *      - 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
    caddr_t pa;

    ASSERT(st->mem_size == 0);
    ASSERT(st->memlist == NULL);
    ASSERT(!servicing_interrupt());
    ASSERT(DI_CACHE_LOCKED(*cache));

    if (cache->cache_size == 0) {
        ASSERT(cache->cache_data == NULL);
        CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
        return (0);
    }

    ASSERT(cache->cache_data);

    di_allocmem(st, cache->cache_size);

    pa = di_mem_addr(st, 0);

    ASSERT(pa);

    /*
     * Verify that di_allocmem() allocates contiguous memory,
     * so that it is safe to do straight bcopy()
     */
    ASSERT(st->memlist != NULL);
    ASSERT(st->memlist->next == NULL);
    bcopy(cache->cache_data, pa, cache->cache_size);

    return (cache->cache_size);
}

/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *      - 0 on failure
 *      - size of copied data on success
 */
static int
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
    size_t map_size;

    ASSERT(cache->cache_size == 0);
    ASSERT(cache->cache_data == NULL);
    ASSERT(!servicing_interrupt());
    ASSERT(DI_CACHE_LOCKED(*cache));

    if (st->mem_size == 0) {
        ASSERT(st->memlist == NULL);
        CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
        return (0);
    }

    ASSERT(st->memlist);

    /*
     * The size of the memory list may be much larger than the
     * size of valid data (map_size).  Cache only the valid data.
     */
    map_size = DI_ALL_PTR(st)->map_size;
    if (map_size == 0 || map_size < sizeof (struct di_all) ||
        map_size > st->mem_size) {
        CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
        return (0);
    }

    cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
    cache->cache_size = map_size;
    di_copymem(st, cache->cache_data, cache->cache_size);

    return (map_size);
}

/*
 * Make sure there are at least "size" bytes of memory left before
 * going on.  Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
    dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
        off, (int)size));

    /*
     * di_checkmem() shouldn't be called with a size of zero.
     * But in case it is, we want to make sure we return a valid
     * offset within the memlist and not an offset that points us
     * at the end of the memlist.
     */
    if (size == 0) {
        dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
        size = 1;
    }

    off = DI_ALIGN(off);
    if ((st->mem_size - off) < size) {
        off = st->mem_size;
        di_allocmem(st, size);
    }

    return (off);
}

/*
 * Copy the private data format from ioctl arg.
 * On success, the ending offset is returned.  On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
    di_off_t size;
    struct di_priv_data *priv;
    struct di_all *all = (struct di_all *)di_mem_addr(st, 0);

    dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
        off, (void *)arg, mode));

    /*
     * Copyin data and check version.
     * We only handle private data version 0.
     */
    priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
    if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
        mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
        kmem_free(priv, sizeof (struct di_priv_data));
        return (0);
    }

    /*
     * Save di_priv_data copied from userland in the snapshot.
     */
    all->pd_version = priv->version;
    all->n_ppdata = priv->n_parent;
    all->n_dpdata = priv->n_driver;

    /*
     * copyin private data format, modify offset accordingly
     */
    if (all->n_ppdata) {        /* parent private data format */
        /*
         * check memory
         */
        size = all->n_ppdata * sizeof (struct di_priv_format);
        off = di_checkmem(st, off, size);
        all->ppdata_format = off;
        if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
            mode) != 0) {
            kmem_free(priv, sizeof (struct di_priv_data));
            return (0);
        }

        off += size;
    }

    if (all->n_dpdata) {        /* driver private data format */
        /*
         * check memory
         */
        size = all->n_dpdata * sizeof (struct di_priv_format);
        off = di_checkmem(st, off, size);
        all->dpdata_format = off;
        if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
            mode) != 0) {
            kmem_free(priv, sizeof (struct di_priv_data));
            return (0);
        }

        off += size;
    }

    kmem_free(priv, sizeof (struct di_priv_data));
    return (off);
}

/*
 * Return the real address based on the offset (off) within snapshot
 */
static caddr_t
di_mem_addr(struct di_state *st, di_off_t off)
{
    struct di_mem *dcp = st->memlist;

    dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
        (void *)dcp, off));

    ASSERT(off < st->mem_size);

    while (off >= dcp->buf_size) {
        off -= dcp->buf_size;
        dcp = dcp->next;
    }

    dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
        off, (void *)(dcp->buf + off)));

    return (dcp->buf + off);
}
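
/*
 * Editorial example (not part of the original source): an offset is an
 * index into the logical concatenation of the chunks.  With an 8K
 * chunk followed by a 16K chunk:
 *
 *      off 0x1100:  0x1100 < 0x2000, so chunk 1, buf + 0x1100
 *      off 0x2100:  0x2100 >= 0x2000, subtract 0x2000,
 *                   so chunk 2, buf + 0x100
 *
 * di_checkmem() guarantees that no object straddles a chunk boundary:
 * when fewer than "size" bytes remain in the current chunk, it
 * advances the offset to st->mem_size, i.e. the start of the chunk it
 * is about to allocate.
 */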

/*
 * Ideally we would use the whole key to derive the hash
 * value.  However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
    struct di_key *dik = key;
    size_t rshift;
    void *ptr;

    ASSERT(arg == NULL);

    switch (dik->k_type) {
    case DI_DKEY:
        ptr = dik->k_u.dkey.dk_dip;
        rshift = highbit(sizeof (struct dev_info));
        break;
    case DI_PKEY:
        ptr = dik->k_u.pkey.pk_pip;
        rshift = highbit(sizeof (struct mdi_pathinfo));
        break;
    default:
        panic("devinfo: unknown key type");
        /*NOTREACHED*/
    }
    return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
    char                *path_addr;
    struct di_key       *dik = key;

    switch (dik->k_type) {
    case DI_DKEY:
        break;
    case DI_PKEY:
        path_addr = dik->k_u.pkey.pk_path_addr;
        if (path_addr)
            kmem_free(path_addr, strlen(path_addr) + 1);
        break;
    default:
        panic("devinfo: unknown key type");
        /*NOTREACHED*/
    }

    kmem_free(dik, sizeof (struct di_key));
}

static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
    if (dk1->dk_dip != dk2->dk_dip)
        return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

    if (dk1->dk_major != -1 && dk2->dk_major != -1) {
        if (dk1->dk_major != dk2->dk_major)
            return (dk1->dk_major > dk2->dk_major ? 1 : -1);

        if (dk1->dk_inst != dk2->dk_inst)
            return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
    }

    if (dk1->dk_nodeid != dk2->dk_nodeid)
        return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

    return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
    char        *p1, *p2;
    int         rv;

    if (pk1->pk_pip != pk2->pk_pip)
        return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

    p1 = pk1->pk_path_addr;
    p2 = pk2->pk_path_addr;

    p1 = p1 ? p1 : "";
    p2 = p2 ? p2 : "";

    rv = strcmp(p1, p2);
    if (rv)
        return (rv > 0 ? 1 : -1);

    if (pk1->pk_client != pk2->pk_client)
        return (pk1->pk_client > pk2->pk_client ? 1 : -1);

    if (pk1->pk_phci != pk2->pk_phci)
        return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

    return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
    struct di_key *dik1, *dik2;

    dik1 = key1;
    dik2 = key2;

    if (dik1->k_type != dik2->k_type) {
        panic("devinfo: mismatched keys");
        /*NOTREACHED*/
    }

    switch (dik1->k_type) {
    case DI_DKEY:
        return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
    case DI_PKEY:
        return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
    default:
        panic("devinfo: unknown key type");
        /*NOTREACHED*/
    }
}

/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
    di_off_t    off;
    struct di_all       *all;
    dev_info_t  *rootnode;
    char        buf[80];
    int         plen;
    char        *path;
    vnode_t     *vp;

    all = (struct di_all *)di_mem_addr(st, 0);
    dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

    /*
     * Verify path before entrusting it to e_ddi_hold_devi_by_path because
     * some platforms have OBP bugs where executing the NDI_PROMNAME code
     * path against an invalid path results in panic.  The lookupnameat
     * is done relative to rootdir without a leading '/' on "devices/"
     * to force the lookup to occur in the global zone.
     */
    plen = strlen("devices/") + strlen(all->root_path) + 1;
    path = kmem_alloc(plen, KM_SLEEP);
    (void) snprintf(path, plen, "devices/%s", all->root_path);
    if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
        dcmn_err((CE_CONT, "Devinfo node %s not found\n",
            all->root_path));
        kmem_free(path, plen);
        return (0);
    }
    kmem_free(path, plen);
    VN_RELE(vp);

    /*
     * Hold the devinfo node referred to by the path.
     */
    rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
    if (rootnode == NULL) {
        dcmn_err((CE_CONT, "Devinfo node %s not found\n",
            all->root_path));
        return (0);
    }

    (void) snprintf(buf, sizeof (buf),
        "devinfo registered dips (statep=%p)", (void *)st);

    st->reg_dip_hash = mod_hash_create_extended(buf, 64,
        di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
        NULL, di_key_cmp, KM_SLEEP);


    (void) snprintf(buf, sizeof (buf),
        "devinfo registered pips (statep=%p)", (void *)st);

    st->reg_pip_hash = mod_hash_create_extended(buf, 64,
        di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
        NULL, di_key_cmp, KM_SLEEP);

    /*
     * copy the device tree
     */
    off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

    if (DINFOPATH & st->command) {
        mdi_walk_vhcis(build_vhci_list, st);
    }

    ddi_release_devi(rootnode);

    /*
     * copy the devnames array
     */
    all->devnames = off;
    off = di_copydevnm(&all->devnames, st);


    /* initialize the hash tables */
    st->lnode_count = 0;
    st->link_count = 0;

    if (DINFOLYR & st->command) {
        off = di_getlink_data(off, st);
    }

    /*
     * Free up hash tables
     */
    mod_hash_destroy_hash(st->reg_dip_hash);
    mod_hash_destroy_hash(st->reg_pip_hash);

    /*
     * Record the timestamp now that we are done with the snapshot.
     *
     * We compute the checksum later, and only if we cache the
     * snapshot, since checksumming adds some overhead.  The
     * checksum is verified later, when the cache file is read
     * back from disk.
     *
     * Set the checksum field to 0, as the CRC is calculated with
     * that field set to 0.
     */
    all->snapshot_time = ddi_get_time();
    all->cache_checksum = 0;

    return (off);
}

/*
 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
 */
static di_off_t
di_snapshot_and_clean(struct di_state *st)
{
    di_off_t off;

    modunload_disable();
    off = di_snapshot(st);
    if (off != 0 && (st->command & DINFOCLEANUP)) {
        ASSERT(DEVICES_FILES_CLEANABLE(st));
        /*
         * Cleanup /etc/devices files:
         * In order to accurately account for the system configuration
         * in /etc/devices files, the appropriate drivers must be
         * fully configured before the cleanup starts.
         * So enable modunload only after the cleanup.
         */
        i_ddi_clean_devices_files();
    }
    modunload_enable();

    return (off);
}

/*
 * construct vhci linkage in the snapshot.
 */
int
build_vhci_list(dev_info_t *vh_devinfo, void *arg)
{
    struct di_all *all;
    struct di_node *me;
    struct di_state *st;
    di_off_t off;
    phci_walk_arg_t pwa;

    dcmn_err3((CE_CONT, "build_vhci_list\n"));

    dcmn_err3((CE_CONT, "vhci node %s, instance #%d\n",
        DEVI(vh_devinfo)->devi_node_name,
        DEVI(vh_devinfo)->devi_instance));

    st = (struct di_state *)arg;
    if (di_dip_find(st, vh_devinfo, &off) != 0) {
        dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
        return (DDI_WALK_TERMINATE);
    }

    dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
        st->mem_size, off));

    all = (struct di_all *)di_mem_addr(st, 0);
    if (all->top_vhci_devinfo == 0) {
        all->top_vhci_devinfo = off;
    } else {
        me = (struct di_node *)di_mem_addr(st, all->top_vhci_devinfo);

        while (me->next_vhci != 0) {
            me = (struct di_node *)di_mem_addr(st, me->next_vhci);
        }

        me->next_vhci = off;
    }

    pwa.off = off;
    pwa.st = st;
    mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);

    return (DDI_WALK_CONTINUE);
}

/*
 * construct phci linkage for the given vhci in the snapshot.
 */
int
build_phci_list(dev_info_t *ph_devinfo, void *arg)
{
    struct di_node *vh_di_node;
    struct di_node *me;
    phci_walk_arg_t *pwa;
    di_off_t off;

    pwa = (phci_walk_arg_t *)arg;

    dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
        pwa->off));

    vh_di_node = (struct di_node *)di_mem_addr(pwa->st, pwa->off);

    if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
        dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
        return (DDI_WALK_TERMINATE);
    }

    dcmn_err3((CE_CONT, "phci node %s, instance #%d, at offset 0x%x\n",
        DEVI(ph_devinfo)->devi_node_name,
        DEVI(ph_devinfo)->devi_instance, off));

    if (vh_di_node->top_phci == 0) {
        vh_di_node->top_phci = off;
        return (DDI_WALK_CONTINUE);
    }

    me = (struct di_node *)di_mem_addr(pwa->st, vh_di_node->top_phci);

    while (me->next_phci != 0) {
        me = (struct di_node *)di_mem_addr(pwa->st, me->next_phci);
    }
    me->next_phci = off;

    return (DDI_WALK_CONTINUE);
}

/*
 * Assumes all devinfo nodes in device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *poff_p)
{
    struct dev_info *node;
    struct di_node *me;
    di_off_t off;

    ASSERT(mutex_owned(&dnp->dn_lock));

    node = DEVI(dnp->dn_head);
    for (; node; node = node->devi_next) {
        if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
            continue;

        ASSERT(off > 0);
        me = (struct di_node *)di_mem_addr(st, off);
        ASSERT(me->next == 0 || me->next == -1);
        /*
         * Only nodes which were BOUND when they were
         * snapshotted will be added to the per-driver list.
         */
        if (me->next != -1)
            continue;

        *poff_p = off;
        poff_p = &me->next;
    }

    *poff_p = 0;
}

/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * It also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
    int i;
    di_off_t off;
    size_t size;
    struct di_devnm *dnp;

    dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

    /*
     * make sure there is some allocated memory
     */
    size = devcnt * sizeof (struct di_devnm);
    off = di_checkmem(st, *off_p, size);
    *off_p = off;

    dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
        devcnt, off));

    dnp = (struct di_devnm *)di_mem_addr(st, off);
    off += size;

    for (i = 0; i < devcnt; i++) {
        if (devnamesp[i].dn_name == NULL) {
            continue;
        }

        /*
         * dn_name is not freed during driver unload or removal.
         *
         * There is a race condition when make_devname() changes
         * dn_name during our strcpy.  This should be rare since
         * only add_drv does this.  At any rate, we never had a
         * problem with ddi_name_to_major(), which should have
         * the same problem.
         */
        dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
            devnamesp[i].dn_name, devnamesp[i].dn_instance,
            off));

        off = di_checkmem(st, off, strlen(devnamesp[i].dn_name) + 1);
        dnp[i].name = off;
        (void) strcpy((char *)di_mem_addr(st, off),
            devnamesp[i].dn_name);
        off += DI_ALIGN(strlen(devnamesp[i].dn_name) + 1);

        mutex_enter(&devnamesp[i].dn_lock);

        /*
         * Snapshot per-driver node list
         */
        snap_driver_list(st, &devnamesp[i], &dnp[i].head);

        /*
         * This is not used by libdevinfo, leave it for now
         */
        dnp[i].flags = devnamesp[i].dn_flags;
        dnp[i].instance = devnamesp[i].dn_instance;

        /*
         * get global properties
         */
        if ((DINFOPROP & st->command) &&
            devnamesp[i].dn_global_prop_ptr) {
            dnp[i].global_prop = off;
            off = di_getprop(
                devnamesp[i].dn_global_prop_ptr->prop_list,
                &dnp[i].global_prop, st, NULL, DI_PROP_GLB_LIST);
        }

        /*
         * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
         */
        if (CB_DRV_INSTALLED(devopsp[i])) {
            if (devopsp[i]->devo_cb_ops) {
                dnp[i].ops |= DI_CB_OPS;
                if (devopsp[i]->devo_cb_ops->cb_str)
                    dnp[i].ops |= DI_STREAM_OPS;
            }
            if (NEXUS_DRV(devopsp[i])) {
                dnp[i].ops |= DI_BUS_OPS;
            }
        }

        mutex_exit(&devnamesp[i].dn_lock);
    }

    dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

    return (off);
}

/*
 * Copy the kernel devinfo tree.  The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
    di_off_t off;
    struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

    dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
        (void *)root, *off_p));

    /* force attach drivers */
    if (i_ddi_devi_attached((dev_info_t *)root) &&
        (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
        (void) ndi_devi_config((dev_info_t *)root,
            NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
            NDI_DRV_CONF_REPROBE);
    }

    /*
     * Push top_devinfo onto a stack
     *
     * The stack is necessary to avoid recursion, which can overrun
     * the kernel stack.
     */
    PUSH_STACK(dsp, root, off_p);

    /*
     * As long as there is a node on the stack, copy the node.
     * di_copynode() is responsible for pushing and popping
     * child and sibling nodes on the stack.
     */
    while (!EMPTY_STACK(dsp)) {
        off = di_copynode(dsp, st);
    }

    /*
     * Free the stack structure
     */
    kmem_free(dsp, sizeof (struct di_stack));

    return (off);
}
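
/*
 * Editorial overview (not part of the original source) of the snapshot
 * layout produced by di_snapshot() and the copy routines above:
 *
 *      offset 0:               struct di_all (root_path, map_size, ...)
 *      all->top_devinfo:       device tree, laid out by di_copytree()
 *      all->devnames:          devnames array, from di_copydevnm()
 *      following offsets:      lnodes/links when DINFOLYR is set
 *
 * All cross-references are di_off_t offsets rather than pointers, which
 * is what lets the snapshot be copied to userland, cached to disk, or
 * carried to a different platform (given the recorded endianness).
 */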

/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot.  The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct di_stack *dsp, struct di_state *st)
{
    di_off_t off;
    struct di_node *me;
    struct dev_info *node;

    dcmn_err2((CE_CONT, "di_copynode: depth = %x\n",
        dsp->depth));

    node = TOP_NODE(dsp);

    ASSERT(node != NULL);

    /*
     * check memory usage, and fix offsets accordingly.
     */
    off = di_checkmem(st, *(TOP_OFFSET(dsp)), sizeof (struct di_node));
    *(TOP_OFFSET(dsp)) = off;
    me = DI_NODE(di_mem_addr(st, off));

    dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
        node->devi_node_name, node->devi_instance, off));

    /*
     * Node parameters:
     * self         -- offset of current node within snapshot
     * nodeid       -- pointer to PROM node (tri-valued)
     * state        -- hot plugging device state
     * node_state   -- devinfo node state (CF1, CF2, etc.)
     */
    me->self = off;
    me->instance = node->devi_instance;
    me->nodeid = node->devi_nodeid;
    me->node_class = node->devi_node_class;
    me->attributes = node->devi_node_attributes;
    me->state = node->devi_state;
    me->node_state = node->devi_node_state;
    me->next_vhci = 0;          /* Filled in by build_vhci_list. */
    me->top_phci = 0;           /* Filled in by build_phci_list. */
    me->next_phci = 0;          /* Filled in by build_phci_list. */
    me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
    me->user_private_data = NULL;

    /*
     * Get parent's offset in snapshot from the stack
     * and store it in the current node
     */
    if (dsp->depth > 1) {
        me->parent = *(PARENT_OFFSET(dsp));
    }

    /*
     * Save the offset of this di_node in a hash table.
     * This is used later to resolve references to this
     * dip from other parts of the tree (per-driver list,
     * multipathing linkages, layered usage linkages).
     * The key used for the hash table is derived from
     * information in the dip.
     */
    di_register_dip(st, (dev_info_t *)node, me->self);

    /*
     * increment offset
     */
    off += sizeof (struct di_node);

#ifdef DEVID_COMPATIBILITY
    /* check for devid as property marker */
    if (node->devi_devid) {
        ddi_devid_t devid;
        char *devidstr;
        int devid_size;

        /*
         * The devid is now represented as a property.
         * For micro release compatibility with the di_devid
         * interface in libdevinfo, we must return it as a binary
         * structure in the snapshot.  When di_devid is removed
         * from libdevinfo in a future release (and devi_devid is
         * deleted), then code related to DEVID_COMPATIBILITY can
         * be removed.
         */
        ASSERT(node->devi_devid == DEVID_COMPATIBILITY);
        /* XXX should be DDI_DEV_T_NONE! */
        if (ddi_prop_lookup_string(DDI_DEV_T_ANY, (dev_info_t *)node,
            DDI_PROP_DONTPASS, DEVID_PROP_NAME, &devidstr) ==
            DDI_PROP_SUCCESS) {
            if (ddi_devid_str_decode(devidstr, &devid, NULL) ==
                DDI_SUCCESS) {
                devid_size = ddi_devid_sizeof(devid);
                off = di_checkmem(st, off, devid_size);
                me->devid = off;
                bcopy(devid,
                    di_mem_addr(st, off), devid_size);
                off += devid_size;
                ddi_devid_free(devid);
            }
            ddi_prop_free(devidstr);
        }
    }
#endif /* DEVID_COMPATIBILITY */

    if (node->devi_node_name) {
        off = di_checkmem(st, off, strlen(node->devi_node_name) + 1);
        me->node_name = off;
        (void) strcpy(di_mem_addr(st, off), node->devi_node_name);
        off += strlen(node->devi_node_name) + 1;
    }

    if (node->devi_compat_names && (node->devi_compat_length > 1)) {
        off = di_checkmem(st, off, node->devi_compat_length);
        me->compat_names = off;
        me->compat_length = node->devi_compat_length;
        bcopy(node->devi_compat_names, di_mem_addr(st, off),
            node->devi_compat_length);
        off += node->devi_compat_length;
    }

    if (node->devi_addr) {
        off = di_checkmem(st, off, strlen(node->devi_addr) + 1);
        me->address = off;
        (void) strcpy(di_mem_addr(st, off), node->devi_addr);
        off += strlen(node->devi_addr) + 1;
    }

    if (node->devi_binding_name) {
        off = di_checkmem(st, off, strlen(node->devi_binding_name) + 1);
        me->bind_name = off;
        (void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
        off += strlen(node->devi_binding_name) + 1;
    }

    me->drv_major = node->devi_major;

    /*
     * If the dip is BOUND, set the next pointer of the
     * per-instance list to -1, indicating that it is yet to be resolved.
     * This will be resolved later in snap_driver_list().
     */
    if (me->drv_major != -1) {
        me->next = -1;
    } else {
        me->next = 0;
    }

    /*
     * An optimization to skip mutex_enter when not needed.
     */
    if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
        goto priv_data;
    }

    /*
     * Grab current per dev_info node lock to
     * get minor data and properties.
     */
    mutex_enter(&(node->devi_lock));

    if (!(DINFOMINOR & st->command)) {
        goto path;
    }

    if (node->devi_minor) {     /* minor data */
        me->minor_data = DI_ALIGN(off);
        off = di_getmdata(node->devi_minor, &me->minor_data,
            me->self, st);
    }

path:
    if (!(DINFOPATH & st->command)) {
        goto property;
    }

    if (MDI_VHCI(node)) {
        me->multipath_component = MULTIPATH_COMPONENT_VHCI;
    }

    if (MDI_CLIENT(node)) {
        me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
        me->multipath_client = DI_ALIGN(off);
        off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
            me->self, st, 1);
        dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
            "component type = %d.  off=%d",
            me->multipath_client,
            (void *)node, node->devi_mdi_component, off));
    }

    if (MDI_PHCI(node)) {
        me->multipath_component = MULTIPATH_COMPONENT_PHCI;
        me->multipath_phci = DI_ALIGN(off);
        off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
            me->self, st, 0);
        dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
            "component type = %d.  off=%d",
            me->multipath_phci,
            (void *)node, node->devi_mdi_component, off));
    }

property:
    if (!(DINFOPROP & st->command)) {
        goto unlock;
    }

    if (node->devi_drv_prop_ptr) {      /* driver property list */
        me->drv_prop = DI_ALIGN(off);
        off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st,
            node, DI_PROP_DRV_LIST);
    }

    if (node->devi_sys_prop_ptr) {      /* system property list */
        me->sys_prop = DI_ALIGN(off);
        off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st,
            node, DI_PROP_SYS_LIST);
    }

    if (node->devi_hw_prop_ptr) {       /* hardware property list */
        me->hw_prop = DI_ALIGN(off);
        off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st,
            node, DI_PROP_HW_LIST);
    }

    if (node->devi_global_prop_list == NULL) {
        me->glob_prop = (di_off_t)-1;   /* not global property */
    } else {
        /*
         * Make a copy of the global property list if this devinfo
         * node refers to global properties different from what's
         * on the devnames array.  That can happen if there has
         * been a forced driver.conf update.  See mod_drv(1M).
         */
        ASSERT(me->drv_major != -1);
        if (node->devi_global_prop_list !=
            devnamesp[me->drv_major].dn_global_prop_ptr) {
            me->glob_prop = DI_ALIGN(off);
            off = di_getprop(node->devi_global_prop_list->prop_list,
                &me->glob_prop, st, node, DI_PROP_GLB_LIST);
        }
    }

unlock:
    /*
     * release current per dev_info node lock
     */
    mutex_exit(&(node->devi_lock));

priv_data:
    if (!(DINFOPRIVDATA & st->command)) {
        goto pm_info;
    }

    if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
        me->parent_data = DI_ALIGN(off);
        off = di_getppdata(node, &me->parent_data, st);
    }

    if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
        me->driver_data = DI_ALIGN(off);
        off = di_getdpdata(node, &me->driver_data, st);
    }

pm_info:    /* NOT implemented */
off=%d", 1867 me->multipath_phci, 1868 (void *)node, node->devi_mdi_component, off)); 1869 } 1870 1871 property: 1872 if (!(DINFOPROP & st->command)) { 1873 goto unlock; 1874 } 1875 1876 if (node->devi_drv_prop_ptr) { /* driver property list */ 1877 me->drv_prop = DI_ALIGN(off); 1878 off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st, 1879 node, DI_PROP_DRV_LIST); 1880 } 1881 1882 if (node->devi_sys_prop_ptr) { /* system property list */ 1883 me->sys_prop = DI_ALIGN(off); 1884 off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st, 1885 node, DI_PROP_SYS_LIST); 1886 } 1887 1888 if (node->devi_hw_prop_ptr) { /* hardware property list */ 1889 me->hw_prop = DI_ALIGN(off); 1890 off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st, 1891 node, DI_PROP_HW_LIST); 1892 } 1893 1894 if (node->devi_global_prop_list == NULL) { 1895 me->glob_prop = (di_off_t)-1; /* not global property */ 1896 } else { 1897 /* 1898 * Make copy of global property list if this devinfo refers 1899 * global properties different from what's on the devnames 1900 * array. It can happen if there has been a forced 1901 * driver.conf update. See mod_drv(1M). 1902 */ 1903 ASSERT(me->drv_major != -1); 1904 if (node->devi_global_prop_list != 1905 devnamesp[me->drv_major].dn_global_prop_ptr) { 1906 me->glob_prop = DI_ALIGN(off); 1907 off = di_getprop(node->devi_global_prop_list->prop_list, 1908 &me->glob_prop, st, node, DI_PROP_GLB_LIST); 1909 } 1910 } 1911 1912 unlock: 1913 /* 1914 * release current per dev_info node lock 1915 */ 1916 mutex_exit(&(node->devi_lock)); 1917 1918 priv_data: 1919 if (!(DINFOPRIVDATA & st->command)) { 1920 goto pm_info; 1921 } 1922 1923 if (ddi_get_parent_data((dev_info_t *)node) != NULL) { 1924 me->parent_data = DI_ALIGN(off); 1925 off = di_getppdata(node, &me->parent_data, st); 1926 } 1927 1928 if (ddi_get_driver_private((dev_info_t *)node) != NULL) { 1929 me->driver_data = DI_ALIGN(off); 1930 off = di_getdpdata(node, &me->driver_data, st); 1931 } 1932 1933 pm_info: /* NOT implemented */ 1934 1935 subtree: 1936 if (!(DINFOSUBTREE & st->command)) { 1937 POP_STACK(dsp); 1938 return (DI_ALIGN(off)); 1939 } 1940 1941 child: 1942 /* 1943 * If there is a child--push child onto stack. 1944 * Hold the parent busy while doing so. 
1945 */ 1946 if (node->devi_child) { 1947 me->child = DI_ALIGN(off); 1948 PUSH_STACK(dsp, node->devi_child, &me->child); 1949 return (me->child); 1950 } 1951 1952 sibling: 1953 /* 1954 * no child node, unroll the stack till a sibling of 1955 * a parent node is found or root node is reached 1956 */ 1957 POP_STACK(dsp); 1958 while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) { 1959 node = TOP_NODE(dsp); 1960 me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp)))); 1961 POP_STACK(dsp); 1962 } 1963 1964 if (!EMPTY_STACK(dsp)) { 1965 /* 1966 * a sibling is found, replace top of stack by its sibling 1967 */ 1968 me->sibling = DI_ALIGN(off); 1969 PUSH_STACK(dsp, node->devi_sibling, &me->sibling); 1970 return (me->sibling); 1971 } 1972 1973 /* 1974 * DONE with all nodes 1975 */ 1976 return (DI_ALIGN(off)); 1977 } 1978 1979 static i_lnode_t * 1980 i_lnode_alloc(int modid) 1981 { 1982 i_lnode_t *i_lnode; 1983 1984 i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP); 1985 1986 ASSERT(modid != -1); 1987 i_lnode->modid = modid; 1988 1989 return (i_lnode); 1990 } 1991 1992 static void 1993 i_lnode_free(i_lnode_t *i_lnode) 1994 { 1995 kmem_free(i_lnode, sizeof (i_lnode_t)); 1996 } 1997 1998 static void 1999 i_lnode_check_free(i_lnode_t *i_lnode) 2000 { 2001 /* This lnode and its dip must have been snapshotted */ 2002 ASSERT(i_lnode->self > 0); 2003 ASSERT(i_lnode->di_node->self > 0); 2004 2005 /* at least 1 link (in or out) must exist for this lnode */ 2006 ASSERT(i_lnode->link_in || i_lnode->link_out); 2007 2008 i_lnode_free(i_lnode); 2009 } 2010 2011 static i_link_t * 2012 i_link_alloc(int spec_type) 2013 { 2014 i_link_t *i_link; 2015 2016 i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP); 2017 i_link->spec_type = spec_type; 2018 2019 return (i_link); 2020 } 2021 2022 static void 2023 i_link_check_free(i_link_t *i_link) 2024 { 2025 /* This link must have been snapshotted */ 2026 ASSERT(i_link->self > 0); 2027 2028 /* Both endpoint lnodes must exist for this link */ 2029 ASSERT(i_link->src_lnode); 2030 ASSERT(i_link->tgt_lnode); 2031 2032 kmem_free(i_link, sizeof (i_link_t)); 2033 } 2034 2035 /*ARGSUSED*/ 2036 static uint_t 2037 i_lnode_hashfunc(void *arg, mod_hash_key_t key) 2038 { 2039 i_lnode_t *i_lnode = (i_lnode_t *)key; 2040 struct di_node *ptr; 2041 dev_t dev; 2042 2043 dev = i_lnode->devt; 2044 if (dev != DDI_DEV_T_NONE) 2045 return (i_lnode->modid + getminor(dev) + getmajor(dev)); 2046 2047 ptr = i_lnode->di_node; 2048 ASSERT(ptr->self > 0); 2049 if (ptr) { 2050 uintptr_t k = (uintptr_t)ptr; 2051 k >>= (int)highbit(sizeof (struct di_node)); 2052 return ((uint_t)k); 2053 } 2054 2055 return (i_lnode->modid); 2056 } 2057 2058 static int 2059 i_lnode_cmp(void *arg1, void *arg2) 2060 { 2061 i_lnode_t *i_lnode1 = (i_lnode_t *)arg1; 2062 i_lnode_t *i_lnode2 = (i_lnode_t *)arg2; 2063 2064 if (i_lnode1->modid != i_lnode2->modid) { 2065 return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1); 2066 } 2067 2068 if (i_lnode1->di_node != i_lnode2->di_node) 2069 return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1); 2070 2071 if (i_lnode1->devt != i_lnode2->devt) 2072 return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1); 2073 2074 return (0); 2075 } 2076 2077 /* 2078 * An lnode represents a {dip, dev_t} tuple. A link represents a 2079 * {src_lnode, tgt_lnode, spec_type} tuple. 2080 * The following callback assumes that LDI framework ref-counts the 2081 * src_dip and tgt_dip while invoking this callback. 
 */
static int
di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
{
	struct di_state	*st = (struct di_state *)arg;
	i_lnode_t	*src_lnode, *tgt_lnode, *i_lnode;
	i_link_t	**i_link_next, *i_link;
	di_off_t	soff, toff;
	mod_hash_val_t	nodep = NULL;
	int		res;

	/*
	 * if the source or target of this device usage information doesn't
	 * correspond to a device node then we don't report it via
	 * libdevinfo so return.
	 */
	if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
		return (LDI_USAGE_CONTINUE);

	ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
	ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));

	/*
	 * Skip the ldi_usage if either src or tgt dip is not in the
	 * snapshot. This saves us from pruning bad lnodes/links later.
	 */
	if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
		return (LDI_USAGE_CONTINUE);
	if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
		return (LDI_USAGE_CONTINUE);

	ASSERT(soff > 0);
	ASSERT(toff > 0);

	/*
	 * allocate an i_lnode and add it to the lnode hash
	 * if it is not already present. For this particular
	 * link the lnode is a source, but it may
	 * participate as tgt or src in any number of layered
	 * operations - so it may already be in the hash.
	 */
	i_lnode = i_lnode_alloc(ldi_usage->src_modid);
	i_lnode->di_node = (struct di_node *)di_mem_addr(st, soff);
	i_lnode->devt = ldi_usage->src_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		src_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		src_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate a tgt i_lnode and add it to the lnode hash
	 */
	i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
	i_lnode->di_node = (struct di_node *)di_mem_addr(st, toff);
	i_lnode->devt = ldi_usage->tgt_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		tgt_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		tgt_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate an i_link
	 */
	i_link = i_link_alloc(ldi_usage->tgt_spec_type);
	i_link->src_lnode = src_lnode;
	i_link->tgt_lnode = tgt_lnode;

	/*
	 * add this link onto the src i_lnode's outbound i_link list
	 */
	i_link_next = &(src_lnode->link_out);
	while (*i_link_next != NULL) {
		if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
		    (i_link->spec_type == (*i_link_next)->spec_type)) {
			/* this link already exists */
			kmem_free(i_link, sizeof (i_link_t));
			return (LDI_USAGE_CONTINUE);
		}
		i_link_next = &((*i_link_next)->src_link_next);
	}
	*i_link_next = i_link;

	/*
	 * add this link onto the tgt i_lnode's inbound i_link list
	 */
	i_link_next = &(tgt_lnode->link_in);
	while (*i_link_next != NULL) {
		ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
		i_link_next = &((*i_link_next)->tgt_link_next);
	}
	*i_link_next = i_link;

	/*
	 * add this i_link to the link hash
	 */
	res = mod_hash_insert(st->link_hash, i_link, i_link);
	ASSERT(res == 0);
	st->link_count++;

	return (LDI_USAGE_CONTINUE);
}

struct i_layer_data {
	struct di_state	*st;
	int		lnode_count;
	int		link_count;
	di_off_t	lnode_off;
	di_off_t	link_off;
};

/*ARGSUSED*/
static uint_t
i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_link_t		*i_link = (i_link_t *)key;
	struct i_layer_data	*data = arg;
	struct di_link		*me;
	struct di_lnode		*melnode;
	struct di_node		*medinode;

	ASSERT(i_link->self == 0);

	i_link->self = data->link_off +
	    (data->link_count * sizeof (struct di_link));
	data->link_count++;

	ASSERT(data->link_off > 0 && data->link_count > 0);
	ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
	ASSERT(data->link_count <= data->st->link_count);

	/* fill in fields for the di_link snapshot */
	me = (struct di_link *)di_mem_addr(data->st, i_link->self);
	me->self = i_link->self;
	me->spec_type = i_link->spec_type;

	/*
	 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
	 * are created during the LDI table walk. Since we are
	 * walking the link hash, the lnode hash has already been
	 * walked and the lnodes have been snapshotted. Save lnode
	 * offsets.
	 */
	me->src_lnode = i_link->src_lnode->self;
	me->tgt_lnode = i_link->tgt_lnode->self;

	/*
	 * Save this link's offset in the src_lnode snapshot's link_out
	 * field
	 */
	melnode = (struct di_lnode *)di_mem_addr(data->st, me->src_lnode);
	me->src_link_next = melnode->link_out;
	melnode->link_out = me->self;

	/*
	 * Put this link on the tgt_lnode's link_in field
	 */
	melnode = (struct di_lnode *)di_mem_addr(data->st, me->tgt_lnode);
	me->tgt_link_next = melnode->link_in;
	melnode->link_in = me->self;

	/*
	 * An i_lnode_t is only created if the corresponding dip exists
	 * in the snapshot. A pointer to the di_node is saved in the
	 * i_lnode_t when it is allocated. For this link, get the di_node
	 * for the source lnode. Then put the link on the di_node's list
	 * of src links
	 */
	medinode = i_link->src_lnode->di_node;
	me->src_node_next = medinode->src_links;
	medinode->src_links = me->self;

	/*
	 * Put this link on the tgt_links list of the target
	 * dip.
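	 *
	 * (Editorial sketch of how these offset-threaded lists are read
	 * back: the snapshot is position independent, so a consumer chases
	 * di_off_t values rather than pointers, e.g.
	 *
	 *	for (off = medinode->tgt_links; off != 0;
	 *	    off = lp->tgt_node_next)
	 *		lp = (struct di_link *)di_mem_addr(st, off);
	 *
	 * each insertion here prepends at the head of the list.)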
	 */
	medinode = i_link->tgt_lnode->di_node;
	me->tgt_node_next = medinode->tgt_links;
	medinode->tgt_links = me->self;

	return (MH_WALK_CONTINUE);
}

/*ARGSUSED*/
static uint_t
i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_lnode_t		*i_lnode = (i_lnode_t *)key;
	struct i_layer_data	*data = arg;
	struct di_lnode		*me;
	struct di_node		*medinode;

	ASSERT(i_lnode->self == 0);

	i_lnode->self = data->lnode_off +
	    (data->lnode_count * sizeof (struct di_lnode));
	data->lnode_count++;

	ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
	ASSERT(data->link_count == 0);	/* links not done yet */
	ASSERT(data->lnode_count <= data->st->lnode_count);

	/* fill in fields for the di_lnode snapshot */
	me = (struct di_lnode *)di_mem_addr(data->st, i_lnode->self);
	me->self = i_lnode->self;

	if (i_lnode->devt == DDI_DEV_T_NONE) {
		me->dev_major = (major_t)-1;
		me->dev_minor = (minor_t)-1;
	} else {
		me->dev_major = getmajor(i_lnode->devt);
		me->dev_minor = getminor(i_lnode->devt);
	}

	/*
	 * The dip corresponding to this lnode must exist in
	 * the snapshot or we wouldn't have created the i_lnode_t
	 * during LDI walk. Save the offset of the dip.
	 */
	ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
	me->node = i_lnode->di_node->self;

	/*
	 * There must be at least one link in or out of this lnode
	 * or we wouldn't have created it. These fields will be set
	 * during the link hash walk.
	 */
	ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));

	/*
	 * Set the offset of the devinfo node associated with this
	 * lnode. Also update the node_next pointer; it is non-zero
	 * when multiple lnodes are associated with the same devinfo
	 * node (this can occur when, e.g., multiple minor nodes
	 * are open for one device).
	 */
	medinode = i_lnode->di_node;
	me->node_next = medinode->lnodes;
	medinode->lnodes = me->self;

	return (MH_WALK_CONTINUE);
}

static di_off_t
di_getlink_data(di_off_t off, struct di_state *st)
{
	struct i_layer_data	data = {0};
	size_t			size;

	dcmn_err2((CE_CONT, "di_getlink_data: off = %x\n", off));

	st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
	    mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
	    i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);

	st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
	    (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));

	/* get driver layering information */
	(void) ldi_usage_walker(st, di_ldi_callback);

	/* check if there is any link data to include in the snapshot */
	if (st->lnode_count == 0) {
		ASSERT(st->link_count == 0);
		goto out;
	}

	ASSERT(st->link_count != 0);

	/* get a pointer to snapshot memory for all the di_lnodes */
	size = sizeof (struct di_lnode) * st->lnode_count;
	data.lnode_off = off = di_checkmem(st, off, size);
	off += DI_ALIGN(size);

	/* get a pointer to snapshot memory for all the di_links */
	size = sizeof (struct di_link) * st->link_count;
	data.link_off = off = di_checkmem(st, off, size);
	off += DI_ALIGN(size);

	data.lnode_count = data.link_count = 0;
	data.st = st;

	/*
	 * We have lnodes and links that will go into the
	 * snapshot, so let's walk the respective hashes
	 * and snapshot them. The various linkages are
	 * also set up during the walk.
	 */
	mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
	ASSERT(data.lnode_count == st->lnode_count);

	mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
	ASSERT(data.link_count == st->link_count);

out:
	/* free up the i_lnodes and i_links used to create the snapshot */
	mod_hash_destroy_hash(st->lnode_hash);
	mod_hash_destroy_hash(st->link_hash);
	st->lnode_count = 0;
	st->link_count = 0;

	return (off);
}


/*
 * Copy all minor data nodes attached to a devinfo node into the snapshot.
 * It is called from di_copynode with devi_lock held.
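 *
 * (Illustrative: the resulting di_minor records form another offset-linked
 * list, one entry per /devices minor node - e.g. a hypothetical disk slice
 * minor "a" with spec_type S_IFBLK - and me->next below is 0 on the last
 * entry.)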
 */
static di_off_t
di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
    struct di_state *st)
{
	di_off_t	off;
	struct di_minor	*me;

	dcmn_err2((CE_CONT, "di_getmdata:\n"));

	/*
	 * check memory first
	 */
	off = di_checkmem(st, *off_p, sizeof (struct di_minor));
	*off_p = off;

	do {
		me = (struct di_minor *)di_mem_addr(st, off);
		me->self = off;
		me->type = mnode->type;
		me->node = node;
		me->user_private_data = NULL;

		off += DI_ALIGN(sizeof (struct di_minor));

		/*
		 * Split dev_t to major/minor, so it works for
		 * both ILP32 and LP64 model
		 */
		me->dev_major = getmajor(mnode->ddm_dev);
		me->dev_minor = getminor(mnode->ddm_dev);
		me->spec_type = mnode->ddm_spec_type;

		if (mnode->ddm_name) {
			off = di_checkmem(st, off,
			    strlen(mnode->ddm_name) + 1);
			me->name = off;
			(void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
			off += DI_ALIGN(strlen(mnode->ddm_name) + 1);
		}

		if (mnode->ddm_node_type) {
			off = di_checkmem(st, off,
			    strlen(mnode->ddm_node_type) + 1);
			me->node_type = off;
			(void) strcpy(di_mem_addr(st, off),
			    mnode->ddm_node_type);
			off += DI_ALIGN(strlen(mnode->ddm_node_type) + 1);
		}

		off = di_checkmem(st, off, sizeof (struct di_minor));
		me->next = off;
		mnode = mnode->next;
	} while (mnode);

	me->next = 0;

	return (off);
}

/*
 * di_register_dip(), di_dip_find(): The dip must be protected
 * from deallocation when using these routines - this can either
 * be a reference count, a busy hold or a per-driver lock.
 */

static void
di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
{
	struct dev_info	*node = DEVI(dip);
	struct di_key	*key = kmem_zalloc(sizeof (*key), KM_SLEEP);
	struct di_dkey	*dk;

	ASSERT(dip);
	ASSERT(off > 0);

	key->k_type = DI_DKEY;
	dk = &(key->k_u.dkey);

	dk->dk_dip = dip;
	dk->dk_major = node->devi_major;
	dk->dk_inst = node->devi_instance;
	dk->dk_nodeid = node->devi_nodeid;

	if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)(uintptr_t)off) != 0) {
		panic(
		    "duplicate devinfo (%p) registered during device "
		    "tree walk", (void *)dip);
	}
}


static int
di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
{
	/*
	 * uintptr_t must be used because it matches the size of void *;
	 * mod_hash expects clients to place results into pointer-size
	 * containers; since di_off_t is always a 32-bit offset, alignment
	 * would otherwise be broken on 64-bit kernels.
	 */
	uintptr_t	offset;
	struct di_key	key = {0};
	struct di_dkey	*dk;

	ASSERT(st->reg_dip_hash);
	ASSERT(dip);
	ASSERT(off_p);


	key.k_type = DI_DKEY;
	dk = &(key.k_u.dkey);

	dk->dk_dip = dip;
	dk->dk_major = DEVI(dip)->devi_major;
	dk->dk_inst = DEVI(dip)->devi_instance;
	dk->dk_nodeid = DEVI(dip)->devi_nodeid;

	if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
	    (mod_hash_val_t *)&offset) == 0) {
		*off_p = (di_off_t)offset;
		return (0);
	} else {
		return (-1);
	}
}

/*
 * di_register_pip(), di_pip_find(): The pip must be protected from
 * deallocation when using these routines.
 * The caller must do this by protecting the client (or phci) <-> pip
 * linkage while traversing the list and then holding the pip when it is
 * found in the list.
 */

static void
di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
{
	struct di_key	*key = kmem_zalloc(sizeof (*key), KM_SLEEP);
	char		*path_addr;
	struct di_pkey	*pk;

	ASSERT(pip);
	ASSERT(off > 0);

	key->k_type = DI_PKEY;
	pk = &(key->k_u.pkey);

	pk->pk_pip = pip;
	path_addr = mdi_pi_get_addr(pip);
	if (path_addr)
		pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
	pk->pk_client = mdi_pi_get_client(pip);
	pk->pk_phci = mdi_pi_get_phci(pip);

	if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)(uintptr_t)off) != 0) {
		panic(
		    "duplicate pathinfo (%p) registered during device "
		    "tree walk", (void *)pip);
	}
}

/*
 * As with di_register_pip, the caller must hold or lock the pip
 */
static int
di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
{
	/*
	 * uintptr_t must be used because it matches the size of void *;
	 * mod_hash expects clients to place results into pointer-size
	 * containers; since di_off_t is always a 32-bit offset, alignment
	 * would otherwise be broken on 64-bit kernels.
	 */
	uintptr_t	offset;
	struct di_key	key = {0};
	struct di_pkey	*pk;

	ASSERT(st->reg_pip_hash);
	ASSERT(off_p);

	if (pip == NULL) {
		*off_p = 0;
		return (0);
	}

	key.k_type = DI_PKEY;
	pk = &(key.k_u.pkey);

	pk->pk_pip = pip;
	pk->pk_path_addr = mdi_pi_get_addr(pip);
	pk->pk_client = mdi_pi_get_client(pip);
	pk->pk_phci = mdi_pi_get_phci(pip);

	if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
	    (mod_hash_val_t *)&offset) == 0) {
		*off_p = (di_off_t)offset;
		return (0);
	} else {
		return (-1);
	}
}

static di_path_state_t
path_state_convert(mdi_pathinfo_state_t st)
{
	switch (st) {
	case MDI_PATHINFO_STATE_ONLINE:
		return (DI_PATH_STATE_ONLINE);
	case MDI_PATHINFO_STATE_STANDBY:
		return (DI_PATH_STATE_STANDBY);
	case MDI_PATHINFO_STATE_OFFLINE:
		return (DI_PATH_STATE_OFFLINE);
	case MDI_PATHINFO_STATE_FAULT:
		return (DI_PATH_STATE_FAULT);
	default:
		return (DI_PATH_STATE_UNKNOWN);
	}
}


static di_off_t
di_path_getprop(mdi_pathinfo_t *pip, di_off_t off, di_off_t *off_p,
    struct di_state *st)
{
	nvpair_t		*prop = NULL;
	struct di_path_prop	*me;

	if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
		*off_p = 0;
		return (off);
	}

	off = di_checkmem(st, off, sizeof (struct di_path_prop));
	*off_p = off;

	while (prop = mdi_pi_get_next_prop(pip, prop)) {
		int delta = 0;

		me = (struct di_path_prop *)di_mem_addr(st, off);
		me->self = off;
		off += sizeof (struct di_path_prop);

		/*
		 * property name
		 */
		off = di_checkmem(st, off, strlen(nvpair_name(prop)) + 1);
		me->prop_name = off;
		(void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
		off += strlen(nvpair_name(prop)) + 1;

		switch (nvpair_type(prop)) {
		case DATA_TYPE_BYTE:
		case DATA_TYPE_INT16:
		case DATA_TYPE_UINT16:
		case DATA_TYPE_INT32:
		case DATA_TYPE_UINT32:
			delta = sizeof (int32_t);
			me->prop_type = DDI_PROP_TYPE_INT;
			off = di_checkmem(st, off, delta);
			(void) nvpair_value_int32(prop,
			    (int32_t *)di_mem_addr(st, off));
			break;

		case DATA_TYPE_INT64:
		case DATA_TYPE_UINT64:
			delta = sizeof (int64_t);
			me->prop_type = DDI_PROP_TYPE_INT64;
			off = di_checkmem(st, off, delta);
			(void) nvpair_value_int64(prop,
			    (int64_t *)di_mem_addr(st, off));
			break;

		case DATA_TYPE_STRING:
		{
			char	*str;
			(void) nvpair_value_string(prop, &str);
			delta = strlen(str) + 1;
			me->prop_type = DDI_PROP_TYPE_STRING;
			off = di_checkmem(st, off, delta);
			(void) strcpy(di_mem_addr(st, off), str);
			break;
		}
		case DATA_TYPE_BYTE_ARRAY:
		case DATA_TYPE_INT16_ARRAY:
		case DATA_TYPE_UINT16_ARRAY:
		case DATA_TYPE_INT32_ARRAY:
		case DATA_TYPE_UINT32_ARRAY:
		case DATA_TYPE_INT64_ARRAY:
		case DATA_TYPE_UINT64_ARRAY:
		{
			uchar_t	*buf;
			uint_t	nelems;
			(void) nvpair_value_byte_array(prop, &buf, &nelems);
			delta = nelems;
			me->prop_type = DDI_PROP_TYPE_BYTE;
			if (nelems != 0) {
				off = di_checkmem(st, off, delta);
				bcopy(buf, di_mem_addr(st, off), nelems);
			}
			break;
		}

		default:	/* Unknown or unhandled type; skip it */
			delta = 0;
			break;
		}

		if (delta > 0) {
			me->prop_data = off;
		}

		me->prop_len = delta;
		off += delta;

		off = di_checkmem(st, off, sizeof (struct di_path_prop));
		me->prop_next = off;
	}

	me->prop_next = 0;
	return (off);
}


static void
di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
    int get_client)
{
	if (get_client) {
		ASSERT(me->path_client == 0);
		me->path_client = noff;
		ASSERT(me->path_c_link == 0);
		*off_pp = &me->path_c_link;
		me->path_snap_state &=
		    ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
	} else {
		ASSERT(me->path_phci == 0);
		me->path_phci = noff;
		ASSERT(me->path_p_link == 0);
		*off_pp = &me->path_p_link;
		me->path_snap_state &=
		    ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
	}
}

/*
 * poff_p: pointer to the linkage field. This links pips along the client|phci
 *	   linkage list.
 * noff  : Offset for the endpoint dip snapshot.
 */
static di_off_t
di_getpath_data(dev_info_t *dip, di_off_t *poff_p, di_off_t noff,
    struct di_state *st, int get_client)
{
	di_off_t	off;
	mdi_pathinfo_t	*pip;
	struct di_path	*me;
	mdi_pathinfo_t	*(*next_pip)(dev_info_t *, mdi_pathinfo_t *);

	dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));

	/*
	 * The naming of the following mdi_xyz() is unfortunately
	 * non-intuitive. mdi_get_next_phci_path() follows the
	 * client_link, i.e. the list of pips belonging to the
	 * given client dip.
	 */
	if (get_client)
		next_pip = &mdi_get_next_phci_path;
	else
		next_pip = &mdi_get_next_client_path;

	off = *poff_p;

	pip = NULL;
	while (pip = (*next_pip)(dip, pip)) {
		mdi_pathinfo_state_t state;
		di_off_t stored_offset;

		dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));

		mdi_pi_lock(pip);

		if (di_pip_find(st, pip, &stored_offset) != -1) {
			/*
			 * We've already seen this pathinfo node, so we must
			 * take care not to snapshot it again. However, one
			 * endpoint and linkage will be set here.
			 * The other endpoint and linkage were already set
			 * when the pip was first snapshotted, i.e. when the
			 * other endpoint dip was snapshotted.
			 */
			me = (struct di_path *)di_mem_addr(st, stored_offset);

			*poff_p = stored_offset;

			di_path_one_endpoint(me, noff, &poff_p, get_client);

			/*
			 * The other endpoint and linkage were set when this
			 * pip was snapshotted. So we are done with both
			 * endpoints and linkages.
			 */
			ASSERT(!(me->path_snap_state &
			    (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
			ASSERT(!(me->path_snap_state &
			    (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));

			mdi_pi_unlock(pip);
			continue;
		}

		/*
		 * Now that we need to snapshot this pip, check memory
		 */
		off = di_checkmem(st, off, sizeof (struct di_path));
		me = (struct di_path *)di_mem_addr(st, off);
		me->self = off;
		*poff_p = off;
		off += sizeof (struct di_path);

		me->path_snap_state =
		    DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
		me->path_snap_state |=
		    DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;

		/*
		 * Zero out fields as di_checkmem() doesn't guarantee
		 * zero-filled memory
		 */
		me->path_client = me->path_phci = 0;
		me->path_c_link = me->path_p_link = 0;

		di_path_one_endpoint(me, noff, &poff_p, get_client);

		/*
		 * Note the existence of this pathinfo
		 */
		di_register_pip(st, pip, me->self);

		state = mdi_pi_get_state(pip);
		me->path_state = path_state_convert(state);

		/*
		 * Get intermediate addressing info.
		 */
		off = di_checkmem(st, off, strlen(mdi_pi_get_addr(pip)) + 1);
		me->path_addr = off;
		(void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
		off += strlen(mdi_pi_get_addr(pip)) + 1;

		/*
		 * Get path properties if props are to be included in the
		 * snapshot
		 */
		if (DINFOPROP & st->command) {
			off = di_path_getprop(pip, off, &me->path_prop, st);
		} else {
			me->path_prop = 0;
		}

		mdi_pi_unlock(pip);
	}

	*poff_p = 0;

	return (off);
}

/*
 * Copy a list of properties attached to a devinfo node. Called from
 * di_copynode with devi_lock held. The major number is passed in case
 * we need to call the driver's prop_op entry. The value of list indicates
 * which list we are copying. Possible values are:
 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
 */
static di_off_t
di_getprop(struct ddi_prop *prop, di_off_t *off_p, struct di_state *st,
    struct dev_info *dip, int list)
{
	dev_t		dev;
	int		(*prop_op)();
	int		off, need_prop_op = 0;
	int		prop_op_fail = 0;
	ddi_prop_t	*propp = NULL;
	struct di_prop	*pp;
	struct dev_ops	*ops = NULL;
	int		prop_len;
	caddr_t		prop_val;


	dcmn_err2((CE_CONT, "di_getprop:\n"));

	ASSERT(st != NULL);

	dcmn_err((CE_CONT, "copy property list at addr %p\n", (void *)prop));

	/*
	 * Figure out if we need to call the driver's prop_op entry point.
	 * The conditions are:
	 *	-- driver property list
	 *	-- driver must be attached and held
	 *	-- driver's cb_prop_op != ddi_prop_op
	 *		or parent's bus_prop_op != ddi_bus_prop_op
	 */

	if (list != DI_PROP_DRV_LIST) {
		goto getprop;
	}

	/*
	 * If the driver is not attached or if the major is -1, we ignore
	 * the driver property list.
	 * No one should rely on such properties.
	 */
	if (!i_ddi_devi_attached((dev_info_t *)dip)) {
		off = *off_p;
		*off_p = 0;
		return (off);
	}

	/*
	 * Now we have a driver which is held. We can examine entry points
	 * and check the condition listed above.
	 */
	ops = dip->devi_ops;

	/*
	 * Some nexus drivers incorrectly set cb_prop_op to nodev,
	 * nulldev or even NULL.
	 */
	if (ops && ops->devo_cb_ops &&
	    (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
	    (ops->devo_cb_ops->cb_prop_op != nodev) &&
	    (ops->devo_cb_ops->cb_prop_op != nulldev) &&
	    (ops->devo_cb_ops->cb_prop_op != NULL)) {
		need_prop_op = 1;
	}

getprop:
	/*
	 * check memory availability
	 */
	off = di_checkmem(st, *off_p, sizeof (struct di_prop));
	*off_p = off;
	/*
	 * Now copy properties
	 */
	do {
		pp = (struct di_prop *)di_mem_addr(st, off);
		pp->self = off;
		/*
		 * Split dev_t to major/minor, so it works for
		 * both ILP32 and LP64 model
		 */
		pp->dev_major = getmajor(prop->prop_dev);
		pp->dev_minor = getminor(prop->prop_dev);
		pp->prop_flags = prop->prop_flags;
		pp->prop_list = list;

		/*
		 * property name
		 */
		off += sizeof (struct di_prop);
		if (prop->prop_name) {
			off = di_checkmem(st, off, strlen(prop->prop_name)
			    + 1);
			pp->prop_name = off;
			(void) strcpy(di_mem_addr(st, off), prop->prop_name);
			off += strlen(prop->prop_name) + 1;
		}

		/*
		 * Set prop_len here. This may change later
		 * if cb_prop_op returns a different length.
		 */
		pp->prop_len = prop->prop_len;
		if (!need_prop_op) {
			if (prop->prop_val == NULL) {
				dcmn_err((CE_WARN,
				    "devinfo: property fault at %p",
				    (void *)prop));
				pp->prop_data = -1;
			} else if (prop->prop_len != 0) {
				off = di_checkmem(st, off, prop->prop_len);
				pp->prop_data = off;
				bcopy(prop->prop_val, di_mem_addr(st, off),
				    prop->prop_len);
				off += DI_ALIGN(pp->prop_len);
			}
		}

		off = di_checkmem(st, off, sizeof (struct di_prop));
		pp->next = off;
		prop = prop->prop_next;
	} while (prop);

	pp->next = 0;

	if (!need_prop_op) {
		dcmn_err((CE_CONT, "finished property "
		    "list at offset 0x%x\n", off));
		return (off);
	}

	/*
	 * If there is a need to call the driver's prop_op entry,
	 * we must release the driver's devi_lock, because the
	 * cb_prop_op entry point will grab it.
	 *
	 * The snapshot memory has already been allocated above,
	 * which means the length of an active property should
	 * remain fixed for this implementation to work.
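	 *
	 * (Editorial note: the loop below walks the already-copied di_prop
	 * records in snapshot memory - the next offset is sampled before
	 * each prop_op call - so the walk does not depend on the driver
	 * leaving its own property list untouched while devi_lock is
	 * dropped.)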
	 */


	prop_op = ops->devo_cb_ops->cb_prop_op;
	pp = (struct di_prop *)di_mem_addr(st, *off_p);

	mutex_exit(&dip->devi_lock);

	do {
		int err;
		struct di_prop *tmp;

		if (pp->next) {
			tmp = (struct di_prop *)
			    di_mem_addr(st, pp->next);
		} else {
			tmp = NULL;
		}

		/*
		 * call into the driver's prop_op entry point
		 *
		 * Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY
		 */
		dev = makedevice(pp->dev_major, pp->dev_minor);
		if (dev == DDI_DEV_T_NONE)
			dev = DDI_DEV_T_ANY;

		dcmn_err((CE_CONT, "call prop_op"
		    "(%lx, %p, PROP_LEN_AND_VAL_ALLOC, "
		    "DDI_PROP_DONTPASS, \"%s\", %p, &%d)\n",
		    dev,
		    (void *)dip,
		    (char *)di_mem_addr(st, pp->prop_name),
		    (void *)di_mem_addr(st, pp->prop_data),
		    pp->prop_len));

		if ((err = (*prop_op)(dev, (dev_info_t *)dip,
		    PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS,
		    (char *)di_mem_addr(st, pp->prop_name),
		    &prop_val, &prop_len)) != DDI_PROP_SUCCESS) {
			if ((propp = i_ddi_prop_search(dev,
			    (char *)di_mem_addr(st, pp->prop_name),
			    (uint_t)pp->prop_flags,
			    &(DEVI(dip)->devi_drv_prop_ptr))) != NULL) {
				pp->prop_len = propp->prop_len;
				if (pp->prop_len != 0) {
					off = di_checkmem(st, off,
					    pp->prop_len);
					pp->prop_data = off;
					bcopy(propp->prop_val, di_mem_addr(st,
					    pp->prop_data), propp->prop_len);
					off += DI_ALIGN(pp->prop_len);
				}
			} else {
				prop_op_fail = 1;
			}
		} else if (prop_len != 0) {
			pp->prop_len = prop_len;
			off = di_checkmem(st, off, prop_len);
			pp->prop_data = off;
			bcopy(prop_val, di_mem_addr(st, off), prop_len);
			off += DI_ALIGN(prop_len);
			kmem_free(prop_val, prop_len);
		}

		if (prop_op_fail) {
			pp->prop_data = -1;
			dcmn_err((CE_WARN, "devinfo: prop_op failure "
			    "for \"%s\" err %d",
			    di_mem_addr(st, pp->prop_name), err));
		}

		pp = tmp;

	} while (pp);

	mutex_enter(&dip->devi_lock);
	dcmn_err((CE_CONT, "finished property list at offset 0x%x\n", off));
	return (off);
}

/*
 * find private data format attached to a dip
 * parent = 1 to match driver name of parent dip (for parent private data)
 *	    0 to match driver name of current dip (for driver private data)
 */
#define	DI_MATCH_DRIVER	0
#define	DI_MATCH_PARENT	1

struct di_priv_format *
di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
{
	int		i, count, len;
	char		*drv_name;
	major_t		major;
	struct di_all	*all;
	struct di_priv_format *form;

	dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
	    node->devi_node_name, match));

	if (match == DI_MATCH_PARENT) {
		node = DEVI(node->devi_parent);
	}

	if (node == NULL) {
		return (NULL);
	}

	major = ddi_name_to_major(node->devi_binding_name);
	if (major == (major_t)(-1)) {
		return (NULL);
	}

	/*
	 * Match the driver name.
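	 *
	 * (Illustrative: form[i].drv_name may hold several space-separated
	 * driver names - a hypothetical entry might read "sd ssd" - which
	 * is why the loop below advances through the string with
	 * strchr(tmp, ' ') after each strncmp comparison.)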
	 */
	drv_name = ddi_major_to_name(major);
	if ((drv_name == NULL) || *drv_name == '\0') {
		return (NULL);
	}

	/* Now get the di_priv_format array */
	all = (struct di_all *)di_mem_addr(st, 0);

	if (match == DI_MATCH_PARENT) {
		count = all->n_ppdata;
		form = (struct di_priv_format *)
		    (di_mem_addr(st, 0) + all->ppdata_format);
	} else {
		count = all->n_dpdata;
		form = (struct di_priv_format *)
		    ((caddr_t)all + all->dpdata_format);
	}

	len = strlen(drv_name);
	for (i = 0; i < count; i++) {
		char *tmp;

		tmp = form[i].drv_name;
		while (tmp && (*tmp != '\0')) {
			if (strncmp(drv_name, tmp, len) == 0) {
				return (&form[i]);
			}
			/*
			 * Move to the next driver name, skipping the
			 * white space
			 */
			if (tmp = strchr(tmp, ' ')) {
				tmp++;
			}
		}
	}

	return (NULL);
}

/*
 * The following functions copy data as specified by the format passed in.
 * To prevent an invalid format from panicking the system, we call
 * on_fault(). A return value of 0 indicates an error. Otherwise, the
 * total offset is returned.
 */
#define	DI_MAX_PRIVDATA	(PAGESIZE >> 1)	/* max private data size */

static di_off_t
di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
    void *data, di_off_t *off_p, struct di_state *st)
{
	caddr_t		pa;
	void		*ptr;
	int		i, size, repeat;
	di_off_t	off, off0, *tmp;
	char		*path;

	label_t		ljb;

	dcmn_err2((CE_CONT, "di_getprvdata:\n"));

	/*
	 * check memory availability. Private data size is
	 * limited to DI_MAX_PRIVDATA.
	 */
	off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);

	if ((pdp->bytes <= 0) || pdp->bytes > DI_MAX_PRIVDATA) {
		goto failure;
	}

	if (!on_fault(&ljb)) {
		/* copy the struct */
		bcopy(data, di_mem_addr(st, off), pdp->bytes);
		off0 = DI_ALIGN(pdp->bytes);

		/* dereferencing pointers */
		for (i = 0; i < MAX_PTR_IN_PRV; i++) {

			if (pdp->ptr[i].size == 0) {
				goto success;	/* no more ptrs */
			}

			/*
			 * first, get the pointer content
			 */
			if ((pdp->ptr[i].offset < 0) ||
			    (pdp->ptr[i].offset >
			    pdp->bytes - sizeof (char *)))
				goto failure;	/* wrong offset */

			pa = di_mem_addr(st, off + pdp->ptr[i].offset);
			tmp = (di_off_t *)pa;	/* to store off_t later */

			ptr = *((void **)pa);	/* get pointer value */
			if (ptr == NULL) {	/* if NULL pointer, go on */
				continue;
			}

			/*
			 * next, find the repeat count (array dimension)
			 */
			repeat = pdp->ptr[i].len_offset;

			/*
			 * A non-negative value indicates a variable sized
			 * array: it is the offset of an int member of the
			 * structure that holds the array dimension.
			 * A negative value indicates a fixed sized array,
			 * whose dimension is the absolute value of the
			 * struct member.
			 */
			if (repeat > pdp->bytes - sizeof (int)) {
				goto failure;	/* wrong offset */
			}

			if (repeat >= 0) {
				repeat = *((int *)((caddr_t)data + repeat));
			} else {
				repeat = -repeat;
			}

			/*
			 * next, get the size of the object to be copied
			 */
			size = pdp->ptr[i].size * repeat;

			/*
			 * Arbitrarily limit the total size of the object to
			 * be copied (1 byte to 1/4 page).
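			 *
			 * (Illustrative di_priv_format example, for a
			 * hypothetical driver struct { int nblk;
			 * struct bufq *q; }: ptr[0].offset would be the
			 * byte offset of q, ptr[0].size would be
			 * sizeof (struct bufq), and ptr[0].len_offset the
			 * byte offset of nblk, so nblk * sizeof (struct
			 * bufq) bytes are copied from q.)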
			 */
			if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
				goto failure;	/* wrong size or too big */
			}

			/*
			 * Now copy the data
			 */
			*tmp = off0;
			bcopy(ptr, di_mem_addr(st, off + off0), size);
			off0 += DI_ALIGN(size);
		}
	} else {
		goto failure;
	}

success:
	/*
	 * success if reached here
	 */
	no_fault();
	*off_p = off;

	return (off + off0);
	/*NOTREACHED*/

failure:
	/*
	 * fault occurred
	 */
	no_fault();
	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
	    ddi_pathname((dev_info_t *)node, path), data);
	kmem_free(path, MAXPATHLEN);
	*off_p = -1;	/* set private data to indicate error */

	return (off);
}

/*
 * get parent private data; on error, returns original offset
 */
static di_off_t
di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
	int off;
	struct di_priv_format *ppdp;

	dcmn_err2((CE_CONT, "di_getppdata:\n"));

	/* find the parent data format */
	if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
		off = *off_p;
		*off_p = 0;	/* set parent data to none */
		return (off);
	}

	return (di_getprvdata(ppdp, node,
	    ddi_get_parent_data((dev_info_t *)node), off_p, st));
}

/*
 * get driver private data; on error, returns original offset
 */
static di_off_t
di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
	int off;
	struct di_priv_format *dpdp;

	dcmn_err2((CE_CONT, "di_getdpdata:"));

	/* find the driver data format */
	if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
		off = *off_p;
		*off_p = 0;	/* set driver data to none */
		return (off);
	}

	return (di_getprvdata(dpdp, node,
	    ddi_get_driver_private((dev_info_t *)node), off_p, st));
}

/*
 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
 * This function encapsulates the state machine:
 *
 *	-> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
 *	|		SNAPSHOT		USRLD	 |
 *	--------------------------------------------------
 *
 * Returns 0 on success and -1 on failure
 */
static int
di_setstate(struct di_state *st, int new_state)
{
	int ret = 0;

	mutex_enter(&di_lock);
	switch (new_state) {
	case IOC_IDLE:
	case IOC_DONE:
		break;
	case IOC_SNAP:
		if (st->di_iocstate != IOC_IDLE)
			ret = -1;
		break;
	case IOC_COPY:
		if (st->di_iocstate != IOC_DONE)
			ret = -1;
		break;
	default:
		ret = -1;
	}

	if (ret == 0)
		st->di_iocstate = new_state;
	else
		cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
		    st->di_iocstate, new_state);
	mutex_exit(&di_lock);
	return (ret);
}

/*
 * We cannot assume the presence of the entire
 * snapshot in this routine.
 * All we are guaranteed
 * is the di_all struct + 1 byte (for root_path)
 */
static int
header_plus_one_ok(struct di_all *all)
{
	/*
	 * Refuse to read old versions
	 */
	if (all->version != DI_SNAPSHOT_VERSION) {
		CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
		return (0);
	}

	if (all->cache_magic != DI_CACHE_MAGIC) {
		CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
		return (0);
	}

	if (all->snapshot_time <= 0) {
		CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time));
		return (0);
	}

	if (all->top_devinfo == 0) {
		CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
		return (0);
	}

	if (all->map_size < sizeof (*all) + 1) {
		CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
		return (0);
	}

	if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
		CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
		    all->root_path[0], all->root_path[1]));
		return (0);
	}

	/*
	 * We can't check the checksum here as we just have the header
	 */

	return (1);
}

static int
chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
{
	rlim64_t	rlimit;
	ssize_t		resid;
	int		error = 0;


	rlimit = RLIM64_INFINITY;

	while (len) {
		resid = 0;
		error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
		    UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);

		if (error || resid < 0) {
			error = error ? error : EIO;
			CACHE_DEBUG((DI_ERR, "write error: %d", error));
			break;
		}

		/*
		 * Check if we are making progress
		 */
		if (resid >= len) {
			error = ENOSPC;
			break;
		}
		buf += len - resid;
		off += len - resid;
		len = resid;
	}

	return (error);
}

extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
    int (*)(dev_info_t *, void *), void *);

static void
di_cache_write(struct di_cache *cache)
{
	struct di_all	*all;
	struct vnode	*vp;
	int		oflags;
	size_t		map_size;
	size_t		chunk;
	offset_t	off;
	int		error;
	char		*buf;

	ASSERT(DI_CACHE_LOCKED(*cache));
	ASSERT(!servicing_interrupt());

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write"));
		return;
	}

	ASSERT(cache->cache_size > 0);
	ASSERT(cache->cache_data);

	if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) {
		CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write"));
		return;
	}

	all = (struct di_all *)cache->cache_data;

	if (!header_plus_one_ok(all)) {
		CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write"));
		return;
	}

	ASSERT(strcmp(all->root_path, "/") == 0);

	/*
	 * The cache_size is the total allocated memory for the cache.
	 * The map_size is the actual size of valid data in the cache.
	 * map_size may be smaller than cache_size but cannot exceed
	 * cache_size.
	 */
	if (all->map_size > cache->cache_size) {
		CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)."
		    " Skipping write", all->map_size, cache->cache_size));
		return;
	}

	/*
	 * First unlink the temp file
	 */
	error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE);
	if (error && error != ENOENT) {
		CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d",
		    DI_CACHE_TEMP, error));
	}

	if (error == EROFS) {
		CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write"));
		return;
	}

	vp = NULL;
	oflags = (FCREAT|FWRITE);
	if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags,
	    DI_CACHE_PERMS, &vp, CRCREAT, 0)) {
		CACHE_DEBUG((DI_ERR, "%s: create failed: %d",
		    DI_CACHE_TEMP, error));
		return;
	}

	ASSERT(vp);

	/*
	 * Paranoid: Check if the file is on a read-only FS
	 */
	if (vn_is_readonly(vp)) {
		CACHE_DEBUG((DI_ERR, "cannot write: readonly FS"));
		goto fail;
	}

	/*
	 * Note that we only write map_size bytes to disk - this saves
	 * space as the actual cache size may be larger than the size of
	 * valid data in the cache.
	 * Another advantage is that it makes verification of size
	 * easier when the file is read later.
	 */
	map_size = all->map_size;
	off = 0;
	buf = cache->cache_data;

	while (map_size) {
		ASSERT(map_size > 0);
		/*
		 * Write in chunks so that the VM system
		 * is not overwhelmed
		 */
		if (map_size > di_chunk * PAGESIZE)
			chunk = di_chunk * PAGESIZE;
		else
			chunk = map_size;

		error = chunk_write(vp, off, buf, chunk);
		if (error) {
			CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d",
			    off, error));
			goto fail;
		}

		off += chunk;
		buf += chunk;
		map_size -= chunk;

		/* Give pageout a chance to run */
		delay(1);
	}

	/*
	 * Now sync the file and close it
	 */
	if (error = VOP_FSYNC(vp, FSYNC, kcred)) {
		CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error));
	}

	if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred)) {
		CACHE_DEBUG((DI_ERR, "close() failed: %d", error));
		VN_RELE(vp);
		return;
	}

	VN_RELE(vp);

	/*
	 * Now do the rename
	 */
	if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) {
		CACHE_DEBUG((DI_ERR, "rename failed: %d", error));
		return;
	}

	CACHE_DEBUG((DI_INFO, "Cache write successful."));

	return;

fail:
	(void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred);
	VN_RELE(vp);
}


/*
 * Since we could be called early in boot,
 * use kobj_read_file()
 */
static void
di_cache_read(struct di_cache *cache)
{
	struct _buf	*file;
	struct di_all	*all;
	int		n;
	size_t		map_size, sz, chunk;
	offset_t	off;
	caddr_t		buf;
	uint32_t	saved_crc, crc;

	ASSERT(modrootloaded);
	ASSERT(DI_CACHE_LOCKED(*cache));
	ASSERT(cache->cache_data == NULL);
	ASSERT(cache->cache_size == 0);
	ASSERT(!servicing_interrupt());

	file = kobj_open_file(DI_CACHE_FILE);
	if (file == (struct _buf *)-1) {
		CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
		    DI_CACHE_FILE, ENOENT));
		return;
	}

	/*
	 * Read in the header+root_path first.
	 * The root_path must be "/"
	 */
	all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
	n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);

	if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
		kmem_free(all, sizeof (*all) + 1);
		kobj_close_file(file);
		CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
		return;
	}

	map_size = all->map_size;

	kmem_free(all, sizeof (*all) + 1);

	ASSERT(map_size >= sizeof (*all) + 1);

	buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
	sz = map_size;
	off = 0;
	while (sz) {
		/* Don't overload VM with large reads */
		chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
		n = kobj_read_file(file, buf, chunk, off);
		if (n != chunk) {
			CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
			    DI_CACHE_FILE, off));
			goto fail;
		}
		off += chunk;
		buf += chunk;
		sz -= chunk;
	}

	ASSERT(off == map_size);

	/*
	 * Read past the expected EOF to verify the size.
	 */
	if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
		CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
		goto fail;
	}

	all = (struct di_all *)di_cache.cache_data;
	if (!header_plus_one_ok(all)) {
		CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE));
		goto fail;
	}

	/*
	 * Compute the CRC with the checksum field in the cache data set to 0
	 */
	saved_crc = all->cache_checksum;
	all->cache_checksum = 0;
	CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
	all->cache_checksum = saved_crc;

	if (crc != all->cache_checksum) {
		CACHE_DEBUG((DI_ERR,
		    "%s: checksum error: expected=0x%x actual=0x%x",
		    DI_CACHE_FILE, all->cache_checksum, crc));
		goto fail;
	}

	if (all->map_size != map_size) {
		CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
		goto fail;
	}

	kobj_close_file(file);

	di_cache.cache_size = map_size;

	return;

fail:
	kmem_free(di_cache.cache_data, map_size);
	kobj_close_file(file);
	di_cache.cache_data = NULL;
	di_cache.cache_size = 0;
}


/*
 * Checks if arguments are valid for using the cache.
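 *
 * (For example, a DINFOCACHE request combined with any other DIIOC flag
 * fails the st->command check below and is rejected with EINVAL.)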
 */
static int
cache_args_valid(struct di_state *st, int *error)
{
	ASSERT(error);
	ASSERT(st->mem_size > 0);
	ASSERT(st->memlist != NULL);

	if (!modrootloaded || !i_ddi_io_initialized()) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: I/O subsystem not inited"));
		*error = ENOTACTIVE;
		return (0);
	}

	/*
	 * No other flags allowed with DINFOCACHE
	 */
	if (st->command != (DINFOCACHE & DIIOC_MASK)) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: bad flags: 0x%x",
		    st->command));
		*error = EINVAL;
		return (0);
	}

	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: bad root: %s",
		    DI_ALL_PTR(st)->root_path));
		*error = EINVAL;
		return (0);
	}

	CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));

	*error = 0;

	return (1);
}

static int
snapshot_is_cacheable(struct di_state *st)
{
	ASSERT(st->mem_size > 0);
	ASSERT(st->memlist != NULL);

	if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
	    (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
		CACHE_DEBUG((DI_INFO,
		    "not cacheable: incompatible flags: 0x%x",
		    st->command));
		return (0);
	}

	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
		CACHE_DEBUG((DI_INFO,
		    "not cacheable: incompatible root path: %s",
		    DI_ALL_PTR(st)->root_path));
		return (0);
	}

	CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command));

	return (1);
}

static int
di_cache_lookup(struct di_state *st)
{
	size_t	rval;
	int	cache_valid;

	ASSERT(cache_args_valid(st, &cache_valid));
	ASSERT(modrootloaded);

	DI_CACHE_LOCK(di_cache);

	/*
	 * The following assignment determines the validity
	 * of the cache as far as this snapshot is concerned.
	 */
	cache_valid = di_cache.cache_valid;

	if (cache_valid && di_cache.cache_data == NULL) {
		di_cache_read(&di_cache);
		/* check for read or file error */
		if (di_cache.cache_data == NULL)
			cache_valid = 0;
	}

	if (cache_valid) {
		/*
		 * Ok, the cache was valid as of this particular
		 * snapshot. Copy the cached snapshot. This is safe
		 * to do as the cache cannot be freed (we hold the
		 * cache lock). Free the memory allocated in di_state
		 * up until this point - we will simply copy everything
		 * in the cache.
		 */

		ASSERT(di_cache.cache_data != NULL);
		ASSERT(di_cache.cache_size > 0);

		di_freemem(st);

		rval = 0;
		if (di_cache2mem(&di_cache, st) > 0) {

			ASSERT(DI_ALL_PTR(st));

			/*
			 * map_size is the size of valid data in the
			 * cached snapshot and may be less than
			 * the size of the cache.
			 */
			rval = DI_ALL_PTR(st)->map_size;

			ASSERT(rval >= sizeof (struct di_all));
			ASSERT(rval <= di_cache.cache_size);
		}
	} else {
		/*
		 * The cache isn't valid, we need to take a snapshot.
		 * Set the command flags appropriately
		 */
		ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
		st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
		rval = di_cache_update(st);
		st->command = (DINFOCACHE & DIIOC_MASK);
	}

	DI_CACHE_UNLOCK(di_cache);

	/*
	 * For cached snapshots, the devinfo driver always returns
	 * a snapshot rooted at "/".
	 */
	ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);

	return (rval);
}

/*
 * This is a forced update of the cache - the previous state of the cache
 * may be:
 *	- unpopulated
 *	- populated and invalid
 *	- populated and valid
 */
static int
di_cache_update(struct di_state *st)
{
	int		rval;
	uint32_t	crc;
	struct di_all	*all;

	ASSERT(DI_CACHE_LOCKED(di_cache));
	ASSERT(snapshot_is_cacheable(st));

	/*
	 * Free the in-core cache and the on-disk file (if they exist)
	 */
	i_ddi_di_cache_free(&di_cache);

	/*
	 * Set the valid flag before taking the snapshot,
	 * so that any invalidations that arrive
	 * during or after the snapshot are not
	 * removed by us.
	 */
	atomic_or_32(&di_cache.cache_valid, 1);

	rval = di_snapshot_and_clean(st);

	if (rval == 0) {
		CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
		return (0);
	}

	DI_ALL_PTR(st)->map_size = rval;

	if (di_mem2cache(st, &di_cache) == 0) {
		CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
		return (0);
	}

	ASSERT(di_cache.cache_data);
	ASSERT(di_cache.cache_size > 0);

	/*
	 * Now that we have cached the snapshot, compute its checksum.
	 * The checksum is only computed over the valid data in the
	 * cache, not the entire cache.
	 * Also, set all the fields (except the checksum) before computing
	 * the checksum.
	 */
	all = (struct di_all *)di_cache.cache_data;
	all->cache_magic = DI_CACHE_MAGIC;
	all->map_size = rval;

	ASSERT(all->cache_checksum == 0);
	CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
	all->cache_checksum = crc;

	di_cache_write(&di_cache);

	return (rval);
}

static void
di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
{
	va_list	ap;

	if (di_cache_debug <= DI_QUIET)
		return;

	if (di_cache_debug < msglevel)
		return;

	switch (msglevel) {
	case DI_ERR:
		msglevel = CE_WARN;
		break;
	case DI_INFO:
	case DI_TRACE:
	default:
		msglevel = CE_NOTE;
		break;
	}

	va_start(ap, fmt);
	vcmn_err(msglevel, fmt, ap);
	va_end(ap);
}
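
/*
 * Editorial addendum - an illustrative, self-contained sketch (not part
 * of the driver) of how a reader of the cache buffer would verify the
 * checksum, mirroring di_cache_read() and di_cache_update() above. It
 * assumes the buffer holds the entire map_size bytes and that CRC32()
 * and crc32_table are available as used in this file:
 *
 *	static int
 *	snapshot_crc_ok(char *data, size_t map_size)
 *	{
 *		struct di_all *all = (struct di_all *)data;
 *		uint32_t saved, crc;
 *
 *		saved = all->cache_checksum;
 *		all->cache_checksum = 0;	// checksum is computed
 *						// with this field zeroed
 *		CRC32(crc, data, map_size, -1U, crc32_table);
 *		all->cache_checksum = saved;
 *		return (crc == saved);
 *	}
 */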