/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * driver for accessing kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args) if (di_debug >= 1) cmn_err args
#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move snapshot to different platforms
 */
#define	DI_ALIGN(addr)	((addr + 7l) & ~7l)
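/*
 * Illustrative note (not part of the original source): DI_ALIGN rounds
 * an offset up to the next multiple of 8, so every object stored in the
 * snapshot starts on an 8-byte boundary regardless of platform:
 *
 *	DI_ALIGN(0x00) == 0x00		DI_ALIGN(0x09) == 0x10
 *	DI_ALIGN(0x08) == 0x08		DI_ALIGN(0x0f) == 0x10
 */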
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};

/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info	*dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, offp)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(offp); \
	((stack)->depth)++; \
}

#define	DI_ALL_PTR(s)	((struct di_all *)di_mem_addr((s), 0))

/*
 * With devfs, the device tree has no global locks. The device tree is
 * dynamic and dips may come and go if they are not locked locally. Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree. This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	dnode_t		dk_nodeid;
};

struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};


struct i_lnode;

typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link w/ same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link w/ same i_lnode */
} i_link_t;

typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;
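/*
 * Illustrative sketch (not part of the original source): the i_lnode_t
 * and i_link_t structures form an in-core graph of layered-driver usage.
 * A link records that one {dip, dev_t} endpoint (the source) has opened
 * another (the target):
 *
 *	src i_lnode			tgt i_lnode
 *	    link_out ---> i_link_t <--- link_in
 *			  src_lnode ---> (back to src)
 *			  tgt_lnode ---> (back to tgt)
 *
 * The graph is walked later to emit di_lnode/di_link records into the
 * snapshot; see i_lnode_walker() and i_link_walker() below.
 */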
/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct di_stack *, struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(struct ddi_prop *, di_off_t *,
    struct di_state *, struct dev_info *, int);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static caddr_t di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);

static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};
static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver %I%",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int	error;

	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error != 0) {
		mutex_destroy(&di_lock);
		return (error);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	error;

	error = mod_remove(&modlinkage);
	if (error != 0) {
		return (error);
	}

	mutex_destroy(&di_lock);
	return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int	error = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)di_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * All dev_t's map to the same, single instance.
		 */
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		break;
	}

	return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_ATTACH:
		di_states = kmem_zalloc(
		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
			kmem_free(di_states,
			    di_max_opens * sizeof (struct di_state *));
			ddi_remove_minor_node(dip, NULL);
			error = DDI_FAILURE;
		} else {
			di_dip = dip;
			ddi_report_dev(dip);

			error = DDI_SUCCESS;
		}
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_DETACH:
		ddi_remove_minor_node(dip, NULL);
		di_dip = NULL;
		kmem_free(di_states,
		    di_max_opens * sizeof (struct di_state *));

		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}
/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device.  Each minor gets a separate
 * entry in the di_states[] table.  Based on the original minor number, we
 * discriminate opens of the full and read-only nodes.  If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int	m;
	minor_t	minor_parent = getminor(*devp);

	if (minor_parent != DI_FULL_PARENT &&
	    minor_parent != DI_READONLY_PARENT)
		return (ENXIO);

	mutex_enter(&di_lock);

	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
		if (di_states[m] != NULL)
			continue;

		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
		break;	/* It's ours. */
	}

	if (m >= di_max_opens) {
		/*
		 * maximum open instance for device reached
		 */
		mutex_exit(&di_lock);
		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
		return (EAGAIN);
	}
	mutex_exit(&di_lock);

	ASSERT(m < di_max_opens);
	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));

	return (0);
}

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	if (m < 0) {
		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
		    m + DI_NODE_SPECIES);
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(m < di_max_opens && st != NULL);

	di_freemem(st);
	kmem_free(st, sizeof (struct di_state));

	/*
	 * empty slot in state table
	 */
	mutex_enter(&di_lock);
	di_states[m] = NULL;
	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));
	mutex_exit(&di_lock);

	return (0);
}
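/*
 * Illustrative example (not part of the original source) of the minor
 * number arithmetic above: an open of the full node starts the scan at
 * slot 0 and claims slots 0, 2, 4, ...; the read-only node starts at
 * slot 1 and claims 1, 3, 5, ...  Slot m is handed back to the caller
 * as minor (m + DI_NODE_SPECIES), so the first full open yields minor 2
 * and the first read-only open yields minor 3.  di_close() and
 * di_ioctl() recover the slot index with
 * getminor(dev) - DI_NODE_SPECIES, and DI_UNPRIVILEGED_NODE(m) is true
 * exactly for the odd (read-only) slots.
 */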

/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int		rv, error;
	di_off_t	off;
	struct di_all	*all;
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	major_t		i;
	char		*drv_name;
	size_t		map_size, size;
	struct di_mem	*dcp;
	int		ndi_flags;

	if (m < 0 || m >= di_max_opens) {
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(st != NULL);

	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

	switch (cmd) {
	case DINFOIDENT:
		/*
		 * This is called from di_init to verify that the driver
		 * opened is indeed devinfo. The purpose is to guard against
		 * sending ioctl to an unknown driver in case of an
		 * unresolved major number conflict during bfu.
		 */
		*rvalp = DI_MAGIC;
		return (0);

	case DINFOLODRV:
		/*
		 * Hold an installed driver and return the result
		 */
		if (DI_UNPRIVILEGED_NODE(m)) {
			/*
			 * Only the fully enabled instances may issue
			 * DINFOLODRV.
			 */
			return (EACCES);
		}

		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
			kmem_free(drv_name, MAXNAMELEN);
			return (EFAULT);
		}

		/*
		 * Some third-party drivers' _init() routines walk the
		 * device tree, so we load the driver module before
		 * configuring the driver.
		 */
		i = ddi_name_to_major(drv_name);
		if (ddi_hold_driver(i) == NULL) {
			kmem_free(drv_name, MAXNAMELEN);
			return (ENXIO);
		}

		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

		/*
		 * i_ddi_load_drvconf() below will trigger a reprobe
		 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
		 * needed here.
		 */
		modunload_disable();
		(void) i_ddi_load_drvconf(i);
		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
		kmem_free(drv_name, MAXNAMELEN);
		ddi_rele_driver(i);
		rv = i_ddi_devs_attached(i);
		modunload_enable();

		i_ddi_di_cache_invalidate(KM_SLEEP);

		return ((rv == DDI_SUCCESS)? 0 : ENXIO);

	case DINFOUSRLD:
		/*
		 * The case for copying snapshot to userland
		 */
		if (di_setstate(st, IOC_COPY) == -1)
			return (EBUSY);

		map_size = ((struct di_all *)di_mem_addr(st, 0))->map_size;
		if (map_size == 0) {
			(void) di_setstate(st, IOC_DONE);
			return (EFAULT);
		}

		/*
		 * copyout the snapshot
		 */
		map_size = (map_size + PAGEOFFSET) & PAGEMASK;

		/*
		 * Return the map size, so caller may do a sanity
		 * check against the return value of snapshot ioctl()
		 */
		*rvalp = (int)map_size;

		/*
		 * Copy one chunk at a time
		 */
		off = 0;
		dcp = st->memlist;
		while (map_size) {
			size = dcp->buf_size;
			if (map_size <= size) {
				size = map_size;
			}

			if (ddi_copyout(di_mem_addr(st, off),
			    (void *)(arg + off), size, mode) != 0) {
				(void) di_setstate(st, IOC_DONE);
				return (EFAULT);
			}

			map_size -= size;
			off += size;
			dcp = dcp->next;
		}

		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (0);

	default:
		if ((cmd & ~DIIOC_MASK) != DIIOC) {
			/*
			 * Invalid ioctl command
			 */
			return (ENOTTY);
		}
		/*
		 * take a snapshot
		 */
		st->command = cmd & DIIOC_MASK;
		/*FALLTHROUGH*/
	}

	/*
	 * Obtain enough memory to hold header + rootpath.  We prevent kernel
	 * memory exhaustion by freeing any previously allocated snapshot and
	 * refusing the operation; otherwise we would be allowing ioctl(),
	 * ioctl(), ioctl(), ..., panic.
	 */
	if (di_setstate(st, IOC_SNAP) == -1)
		return (EBUSY);

	size = sizeof (struct di_all) +
	    sizeof (((struct dinfo_io *)(NULL))->root_path);
	if (size < PAGESIZE)
		size = PAGESIZE;
	di_allocmem(st, size);

	all = (struct di_all *)di_mem_addr(st, 0);
	all->devcnt = devcnt;
	all->command = st->command;
	all->version = DI_SNAPSHOT_VERSION;

	/*
	 * Note the endianness in case we need to transport snapshot
	 * over the network.
	 */
#if defined(_LITTLE_ENDIAN)
	all->endianness = DI_LITTLE_ENDIAN;
#else
	all->endianness = DI_BIG_ENDIAN;
#endif

	/* Copyin ioctl args, store in the snapshot. */
	if (copyinstr((void *)arg, all->root_path,
	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	}

	error = 0;
	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (error);
	}

	off = DI_ALIGN(sizeof (struct di_all) + size);

	/*
	 * Only the fully enabled version may force load drivers or read
	 * the parent private data from a driver.
	 */
	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	    DI_UNPRIVILEGED_NODE(m)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EACCES);
	}

	/* Do we need private data? */
	if (st->command & DINFOPRIVDATA) {
		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			/*
			 * Cannot copy private data from 64-bit kernel
			 * to 32-bit app
			 */
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EINVAL);
		}
		case DDI_MODEL_NONE:
			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
				di_freemem(st);
				(void) di_setstate(st, IOC_IDLE);
				return (EFAULT);
			}
			break;
		}
#else /* !_MULTI_DATAMODEL */
		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EFAULT);
		}
#endif /* _MULTI_DATAMODEL */
	}

	all->top_devinfo = DI_ALIGN(off);

	/*
	 * For cache lookups we reallocate memory from scratch,
	 * so the value of "all" is no longer valid.
	 */
	all = NULL;

	if (st->command & DINFOCACHE) {
		*rvalp = di_cache_lookup(st);
	} else if (snapshot_is_cacheable(st)) {
		DI_CACHE_LOCK(di_cache);
		*rvalp = di_cache_update(st);
		DI_CACHE_UNLOCK(di_cache);
	} else {
		modunload_disable();
		*rvalp = di_snapshot(st);
		modunload_enable();
	}

	if (*rvalp) {
		DI_ALL_PTR(st)->map_size = *rvalp;
		(void) di_setstate(st, IOC_DONE);
	} else {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
	}

	return (0);
}
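/*
 * Illustrative summary (not part of the original source) of the snapshot
 * states driven through di_setstate() by di_ioctl() above; the exact
 * transition rules live in di_setstate(), which is not shown here:
 *
 *	IOC_IDLE -> IOC_SNAP	snapshot ioctl accepted
 *	IOC_SNAP -> IOC_DONE	snapshot built successfully
 *	IOC_SNAP -> IOC_IDLE	snapshot failed, memory freed
 *	IOC_DONE -> IOC_COPY	DINFOUSRLD copyout begins
 *	IOC_COPY -> IOC_DONE	copyout faulted; snapshot kept
 *	IOC_COPY -> IOC_IDLE	copyout complete, memory freed
 *
 * When di_setstate() refuses a transition it returns -1, which
 * di_ioctl() reports to the caller as EBUSY.
 */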

/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
	struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem),
	    KM_SLEEP);
	/*
	 * Round up size to nearest power of 2. If it is less
	 * than st->mem_size, set it to st->mem_size (i.e.,
	 * the mem_size is doubled every time) to reduce the
	 * number of memory allocations.
	 */
	size_t tmp = 1;
	while (tmp < size) {
		tmp <<= 1;
	}
	size = (tmp > st->mem_size) ? tmp : st->mem_size;

	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
	mem->buf_size = size;

	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

	if (st->mem_size == 0) {	/* first chunk */
		st->memlist = mem;
	} else {
		/*
		 * locate end of linked list and add a chunk at the end
		 */
		struct di_mem *dcp = st->memlist;
		while (dcp->next != NULL) {
			dcp = dcp->next;
		}

		dcp->next = mem;
	}

	st->mem_size += size;
}
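/*
 * Illustrative example (not part of the original source): because each
 * new chunk is at least as large as everything allocated so far, the
 * total snapshot size roughly doubles with every allocation.  Starting
 * from an empty state:
 *
 *	request		chunk allocated		st->mem_size after
 *	0x2000		0x2000			0x2000
 *	0x0040		0x2000			0x4000
 *	0x0100		0x4000			0x8000
 *
 * so a snapshot of total size S needs only O(log S) allocations.
 */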

/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
	struct di_mem	*dcp;
	size_t		copysz;

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		return;
	}

	copysz = 0;
	for (dcp = st->memlist; dcp; dcp = dcp->next) {

		ASSERT(bufsiz > 0);

		if (bufsiz <= dcp->buf_size)
			copysz = bufsiz;
		else
			copysz = dcp->buf_size;

		bcopy(dcp->buf, buf, copysz);

		buf += copysz;
		bufsiz -= copysz;

		if (bufsiz == 0)
			break;
	}
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
	struct di_mem	*dcp, *tmp;

	dcmn_err2((CE_CONT, "di_freemem\n"));

	if (st->mem_size) {
		dcp = st->memlist;
		while (dcp) {	/* traverse the linked list */
			tmp = dcp;
			dcp = dcp->next;
			ddi_umem_free(tmp->cook);
			kmem_free(tmp, sizeof (struct di_mem));
		}
		st->mem_size = 0;
		st->memlist = NULL;
	}

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
}

/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
	caddr_t	pa;

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
		return (0);
	}

	ASSERT(cache->cache_data);

	di_allocmem(st, cache->cache_size);

	pa = di_mem_addr(st, 0);

	ASSERT(pa);

	/*
	 * Verify that di_allocmem() allocates contiguous memory,
	 * so that it is safe to do straight bcopy()
	 */
	ASSERT(st->memlist != NULL);
	ASSERT(st->memlist->next == NULL);
	bcopy(cache->cache_data, pa, cache->cache_size);

	return (cache->cache_size);
}
Cache only the valid data 982 */ 983 map_size = DI_ALL_PTR(st)->map_size; 984 if (map_size == 0 || map_size < sizeof (struct di_all) || 985 map_size > st->mem_size) { 986 CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size)); 987 return (0); 988 } 989 990 cache->cache_data = kmem_alloc(map_size, KM_SLEEP); 991 cache->cache_size = map_size; 992 di_copymem(st, cache->cache_data, cache->cache_size); 993 994 return (map_size); 995 } 996 997 /* 998 * Make sure there is at least "size" bytes memory left before 999 * going on. Otherwise, start on a new chunk. 1000 */ 1001 static di_off_t 1002 di_checkmem(struct di_state *st, di_off_t off, size_t size) 1003 { 1004 dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n", 1005 off, (int)size)); 1006 1007 /* 1008 * di_checkmem() shouldn't be called with a size of zero. 1009 * But in case it is, we want to make sure we return a valid 1010 * offset within the memlist and not an offset that points us 1011 * at the end of the memlist. 1012 */ 1013 if (size == 0) { 1014 dcmn_err((CE_WARN, "di_checkmem: invalid zero size used")); 1015 size = 1; 1016 } 1017 1018 off = DI_ALIGN(off); 1019 if ((st->mem_size - off) < size) { 1020 off = st->mem_size; 1021 di_allocmem(st, size); 1022 } 1023 1024 return (off); 1025 } 1026 1027 /* 1028 * Copy the private data format from ioctl arg. 1029 * On success, the ending offset is returned. On error 0 is returned. 1030 */ 1031 static di_off_t 1032 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode) 1033 { 1034 di_off_t size; 1035 struct di_priv_data *priv; 1036 struct di_all *all = (struct di_all *)di_mem_addr(st, 0); 1037 1038 dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n", 1039 off, (void *)arg, mode)); 1040 1041 /* 1042 * Copyin data and check version. 1043 * We only handle private data version 0. 1044 */ 1045 priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP); 1046 if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data), 1047 mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) { 1048 kmem_free(priv, sizeof (struct di_priv_data)); 1049 return (0); 1050 } 1051 1052 /* 1053 * Save di_priv_data copied from userland in snapshot. 

/*
 * Copy the private data format from ioctl arg.
 * On success, the ending offset is returned. On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
	di_off_t		size;
	struct di_priv_data	*priv;
	struct di_all		*all = (struct di_all *)di_mem_addr(st, 0);

	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	    off, (void *)arg, mode));

	/*
	 * Copyin data and check version.
	 * We only handle private data version 0.
	 */
	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
		kmem_free(priv, sizeof (struct di_priv_data));
		return (0);
	}

	/*
	 * Save di_priv_data copied from userland in snapshot.
	 */
	all->pd_version = priv->version;
	all->n_ppdata = priv->n_parent;
	all->n_dpdata = priv->n_driver;

	/*
	 * copyin private data format, modify offset accordingly
	 */
	if (all->n_ppdata) {	/* parent private data format */
		/*
		 * check memory
		 */
		size = all->n_ppdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->ppdata_format = off;
		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	if (all->n_dpdata) {	/* driver private data format */
		/*
		 * check memory
		 */
		size = all->n_dpdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->dpdata_format = off;
		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	kmem_free(priv, sizeof (struct di_priv_data));
	return (off);
}

/*
 * Return the real address based on the offset (off) within snapshot
 */
static caddr_t
di_mem_addr(struct di_state *st, di_off_t off)
{
	struct di_mem *dcp = st->memlist;

	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	    (void *)dcp, off));

	ASSERT(off < st->mem_size);

	while (off >= dcp->buf_size) {
		off -= dcp->buf_size;
		dcp = dcp->next;
	}

	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	    off, (void *)(dcp->buf + off)));

	return (dcp->buf + off);
}
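/*
 * Illustrative example (not part of the original source): snapshot
 * offsets are virtual positions within the whole memlist, not within a
 * single chunk.  With chunks of 0x2000 and 0x4000 bytes, offset 0x3000
 * skips past the first chunk (0x3000 >= 0x2000) and resolves to
 * second_chunk->buf + 0x1000.  This is why consumers always go through
 * di_mem_addr() instead of doing pointer arithmetic on a chunk buffer.
 */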

/*
 * Ideally we would use the whole key to derive the hash
 * value. However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
	struct di_key	*dik = key;
	size_t		rshift;
	void		*ptr;

	ASSERT(arg == NULL);

	switch (dik->k_type) {
	case DI_DKEY:
		ptr = dik->k_u.dkey.dk_dip;
		rshift = highbit(sizeof (struct dev_info));
		break;
	case DI_PKEY:
		ptr = dik->k_u.pkey.pk_pip;
		rshift = highbit(sizeof (struct mdi_pathinfo));
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
	return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
	char		*path_addr;
	struct di_key	*dik = key;

	switch (dik->k_type) {
	case DI_DKEY:
		break;
	case DI_PKEY:
		path_addr = dik->k_u.pkey.pk_path_addr;
		if (path_addr)
			kmem_free(path_addr, strlen(path_addr) + 1);
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}

	kmem_free(dik, sizeof (struct di_key));
}

static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
	if (dk1->dk_dip != dk2->dk_dip)
		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

	if (dk1->dk_major != -1 && dk2->dk_major != -1) {
		if (dk1->dk_major != dk2->dk_major)
			return (dk1->dk_major > dk2->dk_major ? 1 : -1);

		if (dk1->dk_inst != dk2->dk_inst)
			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
	}

	if (dk1->dk_nodeid != dk2->dk_nodeid)
		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

	return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
	char	*p1, *p2;
	int	rv;

	if (pk1->pk_pip != pk2->pk_pip)
		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

	p1 = pk1->pk_path_addr;
	p2 = pk2->pk_path_addr;

	p1 = p1 ? p1 : "";
	p2 = p2 ? p2 : "";

	rv = strcmp(p1, p2);
	if (rv)
		return (rv > 0 ? 1 : -1);

	if (pk1->pk_client != pk2->pk_client)
		return (pk1->pk_client > pk2->pk_client ? 1 : -1);

	if (pk1->pk_phci != pk2->pk_phci)
		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

	return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	struct di_key	*dik1, *dik2;

	dik1 = key1;
	dik2 = key2;

	if (dik1->k_type != dik2->k_type) {
		panic("devinfo: mismatched keys");
		/*NOTREACHED*/
	}

	switch (dik1->k_type) {
	case DI_DKEY:
		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
	case DI_PKEY:
		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
}

/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
	di_off_t	off;
	struct di_all	*all;
	dev_info_t	*rootnode;
	char		buf[80];

	all = (struct di_all *)di_mem_addr(st, 0);
	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

	/*
	 * Hold the devinfo node referred by the path.
	 */
	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
	if (rootnode == NULL) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		return (0);
	}

	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered dips (statep=%p)", (void *)st);

	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);


	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered pips (statep=%p)", (void *)st);

	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);

	/*
	 * copy the device tree
	 */
	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

	ddi_release_devi(rootnode);

	/*
	 * copy the devnames array
	 */
	all->devnames = off;
	off = di_copydevnm(&all->devnames, st);


	/* initialize the hash tables */
	st->lnode_count = 0;
	st->link_count = 0;

	if (DINFOLYR & st->command) {
		off = di_getlink_data(off, st);
	}

	/*
	 * Free up hash tables
	 */
	mod_hash_destroy_hash(st->reg_dip_hash);
	mod_hash_destroy_hash(st->reg_pip_hash);

	/*
	 * Record the timestamp now that we are done with the snapshot.
	 *
	 * We compute the checksum later, and then only if we cache
	 * the snapshot, since checksumming adds some overhead.
	 * The checksum is checked later when we read the cache file
	 * from disk.
	 *
	 * Set the checksum field to 0, as the CRC is calculated with
	 * that field set to 0.
	 */
	all->snapshot_time = ddi_get_time();
	all->cache_checksum = 0;

	return (off);
}

/*
 * Assumes all devinfo nodes in device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *poff_p)
{
	struct dev_info	*node;
	struct di_node	*me;
	di_off_t	off;

	ASSERT(mutex_owned(&dnp->dn_lock));

	node = DEVI(dnp->dn_head);
	for (; node; node = node->devi_next) {
		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
			continue;

		ASSERT(off > 0);
		me = (struct di_node *)di_mem_addr(st, off);
		ASSERT(me->next == 0 || me->next == -1);
		/*
		 * Only nodes which were BOUND when they were
		 * snapshotted will be added to per-driver list.
		 */
		if (me->next != -1)
			continue;

		*poff_p = off;
		poff_p = &me->next;
	}

	*poff_p = 0;
}
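/*
 * Illustrative example (not part of the original source): di_copynode()
 * marks a bound node by storing -1 in its snapshot "next" field, and
 * snap_driver_list() later overwrites those sentinels to thread the
 * per-driver list.  For a driver with bound instances at snapshot
 * offsets A and B (in dn_head order):
 *
 *	before:	dnp[i].head == 0,    A.next == -1, B.next == -1
 *	after:	dnp[i].head == A,    A.next == B,  B.next == 0
 *
 * Unbound nodes keep next == 0 and are skipped.
 */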

/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * Also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
	int		i;
	di_off_t	off;
	size_t		size;
	struct di_devnm	*dnp;

	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

	/*
	 * make sure there is some allocated memory
	 */
	size = devcnt * sizeof (struct di_devnm);
	off = di_checkmem(st, *off_p, size);
	*off_p = off;

	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	    devcnt, off));

	dnp = (struct di_devnm *)di_mem_addr(st, off);
	off += size;

	for (i = 0; i < devcnt; i++) {
		if (devnamesp[i].dn_name == NULL) {
			continue;
		}

		/*
		 * dn_name is not freed during driver unload or removal.
		 *
		 * There is a race condition when make_devname() changes
		 * dn_name during our strcpy. This should be rare since
		 * only add_drv does this. At any rate, we have never had
		 * a problem with ddi_name_to_major(), which is exposed
		 * to the same race.
		 */
		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
		    devnamesp[i].dn_name, devnamesp[i].dn_instance,
		    off));

		off = di_checkmem(st, off, strlen(devnamesp[i].dn_name) + 1);
		dnp[i].name = off;
		(void) strcpy((char *)di_mem_addr(st, off),
		    devnamesp[i].dn_name);
		off += DI_ALIGN(strlen(devnamesp[i].dn_name) + 1);

		mutex_enter(&devnamesp[i].dn_lock);

		/*
		 * Snapshot per-driver node list
		 */
		snap_driver_list(st, &devnamesp[i], &dnp[i].head);

		/*
		 * This is not used by libdevinfo, leave it for now
		 */
		dnp[i].flags = devnamesp[i].dn_flags;
		dnp[i].instance = devnamesp[i].dn_instance;

		/*
		 * get global properties
		 */
		if ((DINFOPROP & st->command) &&
		    devnamesp[i].dn_global_prop_ptr) {
			dnp[i].global_prop = off;
			off = di_getprop(
			    devnamesp[i].dn_global_prop_ptr->prop_list,
			    &dnp[i].global_prop, st, NULL, DI_PROP_GLB_LIST);
		}

		/*
		 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
		 */
		if (CB_DRV_INSTALLED(devopsp[i])) {
			if (devopsp[i]->devo_cb_ops) {
				dnp[i].ops |= DI_CB_OPS;
				if (devopsp[i]->devo_cb_ops->cb_str)
					dnp[i].ops |= DI_STREAM_OPS;
			}
			if (NEXUS_DRV(devopsp[i])) {
				dnp[i].ops |= DI_BUS_OPS;
			}
		}

		mutex_exit(&devnamesp[i].dn_lock);
	}

	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

	return (off);
}

/*
 * Copy the kernel devinfo tree. The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t	off;
	struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/* force attach drivers */
	if ((i_ddi_node_state((dev_info_t *)root) == DS_READY) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.
	 */
	while (!EMPTY_STACK(dsp)) {
		off = di_copynode(dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}
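/*
 * Illustrative sketch (not part of the original source) of the explicit
 * stack traversal: for the tree
 *
 *	root
 *	  +-- a
 *	  |	+-- c
 *	  +-- b
 *
 * di_copynode() copies root, pushes a (first child), copies a, pushes c,
 * copies c, then pops back up until a node with an unvisited sibling is
 * found and pushes b.  Nodes are therefore copied in preorder (root, a,
 * c, b) while the stack depth never exceeds the tree height, and each
 * pushed node stays ndi_devi_enter()ed until it is popped.
 */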

/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot. The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct di_stack *dsp, struct di_state *st)
{
	di_off_t	off;
	struct di_node	*me;
	struct dev_info	*node;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n",
	    dsp->depth));

	node = TOP_NODE(dsp);

	ASSERT(node != NULL);

	/*
	 * check memory usage, and fix offsets accordingly.
	 */
	off = di_checkmem(st, *(TOP_OFFSET(dsp)), sizeof (struct di_node));
	*(TOP_OFFSET(dsp)) = off;
	me = DI_NODE(di_mem_addr(st, off));

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state (CF1, CF2, etc.)
	 */
	me->self = off;
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->node_state = node->devi_node_state;
	me->user_private_data = NULL;

	/*
	 * Get parent's offset in snapshot from the stack
	 * and store it in the current node
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);

	/*
	 * increment offset
	 */
	off += sizeof (struct di_node);

#ifdef DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid) {
		ddi_devid_t	devid;
		char		*devidstr;
		int		devid_size;

		/*
		 * The devid is now represented as a property.
		 * For micro release compatibility with the di_devid
		 * interface in libdevinfo we must return it as a binary
		 * structure in the snapshot. When di_devid is removed from
		 * libdevinfo in a future release (and devi_devid is deleted)
		 * then code related to DEVID_COMPATIBILITY can be removed.
		 */
		ASSERT(node->devi_devid == DEVID_COMPATIBILITY);
		/* XXX should be DDI_DEV_T_NONE! */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, (dev_info_t *)node,
		    DDI_PROP_DONTPASS, DEVID_PROP_NAME, &devidstr) ==
		    DDI_PROP_SUCCESS) {
			if (ddi_devid_str_decode(devidstr, &devid, NULL) ==
			    DDI_SUCCESS) {
				devid_size = ddi_devid_sizeof(devid);
				off = di_checkmem(st, off, devid_size);
				me->devid = off;
				bcopy(devid,
				    di_mem_addr(st, off), devid_size);
				off += devid_size;
				ddi_devid_free(devid);
			}
			ddi_prop_free(devidstr);
		}
	}
#endif /* DEVID_COMPATIBILITY */

	if (node->devi_node_name) {
		off = di_checkmem(st, off, strlen(node->devi_node_name) + 1);
		me->node_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += strlen(node->devi_node_name) + 1;
	}

	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		off = di_checkmem(st, off, node->devi_compat_length);
		me->compat_names = off;
		me->compat_length = node->devi_compat_length;
		bcopy(node->devi_compat_names, di_mem_addr(st, off),
		    node->devi_compat_length);
		off += node->devi_compat_length;
	}

	if (node->devi_addr) {
		off = di_checkmem(st, off, strlen(node->devi_addr) + 1);
		me->address = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += strlen(node->devi_addr) + 1;
	}

	if (node->devi_binding_name) {
		off = di_checkmem(st, off,
		    strlen(node->devi_binding_name) + 1);
		me->bind_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += strlen(node->devi_binding_name) + 1;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-instance list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
		goto priv_data;
	}

	/*
	 * Grab current per dev_info node lock to
	 * get minor data and properties.
	 */
	mutex_enter(&(node->devi_lock));

	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	if (node->devi_minor) {	/* minor data */
		me->minor_data = DI_ALIGN(off);
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_client = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_client, me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}

	if (MDI_PHCI(node)) {
		me->multipath_phci = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_phci, me->self, st, 0);
		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_phci,
		    (void *)node, node->devi_mdi_component, off));
	}

property:
	if (!(DINFOPROP & st->command)) {
		goto unlock;
	}

	if (node->devi_drv_prop_ptr) {	/* driver property list */
		me->drv_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st,
		    node, DI_PROP_DRV_LIST);
	}

	if (node->devi_sys_prop_ptr) {	/* system property list */
		me->sys_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st,
		    node, DI_PROP_SYS_LIST);
	}

	if (node->devi_hw_prop_ptr) {	/* hardware property list */
		me->hw_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st,
		    node, DI_PROP_HW_LIST);
	}

	if (node->devi_global_prop_list == NULL) {
		me->glob_prop = (di_off_t)-1;	/* not global property */
	} else {
		/*
		 * Make a copy of the global property list if this devinfo
		 * refers to global properties different from what's on the
		 * devnames array. It can happen if there has been a forced
		 * driver.conf update. See mod_drv(1M).
		 */
		ASSERT(me->drv_major != -1);
		if (node->devi_global_prop_list !=
		    devnamesp[me->drv_major].dn_global_prop_ptr) {
			me->glob_prop = DI_ALIGN(off);
			off = di_getprop(
			    node->devi_global_prop_list->prop_list,
			    &me->glob_prop, st, node, DI_PROP_GLB_LIST);
		}
	}

unlock:
	/*
	 * release current per dev_info node lock
	 */
	mutex_exit(&(node->devi_lock));

priv_data:
	if (!(DINFOPRIVDATA & st->command)) {
		goto pm_info;
	}

	if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
		me->parent_data = DI_ALIGN(off);
		off = di_getppdata(node, &me->parent_data, st);
	}

	if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
		me->driver_data = DI_ALIGN(off);
		off = di_getdpdata(node, &me->driver_data, st);
	}

pm_info: /* NOT implemented */

subtree:
	if (!(DINFOSUBTREE & st->command)) {
		POP_STACK(dsp);
		return (DI_ALIGN(off));
	}

child:
	/*
	 * If there is a child--push child onto stack.
	 * Hold the parent busy while doing so.
	 */
	if (node->devi_child) {
		me->child = DI_ALIGN(off);
		PUSH_STACK(dsp, node->devi_child, &me->child);
		return (me->child);
	}

sibling:
	/*
	 * no child node, unroll the stack till a sibling of
	 * a parent node is found or root node is reached
	 */
	POP_STACK(dsp);
	while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) {
		node = TOP_NODE(dsp);
		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
		POP_STACK(dsp);
	}

	if (!EMPTY_STACK(dsp)) {
		/*
		 * a sibling is found, replace top of stack by its sibling
		 */
		me->sibling = DI_ALIGN(off);
		PUSH_STACK(dsp, node->devi_sibling, &me->sibling);
		return (me->sibling);
	}

	/*
	 * DONE with all nodes
	 */
	return (DI_ALIGN(off));
}

static i_lnode_t *
i_lnode_alloc(int modid)
{
	i_lnode_t	*i_lnode;

	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);

	ASSERT(modid != -1);
	i_lnode->modid = modid;

	return (i_lnode);
}

static void
i_lnode_free(i_lnode_t *i_lnode)
{
	kmem_free(i_lnode, sizeof (i_lnode_t));
}

static void
i_lnode_check_free(i_lnode_t *i_lnode)
{
	/* This lnode and its dip must have been snapshotted */
	ASSERT(i_lnode->self > 0);
	ASSERT(i_lnode->di_node->self > 0);

	/* at least 1 link (in or out) must exist for this lnode */
	ASSERT(i_lnode->link_in || i_lnode->link_out);

	i_lnode_free(i_lnode);
}

static i_link_t *
i_link_alloc(int spec_type)
{
	i_link_t	*i_link;

	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
	i_link->spec_type = spec_type;

	return (i_link);
}

static void
i_link_check_free(i_link_t *i_link)
{
	/* This link must have been snapshotted */
	ASSERT(i_link->self > 0);

	/* Both endpoint lnodes must exist for this link */
	ASSERT(i_link->src_lnode);
	ASSERT(i_link->tgt_lnode);

	kmem_free(i_link, sizeof (i_link_t));
}

/*ARGSUSED*/
static uint_t
i_lnode_hashfunc(void *arg, mod_hash_key_t key)
{
	i_lnode_t	*i_lnode = (i_lnode_t *)key;
	struct di_node	*ptr;
	dev_t		dev;

	dev = i_lnode->devt;
	if (dev != DDI_DEV_T_NONE)
		return (i_lnode->modid + getminor(dev) + getmajor(dev));

	ptr = i_lnode->di_node;
	if (ptr) {
		uintptr_t k = (uintptr_t)ptr;

		/* only dereference ptr once we know it is non-NULL */
		ASSERT(ptr->self > 0);
		k >>= (int)highbit(sizeof (struct di_node));
		return ((uint_t)k);
	}

	return (i_lnode->modid);
}

static int
i_lnode_cmp(void *arg1, void *arg2)
{
	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;

	if (i_lnode1->modid != i_lnode2->modid) {
		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
	}

	if (i_lnode1->di_node != i_lnode2->di_node)
		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);

	if (i_lnode1->devt != i_lnode2->devt)
		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);

	return (0);
}

/*
 * An lnode represents a {dip, dev_t} tuple. A link represents a
 * {src_lnode, tgt_lnode, spec_type} tuple.
 * The following callback assumes that the LDI framework ref-counts the
 * src_dip and tgt_dip while invoking this callback.
 */
static int
di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
{
	struct di_state	*st = (struct di_state *)arg;
	i_lnode_t	*src_lnode, *tgt_lnode, *i_lnode;
	i_link_t	**i_link_next, *i_link;
	di_off_t	soff, toff;
	mod_hash_val_t	nodep = NULL;
	int		res;

	/*
	 * if the source or target of this device usage information doesn't
	 * correspond to a device node then we don't report it via
	 * libdevinfo so return.
	 */
	if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
		return (LDI_USAGE_CONTINUE);

	ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
	ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));

	/*
	 * Skip the ldi_usage if either src or tgt dip is not in the
	 * snapshot. This saves us from pruning bad lnodes/links later.
	 */
	if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
		return (LDI_USAGE_CONTINUE);
	if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
		return (LDI_USAGE_CONTINUE);

	ASSERT(soff > 0);
	ASSERT(toff > 0);

	/*
	 * allocate an i_lnode and add it to the lnode hash
	 * if it is not already present. For this particular
	 * link the lnode is a source, but it may
	 * participate as tgt or src in any number of layered
	 * operations - so it may already be in the hash.
	 */
	i_lnode = i_lnode_alloc(ldi_usage->src_modid);
	i_lnode->di_node = (struct di_node *)di_mem_addr(st, soff);
	i_lnode->devt = ldi_usage->src_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		src_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		src_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate a tgt i_lnode and add it to the lnode hash
	 */
	i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
	i_lnode->di_node = (struct di_node *)di_mem_addr(st, toff);
	i_lnode->devt = ldi_usage->tgt_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		tgt_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		tgt_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate an i_link
	 */
	i_link = i_link_alloc(ldi_usage->tgt_spec_type);
	i_link->src_lnode = src_lnode;
	i_link->tgt_lnode = tgt_lnode;

	/*
	 * add this link onto the src i_lnode's outbound i_link list
	 */
	i_link_next = &(src_lnode->link_out);
	while (*i_link_next != NULL) {
		if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
		    (i_link->spec_type == (*i_link_next)->spec_type)) {
			/* this link already exists */
			kmem_free(i_link, sizeof (i_link_t));
			return (LDI_USAGE_CONTINUE);
		}
		i_link_next = &((*i_link_next)->src_link_next);
	}
	*i_link_next = i_link;

	/*
	 * add this link onto the tgt i_lnode's inbound i_link list
	 */
	i_link_next = &(tgt_lnode->link_in);
	while (*i_link_next != NULL) {
		ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
		i_link_next = &((*i_link_next)->tgt_link_next);
	}
	*i_link_next = i_link;

	/*
	 * add this i_link to the link hash
	 */
	res = mod_hash_insert(st->link_hash, i_link, i_link);
	ASSERT(res == 0);
	st->link_count++;

	return (LDI_USAGE_CONTINUE);
}
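/*
 * Illustrative example (not part of the original source): if one layered
 * consumer (say a volume manager, one lnode) has opened two disk devices
 * (two more lnodes) for block access, di_ldi_callback() builds:
 *
 *	consumer lnode:	link_out -> link1 -(src_link_next)-> link2
 *	disk1 lnode:	link_in  -> link1
 *	disk2 lnode:	link_in  -> link2
 *
 * Duplicate usage reports collapse onto the existing i_link because the
 * link_out scan above matches on {tgt_lnode, spec_type}.
 */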

struct i_layer_data {
	struct di_state	*st;
	int		lnode_count;
	int		link_count;
	di_off_t	lnode_off;
	di_off_t	link_off;
};

/*ARGSUSED*/
static uint_t
i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_link_t		*i_link = (i_link_t *)key;
	struct i_layer_data	*data = arg;
	struct di_link		*me;
	struct di_lnode		*melnode;
	struct di_node		*medinode;

	ASSERT(i_link->self == 0);

	i_link->self = data->link_off +
	    (data->link_count * sizeof (struct di_link));
	data->link_count++;

	ASSERT(data->link_off > 0 && data->link_count > 0);
	ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
	ASSERT(data->link_count <= data->st->link_count);

	/* fill in fields for the di_link snapshot */
	me = (struct di_link *)di_mem_addr(data->st, i_link->self);
	me->self = i_link->self;
	me->spec_type = i_link->spec_type;

	/*
	 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
	 * are created during the LDI table walk. Since we are
	 * walking the link hash, the lnode hash has already been
	 * walked and the lnodes have been snapshotted. Save lnode
	 * offsets.
	 */
	me->src_lnode = i_link->src_lnode->self;
	me->tgt_lnode = i_link->tgt_lnode->self;

	/*
	 * Save this link's offset in the src_lnode snapshot's link_out
	 * field
	 */
	melnode = (struct di_lnode *)di_mem_addr(data->st, me->src_lnode);
	me->src_link_next = melnode->link_out;
	melnode->link_out = me->self;

	/*
	 * Put this link on the tgt_lnode's link_in field
	 */
	melnode = (struct di_lnode *)di_mem_addr(data->st, me->tgt_lnode);
	me->tgt_link_next = melnode->link_in;
	melnode->link_in = me->self;

	/*
	 * An i_lnode_t is only created if the corresponding dip exists
	 * in the snapshot. A pointer to the di_node is saved in the
	 * i_lnode_t when it is allocated. For this link, get the di_node
	 * for the source lnode. Then put the link on the di_node's list
	 * of src links
	 */
	medinode = i_link->src_lnode->di_node;
	me->src_node_next = medinode->src_links;
	medinode->src_links = me->self;

	/*
	 * Put this link on the tgt_links list of the target
	 * dip.
	 */
	medinode = i_link->tgt_lnode->di_node;
	me->tgt_node_next = medinode->tgt_links;
	medinode->tgt_links = me->self;

	return (MH_WALK_CONTINUE);
}
2111 */
2112 medinode = i_link->tgt_lnode->di_node;
2113 me->tgt_node_next = medinode->tgt_links;
2114 medinode->tgt_links = me->self;
2115
2116 return (MH_WALK_CONTINUE);
2117 }
2118
2119 /*ARGSUSED*/
2120 static uint_t
2121 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2122 {
2123 i_lnode_t *i_lnode = (i_lnode_t *)key;
2124 struct i_layer_data *data = arg;
2125 struct di_lnode *me;
2126 struct di_node *medinode;
2127
2128 ASSERT(i_lnode->self == 0);
2129
2130 i_lnode->self = data->lnode_off +
2131 (data->lnode_count * sizeof (struct di_lnode));
2132 data->lnode_count++;
2133
2134 ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2135 ASSERT(data->link_count == 0); /* links not done yet */
2136 ASSERT(data->lnode_count <= data->st->lnode_count);
2137
2138 /* fill in fields for the di_lnode snapshot */
2139 me = (struct di_lnode *)di_mem_addr(data->st, i_lnode->self);
2140 me->self = i_lnode->self;
2141
2142 if (i_lnode->devt == DDI_DEV_T_NONE) {
2143 me->dev_major = (major_t)-1;
2144 me->dev_minor = (minor_t)-1;
2145 } else {
2146 me->dev_major = getmajor(i_lnode->devt);
2147 me->dev_minor = getminor(i_lnode->devt);
2148 }
2149
2150 /*
2151 * The dip corresponding to this lnode must exist in
2152 * the snapshot or we wouldn't have created the i_lnode_t
2153 * during LDI walk. Save the offset of the dip.
2154 */
2155 ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2156 me->node = i_lnode->di_node->self;
2157
2158 /*
2159 * There must be at least one link in or out of this lnode
2160 * or we wouldn't have created it. These fields will be set
2161 * during the link hash walk.
2162 */
2163 ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2164
2165 /*
2166 * Set the offset of the devinfo node associated with this
2167 * lnode. Also update the node_next pointer. This pointer
2168 * is set if there are multiple lnodes associated with the same
2169 * devinfo node (this can occur when multiple minor nodes
2170 * are open for one device, etc.)
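 * All of these lists are built by prepending, so they appear in
 * the snapshot in reverse of the order in which the hashes were
 * walked.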
2171 */
2172 medinode = i_lnode->di_node;
2173 me->node_next = medinode->lnodes;
2174 medinode->lnodes = me->self;
2175
2176 return (MH_WALK_CONTINUE);
2177 }
2178
2179 static di_off_t
2180 di_getlink_data(di_off_t off, struct di_state *st)
2181 {
2182 struct i_layer_data data = {0};
2183 size_t size;
2184
2185 dcmn_err2((CE_CONT, "di_getlink_data: off = %x\n", off));
2186
2187 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
2188 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
2189 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);
2190
2191 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
2192 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));
2193
2194 /* get driver layering information */
2195 (void) ldi_usage_walker(st, di_ldi_callback);
2196
2197 /* check if there is any link data to include in the snapshot */
2198 if (st->lnode_count == 0) {
2199 ASSERT(st->link_count == 0);
2200 goto out;
2201 }
2202
2203 ASSERT(st->link_count != 0);
2204
2205 /* get a pointer to snapshot memory for all the di_lnodes */
2206 size = sizeof (struct di_lnode) * st->lnode_count;
2207 data.lnode_off = off = di_checkmem(st, off, size);
2208 off += DI_ALIGN(size);
2209
2210 /* get a pointer to snapshot memory for all the di_links */
2211 size = sizeof (struct di_link) * st->link_count;
2212 data.link_off = off = di_checkmem(st, off, size);
2213 off += DI_ALIGN(size);
2214
2215 data.lnode_count = data.link_count = 0;
2216 data.st = st;
2217
2218 /*
2219 * We have lnodes and links that will go into the
2220 * snapshot, so let's walk the respective hashes
2221 * and snapshot them. The various linkages are
2222 * also set up during the walk.
2223 */
2224 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
2225 ASSERT(data.lnode_count == st->lnode_count);
2226
2227 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
2228 ASSERT(data.link_count == st->link_count);
2229
2230 out:
2231 /* free up the i_lnodes and i_links used to create the snapshot */
2232 mod_hash_destroy_hash(st->lnode_hash);
2233 mod_hash_destroy_hash(st->link_hash);
2234 st->lnode_count = 0;
2235 st->link_count = 0;
2236
2237 return (off);
2238 }
2239
2240
2241 /*
2242 * Copy all minor data nodes attached to a devinfo node into the snapshot.
2243 * It is called from di_copynode with devi_lock held.
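 * Each di_minor record is written at an aligned offset, followed
 * by its name and node_type strings; records are chained through
 * their next offsets, and the last record's next is set to 0.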
2244 */
2245 static di_off_t
2246 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
2247 struct di_state *st)
2248 {
2249 di_off_t off;
2250 struct di_minor *me;
2251
2252 dcmn_err2((CE_CONT, "di_getmdata:\n"));
2253
2254 /*
2255 * check memory first
2256 */
2257 off = di_checkmem(st, *off_p, sizeof (struct di_minor));
2258 *off_p = off;
2259
2260 do {
2261 me = (struct di_minor *)di_mem_addr(st, off);
2262 me->self = off;
2263 me->type = mnode->type;
2264 me->node = node;
2265 me->user_private_data = NULL;
2266
2267 off += DI_ALIGN(sizeof (struct di_minor));
2268
2269 /*
2270 * Split dev_t to major/minor, so it works for
2271 * both ILP32 and LP64 model
2272 */
2273 me->dev_major = getmajor(mnode->ddm_dev);
2274 me->dev_minor = getminor(mnode->ddm_dev);
2275 me->spec_type = mnode->ddm_spec_type;
2276
2277 if (mnode->ddm_name) {
2278 off = di_checkmem(st, off,
2279 strlen(mnode->ddm_name) + 1);
2280 me->name = off;
2281 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
2282 off += DI_ALIGN(strlen(mnode->ddm_name) + 1);
2283 }
2284
2285 if (mnode->ddm_node_type) {
2286 off = di_checkmem(st, off,
2287 strlen(mnode->ddm_node_type) + 1);
2288 me->node_type = off;
2289 (void) strcpy(di_mem_addr(st, off),
2290 mnode->ddm_node_type);
2291 off += DI_ALIGN(strlen(mnode->ddm_node_type) + 1);
2292 }
2293
2294 off = di_checkmem(st, off, sizeof (struct di_minor));
2295 me->next = off;
2296 mnode = mnode->next;
2297 } while (mnode);
2298
2299 me->next = 0;
2300
2301 return (off);
2302 }
2303
2304 /*
2305 * di_register_dip(), di_dip_find(): The dip must be protected
2306 * from deallocation when using these routines - this can either
2307 * be a reference count, a busy hold or a per-driver lock.
2308 */
2309
2310 static void
2311 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2312 {
2313 struct dev_info *node = DEVI(dip);
2314 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2315 struct di_dkey *dk;
2316
2317 ASSERT(dip);
2318 ASSERT(off > 0);
2319
2320 key->k_type = DI_DKEY;
2321 dk = &(key->k_u.dkey);
2322
2323 dk->dk_dip = dip;
2324 dk->dk_major = node->devi_major;
2325 dk->dk_inst = node->devi_instance;
2326 dk->dk_nodeid = node->devi_nodeid;
2327
2328 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2329 (mod_hash_val_t)(uintptr_t)off) != 0) {
2330 panic(
2331 "duplicate devinfo (%p) registered during device "
2332 "tree walk", (void *)dip);
2333 }
2334 }
2335
2336
2337 static int
2338 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2339 {
2340 /*
2341 * uintptr_t must be used because it matches the size of void *:
2342 * mod_hash stores results in pointer-size containers. Since
2343 * di_off_t is only a 32-bit offset, passing its address directly
2344 * would let mod_hash_find() overwrite adjacent memory on 64-bit kernels.
2345 */
2346 uintptr_t offset;
2347 struct di_key key = {0};
2348 struct di_dkey *dk;
2349
2350 ASSERT(st->reg_dip_hash);
2351 ASSERT(dip);
2352 ASSERT(off_p);
2353
2354
2355 key.k_type = DI_DKEY;
2356 dk = &(key.k_u.dkey);
2357
2358 dk->dk_dip = dip;
2359 dk->dk_major = DEVI(dip)->devi_major;
2360 dk->dk_inst = DEVI(dip)->devi_instance;
2361 dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2362
2363 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2364 (mod_hash_val_t *)&offset) == 0) {
2365 *off_p = (di_off_t)offset;
2366 return (0);
2367 } else {
2368 return (-1);
2369 }
2370 }
2371
2372 /*
2373 * di_register_pip(), di_pip_find(): The pip must be protected from deallocation
2374 * when using these routines.
The caller must do this by protecting the
2375 * client (or phci) <-> pip linkage while traversing the list and then holding the
2376 * pip when it is found in the list.
2377 */
2378
2379 static void
2380 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
2381 {
2382 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2383 char *path_addr;
2384 struct di_pkey *pk;
2385
2386 ASSERT(pip);
2387 ASSERT(off > 0);
2388
2389 key->k_type = DI_PKEY;
2390 pk = &(key->k_u.pkey);
2391
2392 pk->pk_pip = pip;
2393 path_addr = mdi_pi_get_addr(pip);
2394 if (path_addr)
2395 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
2396 pk->pk_client = mdi_pi_get_client(pip);
2397 pk->pk_phci = mdi_pi_get_phci(pip);
2398
2399 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
2400 (mod_hash_val_t)(uintptr_t)off) != 0) {
2401 panic(
2402 "duplicate pathinfo (%p) registered during device "
2403 "tree walk", (void *)pip);
2404 }
2405 }
2406
2407 /*
2408 * As with di_register_pip, the caller must hold or lock the pip
2409 */
2410 static int
2411 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
2412 {
2413 /*
2414 * uintptr_t must be used because it matches the size of void *:
2415 * mod_hash stores results in pointer-size containers. Since
2416 * di_off_t is only a 32-bit offset, passing its address directly
2417 * would let mod_hash_find() overwrite adjacent memory on 64-bit kernels.
2418 */
2419 uintptr_t offset;
2420 struct di_key key = {0};
2421 struct di_pkey *pk;
2422
2423 ASSERT(st->reg_pip_hash);
2424 ASSERT(off_p);
2425
2426 if (pip == NULL) {
2427 *off_p = 0;
2428 return (0);
2429 }
2430
2431 key.k_type = DI_PKEY;
2432 pk = &(key.k_u.pkey);
2433
2434 pk->pk_pip = pip;
2435 pk->pk_path_addr = mdi_pi_get_addr(pip);
2436 pk->pk_client = mdi_pi_get_client(pip);
2437 pk->pk_phci = mdi_pi_get_phci(pip);
2438
2439 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
2440 (mod_hash_val_t *)&offset) == 0) {
2441 *off_p = (di_off_t)offset;
2442 return (0);
2443 } else {
2444 return (-1);
2445 }
2446 }
2447
2448 static di_path_state_t
2449 path_state_convert(mdi_pathinfo_state_t st)
2450 {
2451 switch (st) {
2452 case MDI_PATHINFO_STATE_ONLINE:
2453 return (DI_PATH_STATE_ONLINE);
2454 case MDI_PATHINFO_STATE_STANDBY:
2455 return (DI_PATH_STATE_STANDBY);
2456 case MDI_PATHINFO_STATE_OFFLINE:
2457 return (DI_PATH_STATE_OFFLINE);
2458 case MDI_PATHINFO_STATE_FAULT:
2459 return (DI_PATH_STATE_FAULT);
2460 default:
2461 return (DI_PATH_STATE_UNKNOWN);
2462 }
2463 }
2464
2465
2466 static di_off_t
2467 di_path_getprop(mdi_pathinfo_t *pip, di_off_t off, di_off_t *off_p,
2468 struct di_state *st)
2469 {
2470 nvpair_t *prop = NULL;
2471 struct di_path_prop *me;
2472
2473 if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
2474 *off_p = 0;
2475 return (off);
2476 }
2477
2478 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2479 *off_p = off;
2480
2481 while (prop = mdi_pi_get_next_prop(pip, prop)) {
2482 int delta = 0;
2483
2484 me = (struct di_path_prop *)di_mem_addr(st, off);
2485 me->self = off;
2486 off += sizeof (struct di_path_prop);
2487
2488 /*
2489 * property name
2490 */
2491 off = di_checkmem(st, off, strlen(nvpair_name(prop)) + 1);
2492 me->prop_name = off;
2493 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
2494 off += strlen(nvpair_name(prop)) + 1;
2495
2496 switch (nvpair_type(prop)) {
2497 case DATA_TYPE_BYTE:
2498 case DATA_TYPE_INT16:
2499 case DATA_TYPE_UINT16:
2500 case DATA_TYPE_INT32:
2501 case DATA_TYPE_UINT32:
2502 delta = sizeof (int32_t);
2503
me->prop_type = DDI_PROP_TYPE_INT;
2504 off = di_checkmem(st, off, delta);
2505 (void) nvpair_value_int32(prop,
2506 (int32_t *)di_mem_addr(st, off));
2507 break;
2508
2509 case DATA_TYPE_INT64:
2510 case DATA_TYPE_UINT64:
2511 delta = sizeof (int64_t);
2512 me->prop_type = DDI_PROP_TYPE_INT64;
2513 off = di_checkmem(st, off, delta);
2514 (void) nvpair_value_int64(prop,
2515 (int64_t *)di_mem_addr(st, off));
2516 break;
2517
2518 case DATA_TYPE_STRING:
2519 {
2520 char *str;
2521 (void) nvpair_value_string(prop, &str);
2522 delta = strlen(str) + 1;
2523 me->prop_type = DDI_PROP_TYPE_STRING;
2524 off = di_checkmem(st, off, delta);
2525 (void) strcpy(di_mem_addr(st, off), str);
2526 break;
2527 }
2528 case DATA_TYPE_BYTE_ARRAY:
2529 case DATA_TYPE_INT16_ARRAY:
2530 case DATA_TYPE_UINT16_ARRAY:
2531 case DATA_TYPE_INT32_ARRAY:
2532 case DATA_TYPE_UINT32_ARRAY:
2533 case DATA_TYPE_INT64_ARRAY:
2534 case DATA_TYPE_UINT64_ARRAY:
2535 {
2536 uchar_t *buf;
2537 uint_t nelems;
2538 (void) nvpair_value_byte_array(prop, &buf, &nelems);
2539 delta = nelems;
2540 me->prop_type = DDI_PROP_TYPE_BYTE;
2541 if (nelems != 0) {
2542 off = di_checkmem(st, off, delta);
2543 bcopy(buf, di_mem_addr(st, off), nelems);
2544 }
2545 break;
2546 }
2547
2548 default: /* Unknown or unhandled type; skip it */
2549 delta = 0;
2550 break;
2551 }
2552
2553 if (delta > 0) {
2554 me->prop_data = off;
2555 }
2556
2557 me->prop_len = delta;
2558 off += delta;
2559
2560 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2561 me->prop_next = off;
2562 }
2563
2564 me->prop_next = 0;
2565 return (off);
2566 }
2567
2568
2569 static void
2570 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2571 int get_client)
2572 {
2573 if (get_client) {
2574 ASSERT(me->path_client == 0);
2575 me->path_client = noff;
2576 ASSERT(me->path_c_link == 0);
2577 *off_pp = &me->path_c_link;
2578 me->path_snap_state &=
2579 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2580 } else {
2581 ASSERT(me->path_phci == 0);
2582 me->path_phci = noff;
2583 ASSERT(me->path_p_link == 0);
2584 *off_pp = &me->path_p_link;
2585 me->path_snap_state &=
2586 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2587 }
2588 }
2589
2590 /*
2591 * poff_p: pointer to the linkage field. This links pips along the client|phci
2592 * linkage list.
2593 * noff : Offset for the endpoint dip snapshot.
2594 */
2595 static di_off_t
2596 di_getpath_data(dev_info_t *dip, di_off_t *poff_p, di_off_t noff,
2597 struct di_state *st, int get_client)
2598 {
2599 di_off_t off;
2600 mdi_pathinfo_t *pip;
2601 struct di_path *me;
2602 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2603
2604 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2605
2606 /*
2607 * The naming of the following mdi_xyz() is unfortunately
2608 * non-intuitive. mdi_get_next_phci_path() follows the
2609 * client_link, i.e. the list of pips belonging to the
2610 * given client dip.
2611 */
2612 if (get_client)
2613 next_pip = &mdi_get_next_phci_path;
2614 else
2615 next_pip = &mdi_get_next_client_path;
2616
2617 off = *poff_p;
2618
2619 pip = NULL;
2620 while (pip = (*next_pip)(dip, pip)) {
2621 mdi_pathinfo_state_t state;
2622 di_off_t stored_offset;
2623
2624 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2625
2626 mdi_pi_lock(pip);
2627
2628 if (di_pip_find(st, pip, &stored_offset) != -1) {
2629 /*
2630 * We've already seen this pathinfo node so we need to
2631 * take care not to snap it again. However, one endpoint
2632 * and linkage will be set here.
The other endpoint
2633 * and linkage have already been set when the pip was
2634 * first snapshotted, i.e. when the other endpoint dip
2635 * was snapshotted.
2636 */
2637 me = (struct di_path *)di_mem_addr(st, stored_offset);
2638
2639 *poff_p = stored_offset;
2640
2641 di_path_one_endpoint(me, noff, &poff_p, get_client);
2642
2643 /*
2644 * The other endpoint and linkage were set when this
2645 * pip was snapshotted. So we are done with both
2646 * endpoints and linkages.
2647 */
2648 ASSERT(!(me->path_snap_state &
2649 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2650 ASSERT(!(me->path_snap_state &
2651 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2652
2653 mdi_pi_unlock(pip);
2654 continue;
2655 }
2656
2657 /*
2658 * Now that we need to snapshot this pip, check memory
2659 */
2660 off = di_checkmem(st, off, sizeof (struct di_path));
2661 me = (struct di_path *)di_mem_addr(st, off);
2662 me->self = off;
2663 *poff_p = off;
2664 off += sizeof (struct di_path);
2665
2666 me->path_snap_state =
2667 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2668 me->path_snap_state |=
2669 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2670
2671 /*
2672 * Zero out fields as di_checkmem() doesn't guarantee
2673 * zero-filled memory
2674 */
2675 me->path_client = me->path_phci = 0;
2676 me->path_c_link = me->path_p_link = 0;
2677
2678 di_path_one_endpoint(me, noff, &poff_p, get_client);
2679
2680 /*
2681 * Note the existence of this pathinfo
2682 */
2683 di_register_pip(st, pip, me->self);
2684
2685 state = mdi_pi_get_state(pip);
2686 me->path_state = path_state_convert(state);
2687
2688 /*
2689 * Get intermediate addressing info.
2690 */
2691 off = di_checkmem(st, off, strlen(mdi_pi_get_addr(pip)) + 1);
2692 me->path_addr = off;
2693 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2694 off += strlen(mdi_pi_get_addr(pip)) + 1;
2695
2696 /*
2697 * Get path properties if props are to be included in the
2698 * snapshot
2699 */
2700 if (DINFOPROP & st->command) {
2701 off = di_path_getprop(pip, off, &me->path_prop, st);
2702 } else {
2703 me->path_prop = 0;
2704 }
2705
2706 mdi_pi_unlock(pip);
2707 }
2708
2709 *poff_p = 0;
2710
2711 return (off);
2712 }
2713
2714 /*
2715 * Copy a list of properties attached to a devinfo node. Called from
2716 * di_copynode with devi_lock held. Each property's dev_t is used in case
2717 * we need to call the driver's prop_op entry. The value of list indicates
2718 * which list we are copying. Possible values are:
2719 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
2720 */
2721 static di_off_t
2722 di_getprop(struct ddi_prop *prop, di_off_t *off_p, struct di_state *st,
2723 struct dev_info *dip, int list)
2724 {
2725 dev_t dev;
2726 int (*prop_op)();
2727 int off, need_prop_op = 0;
2728 int prop_op_fail = 0;
2729 ddi_prop_t *propp = NULL;
2730 struct di_prop *pp;
2731 struct dev_ops *ops = NULL;
2732 int prop_len;
2733 caddr_t prop_val;
2734
2735
2736 dcmn_err2((CE_CONT, "di_getprop:\n"));
2737
2738 ASSERT(st != NULL);
2739
2740 dcmn_err((CE_CONT, "copy property list at addr %p\n", (void *)prop));
2741
2742 /*
2743 * Figure out if we need to call the driver's prop_op entry point.
2744 * The conditions are:
2745 * -- driver property list
2746 * -- driver must be attached and held
2747 * -- driver's cb_prop_op != ddi_prop_op
2748 * or parent's bus_prop_op != ddi_bus_prop_op
2749 */
2750
2751 if (list != DI_PROP_DRV_LIST) {
2752 goto getprop;
2753 }
2754
2755 /*
2756 * If the driver is not attached or if the major number is -1, we
2757 * ignore the driver property list.
No one should rely on such 2758 * properties. 2759 */ 2760 if (i_ddi_node_state((dev_info_t *)dip) < DS_ATTACHED) { 2761 off = *off_p; 2762 *off_p = 0; 2763 return (off); 2764 } 2765 2766 /* 2767 * Now we have a driver which is held. We can examine entry points 2768 * and check the condition listed above. 2769 */ 2770 ops = dip->devi_ops; 2771 2772 /* 2773 * Some nexus drivers incorrectly set cb_prop_op to nodev, 2774 * nulldev or even NULL. 2775 */ 2776 if (ops && ops->devo_cb_ops && 2777 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) && 2778 (ops->devo_cb_ops->cb_prop_op != nodev) && 2779 (ops->devo_cb_ops->cb_prop_op != nulldev) && 2780 (ops->devo_cb_ops->cb_prop_op != NULL)) { 2781 need_prop_op = 1; 2782 } 2783 2784 getprop: 2785 /* 2786 * check memory availability 2787 */ 2788 off = di_checkmem(st, *off_p, sizeof (struct di_prop)); 2789 *off_p = off; 2790 /* 2791 * Now copy properties 2792 */ 2793 do { 2794 pp = (struct di_prop *)di_mem_addr(st, off); 2795 pp->self = off; 2796 /* 2797 * Split dev_t to major/minor, so it works for 2798 * both ILP32 and LP64 model 2799 */ 2800 pp->dev_major = getmajor(prop->prop_dev); 2801 pp->dev_minor = getminor(prop->prop_dev); 2802 pp->prop_flags = prop->prop_flags; 2803 pp->prop_list = list; 2804 2805 /* 2806 * property name 2807 */ 2808 off += sizeof (struct di_prop); 2809 if (prop->prop_name) { 2810 off = di_checkmem(st, off, strlen(prop->prop_name) 2811 + 1); 2812 pp->prop_name = off; 2813 (void) strcpy(di_mem_addr(st, off), prop->prop_name); 2814 off += strlen(prop->prop_name) + 1; 2815 } 2816 2817 /* 2818 * Set prop_len here. This may change later 2819 * if cb_prop_op returns a different length. 2820 */ 2821 pp->prop_len = prop->prop_len; 2822 if (!need_prop_op) { 2823 if (prop->prop_val == NULL) { 2824 dcmn_err((CE_WARN, 2825 "devinfo: property fault at %p", 2826 (void *)prop)); 2827 pp->prop_data = -1; 2828 } else if (prop->prop_len != 0) { 2829 off = di_checkmem(st, off, prop->prop_len); 2830 pp->prop_data = off; 2831 bcopy(prop->prop_val, di_mem_addr(st, off), 2832 prop->prop_len); 2833 off += DI_ALIGN(pp->prop_len); 2834 } 2835 } 2836 2837 off = di_checkmem(st, off, sizeof (struct di_prop)); 2838 pp->next = off; 2839 prop = prop->prop_next; 2840 } while (prop); 2841 2842 pp->next = 0; 2843 2844 if (!need_prop_op) { 2845 dcmn_err((CE_CONT, "finished property " 2846 "list at offset 0x%x\n", off)); 2847 return (off); 2848 } 2849 2850 /* 2851 * If there is a need to call driver's prop_op entry, 2852 * we must release driver's devi_lock, because the 2853 * cb_prop_op entry point will grab it. 2854 * 2855 * The snapshot memory has already been allocated above, 2856 * which means the length of an active property should 2857 * remain fixed for this implementation to work. 
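 *
 * While the lock is dropped, the loop below walks the di_prop
 * records already written to the snapshot, by offset, and rewrites
 * prop_len/prop_data wherever cb_prop_op reports a different length.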
2858 */
2859
2860
2861 prop_op = ops->devo_cb_ops->cb_prop_op;
2862 pp = (struct di_prop *)di_mem_addr(st, *off_p);
2863
2864 mutex_exit(&dip->devi_lock);
2865
2866 do {
2867 int err;
2868 struct di_prop *tmp;
2869
2870 if (pp->next) {
2871 tmp = (struct di_prop *)
2872 di_mem_addr(st, pp->next);
2873 } else {
2874 tmp = NULL;
2875 }
2876
2877 /*
2878 * call into driver's prop_op entry point
2879 *
2880 * Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY
2881 */
2882 dev = makedevice(pp->dev_major, pp->dev_minor);
2883 if (dev == DDI_DEV_T_NONE)
2884 dev = DDI_DEV_T_ANY;
2885
2886 dcmn_err((CE_CONT, "call prop_op"
2887 "(%lx, %p, PROP_LEN_AND_VAL_ALLOC, "
2888 "DDI_PROP_DONTPASS, \"%s\", %p, &%d)\n",
2889 dev,
2890 (void *)dip,
2891 (char *)di_mem_addr(st, pp->prop_name),
2892 (void *)di_mem_addr(st, pp->prop_data),
2893 pp->prop_len));
2894
2895 if ((err = (*prop_op)(dev, (dev_info_t *)dip,
2896 PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS,
2897 (char *)di_mem_addr(st, pp->prop_name),
2898 &prop_val, &prop_len)) != DDI_PROP_SUCCESS) {
2899 if ((propp = i_ddi_prop_search(dev,
2900 (char *)di_mem_addr(st, pp->prop_name),
2901 (uint_t)pp->prop_flags,
2902 &(DEVI(dip)->devi_drv_prop_ptr))) != NULL) {
2903 pp->prop_len = propp->prop_len;
2904 if (pp->prop_len != 0) {
2905 off = di_checkmem(st, off,
2906 pp->prop_len);
2907 pp->prop_data = off;
2908 bcopy(propp->prop_val, di_mem_addr(st,
2909 pp->prop_data), propp->prop_len);
2910 off += DI_ALIGN(pp->prop_len);
2911 }
2912 } else {
2913 prop_op_fail = 1;
2914 }
2915 } else if (prop_len != 0) {
2916 pp->prop_len = prop_len;
2917 off = di_checkmem(st, off, prop_len);
2918 pp->prop_data = off;
2919 bcopy(prop_val, di_mem_addr(st, off), prop_len);
2920 off += DI_ALIGN(prop_len);
2921 kmem_free(prop_val, prop_len);
2922 }
2923
2924 if (prop_op_fail) {
2925 pp->prop_data = -1;
2926 dcmn_err((CE_WARN, "devinfo: prop_op failure "
2927 "for \"%s\" err %d",
2928 di_mem_addr(st, pp->prop_name), err));
2929 }
2930
2931 pp = tmp;
2932
2933 } while (pp);
2934
2935 mutex_enter(&dip->devi_lock);
2936 dcmn_err((CE_CONT, "finished property list at offset 0x%x\n", off));
2937 return (off);
2938 }
2939
2940 /*
2941 * find private data format attached to a dip
2942 * parent = 1 to match driver name of parent dip (for parent private data)
2943 * 0 to match driver name of current dip (for driver private data)
2944 */
2945 #define DI_MATCH_DRIVER 0
2946 #define DI_MATCH_PARENT 1
2947
2948 struct di_priv_format *
2949 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
2950 {
2951 int i, count, len;
2952 char *drv_name;
2953 major_t major;
2954 struct di_all *all;
2955 struct di_priv_format *form;
2956
2957 dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
2958 node->devi_node_name, match));
2959
2960 if (match == DI_MATCH_PARENT) {
2961 node = DEVI(node->devi_parent);
2962 }
2963
2964 if (node == NULL) {
2965 return (NULL);
2966 }
2967
2968 major = ddi_name_to_major(node->devi_binding_name);
2969 if (major == (major_t)(-1)) {
2970 return (NULL);
2971 }
2972
2973 /*
2974 * Match the driver name.
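 * A di_priv_format entry may list several driver names separated
 * by single spaces; the loop below compares drv_name against each
 * name in turn and returns the first entry that matches.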
2975 */
2976 drv_name = ddi_major_to_name(major);
2977 if ((drv_name == NULL) || *drv_name == '\0') {
2978 return (NULL);
2979 }
2980
2981 /* Now get the di_priv_format array */
2982 all = (struct di_all *)di_mem_addr(st, 0);
2983
2984 if (match == DI_MATCH_PARENT) {
2985 count = all->n_ppdata;
2986 form = (struct di_priv_format *)
2987 (di_mem_addr(st, 0) + all->ppdata_format);
2988 } else {
2989 count = all->n_dpdata;
2990 form = (struct di_priv_format *)
2991 ((caddr_t)all + all->dpdata_format);
2992 }
2993
2994 len = strlen(drv_name);
2995 for (i = 0; i < count; i++) {
2996 char *tmp;
2997
2998 tmp = form[i].drv_name;
2999 while (tmp && (*tmp != '\0')) {
3000 if (strncmp(drv_name, tmp, len) == 0) {
3001 return (&form[i]);
3002 }
3003 /*
3004 * Move to the next driver name, skipping the white space
3005 */
3006 if (tmp = strchr(tmp, ' ')) {
3007 tmp++;
3008 }
3009 }
3010 }
3011
3012 return (NULL);
3013 }
3014
3015 /*
3016 * The following functions copy data as specified by the format passed in.
3017 * To prevent an invalid format from panicking the system, we call on_fault().
3018 * A return value of 0 indicates an error. Otherwise, the total offset
3019 * is returned.
3020 */
3021 #define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */
3022
3023 static di_off_t
3024 di_getprvdata(struct di_priv_format *pdp, void *data, di_off_t *off_p,
3025 struct di_state *st)
3026 {
3027 caddr_t pa;
3028 void *ptr;
3029 int i, size, repeat;
3030 di_off_t off, off0, *tmp;
3031
3032 label_t ljb;
3033
3034 dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3035
3036 /*
3037 * check memory availability. Private data size is
3038 * limited to DI_MAX_PRIVDATA.
3039 */
3040 off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3041
3042 if ((pdp->bytes <= 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3043 goto failure;
3044 }
3045
3046 if (!on_fault(&ljb)) {
3047 /* copy the struct */
3048 bcopy(data, di_mem_addr(st, off), pdp->bytes);
3049 off0 = DI_ALIGN(pdp->bytes);
3050
3051 /* dereferencing pointers */
3052 for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3053
3054 if (pdp->ptr[i].size == 0) {
3055 goto success; /* no more ptrs */
3056 }
3057
3058 /*
3059 * first, get the pointer content
3060 */
3061 if ((pdp->ptr[i].offset < 0) ||
3062 (pdp->ptr[i].offset >
3063 pdp->bytes - sizeof (char *)))
3064 goto failure; /* wrong offset */
3065
3066 pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3067 tmp = (di_off_t *)pa; /* to store off_t later */
3068
3069 ptr = *((void **) pa); /* get pointer value */
3070 if (ptr == NULL) { /* if NULL pointer, go on */
3071 continue;
3072 }
3073
3074 /*
3075 * next, find the repeat count (array dimension)
3076 */
3077 repeat = pdp->ptr[i].len_offset;
3078
3079 /*
3080 * A negative value indicates a fixed sized array; the
3081 * dimension is the negated value.
3082 *
3083 * A zero or positive value indicates a variable sized
3084 * array. In that case it is the offset of an int member
3085 * of the structure that holds the array dimension.
3086 */
3087 if (repeat > pdp->bytes - sizeof (int)) {
3088 goto failure; /* wrong offset */
3089 }
3090
3091 if (repeat >= 0) {
3092 repeat = *((int *)((caddr_t)data + repeat));
3093 } else {
3094 repeat = -repeat;
3095 }
3096
3097 /*
3098 * next, get the size of the object to be copied
3099 */
3100 size = pdp->ptr[i].size * repeat;
3101
3102 /*
3103 * Arbitrarily limit the total size of the object to be
3104 * copied (1 byte up to what remains of DI_MAX_PRIVDATA, half a page).
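 *
 * For illustration (a hypothetical format, not one registered by
 * any real driver): with pdp->bytes = 64, ptr[0].offset = 16,
 * ptr[0].size = 8 and ptr[0].len_offset = -4, the 64-byte struct
 * is copied first; then 4 * 8 = 32 bytes from the pointer stored
 * at offset 16 are appended at the next aligned offset (64), and
 * the pointer slot in the copy is overwritten with that relative
 * offset so a consumer of the snapshot can follow it.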
3105 */
3106 if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3107 goto failure; /* wrong size or too big */
3108 }
3109
3110 /*
3111 * Now copy the data
3112 */
3113 *tmp = off0;
3114 bcopy(ptr, di_mem_addr(st, off + off0), size);
3115 off0 += DI_ALIGN(size);
3116 }
3117 } else {
3118 goto failure;
3119 }
3120
3121 success:
3122 /*
3123 * success if reached here
3124 */
3125 no_fault();
3126 *off_p = off;
3127
3128 return (off + off0);
3129 /*NOTREACHED*/
3130
3131 failure:
3132 /*
3133 * fault occurred
3134 */
3135 no_fault();
3136 cmn_err(CE_WARN, "devinfo: fault in private data at %p", data);
3137 *off_p = -1; /* set private data to indicate error */
3138
3139 return (off);
3140 }
3141
3142 /*
3143 * get parent private data; on error, returns original offset
3144 */
3145 static di_off_t
3146 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3147 {
3148 int off;
3149 struct di_priv_format *ppdp;
3150
3151 dcmn_err2((CE_CONT, "di_getppdata:\n"));
3152
3153 /* find the parent data format */
3154 if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3155 off = *off_p;
3156 *off_p = 0; /* set parent data to none */
3157 return (off);
3158 }
3159
3160 return (di_getprvdata(ppdp, ddi_get_parent_data((dev_info_t *)node),
3161 off_p, st));
3162 }
3163
3164 /*
3165 * get driver private data; on error, returns original offset
3166 */
3167 static di_off_t
3168 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3169 {
3170 int off;
3171 struct di_priv_format *dpdp;
3172
3173 dcmn_err2((CE_CONT, "di_getdpdata:"));
3174
3175 /* find the driver data format */
3176 if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3177 off = *off_p;
3178 *off_p = 0; /* set driver data to none */
3179 return (off);
3180 }
3181
3182 return (di_getprvdata(dpdp, ddi_get_driver_private((dev_info_t *)node),
3183 off_p, st));
3184 }
3185
3186 /*
3187 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3188 * This function encapsulates the state machine:
3189 *
3190 * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3191 * | SNAPSHOT USRLD |
3192 * --------------------------------------------------
3193 *
3194 * Returns 0 on success and -1 on failure
3195 */
3196 static int
3197 di_setstate(struct di_state *st, int new_state)
3198 {
3199 int ret = 0;
3200
3201 mutex_enter(&di_lock);
3202 switch (new_state) {
3203 case IOC_IDLE:
3204 case IOC_DONE:
3205 break;
3206 case IOC_SNAP:
3207 if (st->di_iocstate != IOC_IDLE)
3208 ret = -1;
3209 break;
3210 case IOC_COPY:
3211 if (st->di_iocstate != IOC_DONE)
3212 ret = -1;
3213 break;
3214 default:
3215 ret = -1;
3216 }
3217
3218 if (ret == 0)
3219 st->di_iocstate = new_state;
3220 else
3221 cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3222 st->di_iocstate, new_state);
3223 mutex_exit(&di_lock);
3224 return (ret);
3225 }
3226
3227 /*
3228 * We cannot assume the presence of the entire
3229 * snapshot in this routine.
All we are guaranteed 3230 * is the di_all struct + 1 byte (for root_path) 3231 */ 3232 static int 3233 header_plus_one_ok(struct di_all *all) 3234 { 3235 /* 3236 * Refuse to read old versions 3237 */ 3238 if (all->version != DI_SNAPSHOT_VERSION) { 3239 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); 3240 return (0); 3241 } 3242 3243 if (all->cache_magic != DI_CACHE_MAGIC) { 3244 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); 3245 return (0); 3246 } 3247 3248 if (all->snapshot_time <= 0) { 3249 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); 3250 return (0); 3251 } 3252 3253 if (all->top_devinfo == 0) { 3254 CACHE_DEBUG((DI_ERR, "NULL top devinfo")); 3255 return (0); 3256 } 3257 3258 if (all->map_size < sizeof (*all) + 1) { 3259 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); 3260 return (0); 3261 } 3262 3263 if (all->root_path[0] != '/' || all->root_path[1] != '\0') { 3264 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", 3265 all->root_path[0], all->root_path[1])); 3266 return (0); 3267 } 3268 3269 /* 3270 * We can't check checksum here as we just have the header 3271 */ 3272 3273 return (1); 3274 } 3275 3276 static int 3277 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) 3278 { 3279 rlim64_t rlimit; 3280 ssize_t resid; 3281 int error = 0; 3282 3283 3284 rlimit = RLIM64_INFINITY; 3285 3286 while (len) { 3287 resid = 0; 3288 error = vn_rdwr(UIO_WRITE, vp, buf, len, off, 3289 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 3290 3291 if (error || resid < 0) { 3292 error = error ? error : EIO; 3293 CACHE_DEBUG((DI_ERR, "write error: %d", error)); 3294 break; 3295 } 3296 3297 /* 3298 * Check if we are making progress 3299 */ 3300 if (resid >= len) { 3301 error = ENOSPC; 3302 break; 3303 } 3304 buf += len - resid; 3305 off += len - resid; 3306 len = resid; 3307 } 3308 3309 return (error); 3310 } 3311 3312 extern int modrootloaded; 3313 3314 static void 3315 di_cache_write(struct di_cache *cache) 3316 { 3317 struct di_all *all; 3318 struct vnode *vp; 3319 int oflags; 3320 size_t map_size; 3321 size_t chunk; 3322 offset_t off; 3323 int error; 3324 char *buf; 3325 3326 ASSERT(DI_CACHE_LOCKED(*cache)); 3327 ASSERT(!servicing_interrupt()); 3328 3329 if (cache->cache_size == 0) { 3330 ASSERT(cache->cache_data == NULL); 3331 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); 3332 return; 3333 } 3334 3335 ASSERT(cache->cache_size > 0); 3336 ASSERT(cache->cache_data); 3337 3338 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { 3339 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); 3340 return; 3341 } 3342 3343 all = (struct di_all *)cache->cache_data; 3344 3345 if (!header_plus_one_ok(all)) { 3346 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); 3347 return; 3348 } 3349 3350 ASSERT(strcmp(all->root_path, "/") == 0); 3351 3352 /* 3353 * The cache_size is the total allocated memory for the cache. 3354 * The map_size is the actual size of valid data in the cache. 3355 * map_size may be smaller than cache_size but cannot exceed 3356 * cache_size. 3357 */ 3358 if (all->map_size > cache->cache_size) { 3359 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." 
3360 " Skipping write", all->map_size, cache->cache_size)); 3361 return; 3362 } 3363 3364 /* 3365 * First unlink the temp file 3366 */ 3367 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3368 if (error && error != ENOENT) { 3369 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3370 DI_CACHE_TEMP, error)); 3371 } 3372 3373 if (error == EROFS) { 3374 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3375 return; 3376 } 3377 3378 vp = NULL; 3379 oflags = (FCREAT|FWRITE); 3380 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3381 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3382 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3383 DI_CACHE_TEMP, error)); 3384 return; 3385 } 3386 3387 ASSERT(vp); 3388 3389 /* 3390 * Paranoid: Check if the file is on a read-only FS 3391 */ 3392 if (vn_is_readonly(vp)) { 3393 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3394 goto fail; 3395 } 3396 3397 /* 3398 * Note that we only write map_size bytes to disk - this saves 3399 * space as the actual cache size may be larger than size of 3400 * valid data in the cache. 3401 * Another advantage is that it makes verification of size 3402 * easier when the file is read later. 3403 */ 3404 map_size = all->map_size; 3405 off = 0; 3406 buf = cache->cache_data; 3407 3408 while (map_size) { 3409 ASSERT(map_size > 0); 3410 /* 3411 * Write in chunks so that VM system 3412 * is not overwhelmed 3413 */ 3414 if (map_size > di_chunk * PAGESIZE) 3415 chunk = di_chunk * PAGESIZE; 3416 else 3417 chunk = map_size; 3418 3419 error = chunk_write(vp, off, buf, chunk); 3420 if (error) { 3421 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3422 off, error)); 3423 goto fail; 3424 } 3425 3426 off += chunk; 3427 buf += chunk; 3428 map_size -= chunk; 3429 3430 /* Give pageout a chance to run */ 3431 delay(1); 3432 } 3433 3434 /* 3435 * Now sync the file and close it 3436 */ 3437 if (error = VOP_FSYNC(vp, FSYNC, kcred)) { 3438 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3439 } 3440 3441 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred)) { 3442 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3443 VN_RELE(vp); 3444 return; 3445 } 3446 3447 VN_RELE(vp); 3448 3449 /* 3450 * Now do the rename 3451 */ 3452 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3453 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3454 return; 3455 } 3456 3457 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3458 3459 return; 3460 3461 fail: 3462 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred); 3463 VN_RELE(vp); 3464 } 3465 3466 3467 /* 3468 * Since we could be called early in boot, 3469 * use kobj_read_file() 3470 */ 3471 static void 3472 di_cache_read(struct di_cache *cache) 3473 { 3474 struct _buf *file; 3475 struct di_all *all; 3476 int n; 3477 size_t map_size, sz, chunk; 3478 offset_t off; 3479 caddr_t buf; 3480 uint32_t saved_crc, crc; 3481 3482 ASSERT(modrootloaded); 3483 ASSERT(DI_CACHE_LOCKED(*cache)); 3484 ASSERT(cache->cache_data == NULL); 3485 ASSERT(cache->cache_size == 0); 3486 ASSERT(!servicing_interrupt()); 3487 3488 file = kobj_open_file(DI_CACHE_FILE); 3489 if (file == (struct _buf *)-1) { 3490 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3491 DI_CACHE_FILE, ENOENT)); 3492 return; 3493 } 3494 3495 /* 3496 * Read in the header+root_path first. 
The root_path must be "/" 3497 */ 3498 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); 3499 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); 3500 3501 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { 3502 kmem_free(all, sizeof (*all) + 1); 3503 kobj_close_file(file); 3504 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); 3505 return; 3506 } 3507 3508 map_size = all->map_size; 3509 3510 kmem_free(all, sizeof (*all) + 1); 3511 3512 ASSERT(map_size >= sizeof (*all) + 1); 3513 3514 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); 3515 sz = map_size; 3516 off = 0; 3517 while (sz) { 3518 /* Don't overload VM with large reads */ 3519 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; 3520 n = kobj_read_file(file, buf, chunk, off); 3521 if (n != chunk) { 3522 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", 3523 DI_CACHE_FILE, off)); 3524 goto fail; 3525 } 3526 off += chunk; 3527 buf += chunk; 3528 sz -= chunk; 3529 } 3530 3531 ASSERT(off == map_size); 3532 3533 /* 3534 * Read past expected EOF to verify size. 3535 */ 3536 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { 3537 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); 3538 goto fail; 3539 } 3540 3541 all = (struct di_all *)di_cache.cache_data; 3542 if (!header_plus_one_ok(all)) { 3543 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); 3544 goto fail; 3545 } 3546 3547 /* 3548 * Compute CRC with checksum field in the cache data set to 0 3549 */ 3550 saved_crc = all->cache_checksum; 3551 all->cache_checksum = 0; 3552 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); 3553 all->cache_checksum = saved_crc; 3554 3555 if (crc != all->cache_checksum) { 3556 CACHE_DEBUG((DI_ERR, 3557 "%s: checksum error: expected=0x%x actual=0x%x", 3558 DI_CACHE_FILE, all->cache_checksum, crc)); 3559 goto fail; 3560 } 3561 3562 if (all->map_size != map_size) { 3563 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); 3564 goto fail; 3565 } 3566 3567 kobj_close_file(file); 3568 3569 di_cache.cache_size = map_size; 3570 3571 return; 3572 3573 fail: 3574 kmem_free(di_cache.cache_data, map_size); 3575 kobj_close_file(file); 3576 di_cache.cache_data = NULL; 3577 di_cache.cache_size = 0; 3578 } 3579 3580 3581 /* 3582 * Checks if arguments are valid for using the cache. 
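 * Returns 1 if the cached snapshot may be used for this request;
 * otherwise returns 0 and sets *error (ENOTACTIVE if the I/O
 * subsystem is not yet initialized, EINVAL for a bad flag
 * combination or root path). Only a bare DINFOCACHE request
 * rooted at "/" qualifies.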
3583 */ 3584 static int 3585 cache_args_valid(struct di_state *st, int *error) 3586 { 3587 ASSERT(error); 3588 ASSERT(st->mem_size > 0); 3589 ASSERT(st->memlist != NULL); 3590 3591 if (!modrootloaded || !i_ddi_io_initialized()) { 3592 CACHE_DEBUG((DI_ERR, 3593 "cache lookup failure: I/O subsystem not inited")); 3594 *error = ENOTACTIVE; 3595 return (0); 3596 } 3597 3598 /* 3599 * No other flags allowed with DINFOCACHE 3600 */ 3601 if (st->command != (DINFOCACHE & DIIOC_MASK)) { 3602 CACHE_DEBUG((DI_ERR, 3603 "cache lookup failure: bad flags: 0x%x", 3604 st->command)); 3605 *error = EINVAL; 3606 return (0); 3607 } 3608 3609 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3610 CACHE_DEBUG((DI_ERR, 3611 "cache lookup failure: bad root: %s", 3612 DI_ALL_PTR(st)->root_path)); 3613 *error = EINVAL; 3614 return (0); 3615 } 3616 3617 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); 3618 3619 *error = 0; 3620 3621 return (1); 3622 } 3623 3624 static int 3625 snapshot_is_cacheable(struct di_state *st) 3626 { 3627 ASSERT(st->mem_size > 0); 3628 ASSERT(st->memlist != NULL); 3629 3630 if (st->command != (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { 3631 CACHE_DEBUG((DI_INFO, 3632 "not cacheable: incompatible flags: 0x%x", 3633 st->command)); 3634 return (0); 3635 } 3636 3637 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3638 CACHE_DEBUG((DI_INFO, 3639 "not cacheable: incompatible root path: %s", 3640 DI_ALL_PTR(st)->root_path)); 3641 return (0); 3642 } 3643 3644 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); 3645 3646 return (1); 3647 } 3648 3649 static int 3650 di_cache_lookup(struct di_state *st) 3651 { 3652 size_t rval; 3653 int cache_valid; 3654 3655 ASSERT(cache_args_valid(st, &cache_valid)); 3656 ASSERT(modrootloaded); 3657 3658 DI_CACHE_LOCK(di_cache); 3659 3660 /* 3661 * The following assignment determines the validity 3662 * of the cache as far as this snapshot is concerned. 3663 */ 3664 cache_valid = di_cache.cache_valid; 3665 3666 if (cache_valid && di_cache.cache_data == NULL) { 3667 di_cache_read(&di_cache); 3668 /* check for read or file error */ 3669 if (di_cache.cache_data == NULL) 3670 cache_valid = 0; 3671 } 3672 3673 if (cache_valid) { 3674 /* 3675 * Ok, the cache was valid as of this particular 3676 * snapshot. Copy the cached snapshot. This is safe 3677 * to do as the cache cannot be freed (we hold the 3678 * cache lock). Free the memory allocated in di_state 3679 * up until this point - we will simply copy everything 3680 * in the cache. 3681 */ 3682 3683 ASSERT(di_cache.cache_data != NULL); 3684 ASSERT(di_cache.cache_size > 0); 3685 3686 di_freemem(st); 3687 3688 rval = 0; 3689 if (di_cache2mem(&di_cache, st) > 0) { 3690 3691 ASSERT(DI_ALL_PTR(st)); 3692 3693 /* 3694 * map_size is size of valid data in the 3695 * cached snapshot and may be less than 3696 * size of the cache. 3697 */ 3698 rval = DI_ALL_PTR(st)->map_size; 3699 3700 ASSERT(rval >= sizeof (struct di_all)); 3701 ASSERT(rval <= di_cache.cache_size); 3702 } 3703 } else { 3704 /* 3705 * The cache isn't valid, we need to take a snapshot. 3706 * Set the command flags appropriately 3707 */ 3708 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); 3709 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); 3710 rval = di_cache_update(st); 3711 st->command = (DINFOCACHE & DIIOC_MASK); 3712 } 3713 3714 DI_CACHE_UNLOCK(di_cache); 3715 3716 /* 3717 * For cached snapshots, the devinfo driver always returns 3718 * a snapshot rooted at "/". 
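 * The return value is the map_size of the snapshot delivered to
 * the caller, or 0 on failure.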
3719 */ 3720 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); 3721 3722 return (rval); 3723 } 3724 3725 /* 3726 * This is a forced update of the cache - the previous state of the cache 3727 * may be: 3728 * - unpopulated 3729 * - populated and invalid 3730 * - populated and valid 3731 */ 3732 static int 3733 di_cache_update(struct di_state *st) 3734 { 3735 int rval; 3736 uint32_t crc; 3737 struct di_all *all; 3738 3739 ASSERT(DI_CACHE_LOCKED(di_cache)); 3740 ASSERT(snapshot_is_cacheable(st)); 3741 3742 /* 3743 * Free the in-core cache and the on-disk file (if they exist) 3744 */ 3745 i_ddi_di_cache_free(&di_cache); 3746 3747 /* 3748 * Set valid flag before taking the snapshot, 3749 * so that any invalidations that arrive 3750 * during or after the snapshot are not 3751 * removed by us. 3752 */ 3753 atomic_or_32(&di_cache.cache_valid, 1); 3754 3755 modunload_disable(); 3756 rval = di_snapshot(st); 3757 modunload_enable(); 3758 3759 if (rval == 0) { 3760 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); 3761 return (0); 3762 } 3763 3764 DI_ALL_PTR(st)->map_size = rval; 3765 3766 if (di_mem2cache(st, &di_cache) == 0) { 3767 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); 3768 return (0); 3769 } 3770 3771 ASSERT(di_cache.cache_data); 3772 ASSERT(di_cache.cache_size > 0); 3773 3774 /* 3775 * Now that we have cached the snapshot, compute its checksum. 3776 * The checksum is only computed over the valid data in the 3777 * cache, not the entire cache. 3778 * Also, set all the fields (except checksum) before computing 3779 * checksum. 3780 */ 3781 all = (struct di_all *)di_cache.cache_data; 3782 all->cache_magic = DI_CACHE_MAGIC; 3783 all->map_size = rval; 3784 3785 ASSERT(all->cache_checksum == 0); 3786 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); 3787 all->cache_checksum = crc; 3788 3789 di_cache_write(&di_cache); 3790 3791 return (rval); 3792 } 3793 3794 static void 3795 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 3796 { 3797 va_list ap; 3798 3799 if (di_cache_debug <= DI_QUIET) 3800 return; 3801 3802 if (di_cache_debug < msglevel) 3803 return; 3804 3805 switch (msglevel) { 3806 case DI_ERR: 3807 msglevel = CE_WARN; 3808 break; 3809 case DI_INFO: 3810 case DI_TRACE: 3811 default: 3812 msglevel = CE_NOTE; 3813 break; 3814 } 3815 3816 va_start(ap, fmt); 3817 vcmn_err(msglevel, fmt, ap); 3818 va_end(ap); 3819 } 3820
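/*
 * Illustrative userland consumer (a sketch for orientation, not part
 * of this driver): a libdevinfo client reaches the snapshot code
 * above through di_init(3DEVINFO), which opens the devinfo driver's
 * minor node and requests a snapshot on the caller's behalf.
 *
 *	#include <libdevinfo.h>
 *	#include <err.h>
 *
 *	di_node_t root;
 *
 *	if ((root = di_init("/", DINFOCPYALL)) == DI_NODE_NIL)
 *		err(1, "di_init");
 *	... walk the tree with di_walk_node() ...
 *	di_fini(root);
 */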