/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * driver for accessing kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args) if (di_debug >= 1) cmn_err args
#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move snapshot to different platforms
 */
#define	DI_ALIGN(addr)	((addr + 7l) & ~7l)

/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};
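
/*
 * Illustrative sketch (example values only): a di_off_t is a logical
 * offset into the snapshot, spanning the whole chunk list.  With two
 * chunks of 0x4000 bytes each,
 *
 *	memlist -> [buf_size 0x4000] -> [buf_size 0x4000] -> NULL
 *
 * offset 0x5000 resolves to byte 0x1000 of the second chunk; this is
 * exactly the translation di_mem_addr() performs below.
 */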
/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info	*dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
	    (stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, offp)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(offp); \
	((stack)->depth)++; \
}

#define	DI_ALL_PTR(s)	((struct di_all *)di_mem_addr((s), 0))

/*
 * With devfs, the device tree has no global locks. The device tree is
 * dynamic and dips may come and go if they are not locked locally. Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree. This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};


struct i_lnode;

typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link /w same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link /w same i_lnode */
} i_link_t;

typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;
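
/*
 * Sketch of the layering model built below: for a consumer C opened on
 * top of a target T via LDI, di_ldi_callback() produces
 *
 *	i_lnode{C dip, C devt} --i_link{spec_type}--> i_lnode{T dip, T devt}
 *
 * with link_out chaining all i_links that share the source lnode and
 * link_in chaining all i_links that share the target lnode.
 */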
/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct di_stack *, struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(struct ddi_prop *, di_off_t *,
    struct di_state *, struct dev_info *, int);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static caddr_t di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
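
/*
 * Sketch of the snapshot ioctl state machine driven by di_setstate():
 *
 *	IOC_IDLE -> IOC_SNAP	(snapshot ioctl issued)
 *	IOC_SNAP -> IOC_DONE	(snapshot succeeded)
 *	IOC_SNAP -> IOC_IDLE	(snapshot failed, memory freed)
 *	IOC_DONE -> IOC_COPY	(DINFOUSRLD copyout in progress)
 *	IOC_COPY -> IOC_DONE	(copyout faulted; snapshot retained)
 *	IOC_COPY -> IOC_IDLE	(copyout finished, memory freed)
 */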
static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver %I%",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int	error;

	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error != 0) {
		mutex_destroy(&di_lock);
		return (error);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	error;

	error = mod_remove(&modlinkage);
	if (error != 0) {
		return (error);
	}

	mutex_destroy(&di_lock);
	return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int	error = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)di_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * All dev_t's map to the same, single instance.
		 */
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		break;
	}

	return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_ATTACH:
		di_states = kmem_zalloc(
		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
			kmem_free(di_states,
			    di_max_opens * sizeof (struct di_state *));
			ddi_remove_minor_node(dip, NULL);
			error = DDI_FAILURE;
		} else {
			di_dip = dip;
			ddi_report_dev(dip);

			error = DDI_SUCCESS;
		}
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_DETACH:
		ddi_remove_minor_node(dip, NULL);
		di_dip = NULL;
		kmem_free(di_states,
		    di_max_opens * sizeof (struct di_state *));

		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}
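
/*
 * Usage sketch (the /devices paths here are an assumption for
 * illustration): the two minor nodes created above typically surface as
 *
 *	/devices/pseudo/devinfo@0:devinfo	(DI_FULL_PARENT)
 *	/devices/pseudo/devinfo@0:devinfo,ro	(DI_READONLY_PARENT)
 *
 * and di_open() below then clones the dev_t into a per-open minor.
 */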
471 */ 472 /*ARGSUSED*/ 473 static int 474 di_open(dev_t *devp, int flag, int otyp, cred_t *credp) 475 { 476 int m; 477 minor_t minor_parent = getminor(*devp); 478 479 if (minor_parent != DI_FULL_PARENT && 480 minor_parent != DI_READONLY_PARENT) 481 return (ENXIO); 482 483 mutex_enter(&di_lock); 484 485 for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) { 486 if (di_states[m] != NULL) 487 continue; 488 489 di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP); 490 break; /* It's ours. */ 491 } 492 493 if (m >= di_max_opens) { 494 /* 495 * maximum open instance for device reached 496 */ 497 mutex_exit(&di_lock); 498 dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached")); 499 return (EAGAIN); 500 } 501 mutex_exit(&di_lock); 502 503 ASSERT(m < di_max_opens); 504 *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES)); 505 506 dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n", 507 (void *)curthread, m + DI_NODE_SPECIES)); 508 509 return (0); 510 } 511 512 /*ARGSUSED*/ 513 static int 514 di_close(dev_t dev, int flag, int otype, cred_t *cred_p) 515 { 516 struct di_state *st; 517 int m = (int)getminor(dev) - DI_NODE_SPECIES; 518 519 if (m < 0) { 520 cmn_err(CE_WARN, "closing non-existent devinfo minor %d", 521 m + DI_NODE_SPECIES); 522 return (ENXIO); 523 } 524 525 st = di_states[m]; 526 ASSERT(m < di_max_opens && st != NULL); 527 528 di_freemem(st); 529 kmem_free(st, sizeof (struct di_state)); 530 531 /* 532 * empty slot in state table 533 */ 534 mutex_enter(&di_lock); 535 di_states[m] = NULL; 536 dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n", 537 (void *)curthread, m + DI_NODE_SPECIES)); 538 mutex_exit(&di_lock); 539 540 return (0); 541 } 542 543 544 /*ARGSUSED*/ 545 static int 546 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 547 { 548 int rv, error; 549 di_off_t off; 550 struct di_all *all; 551 struct di_state *st; 552 int m = (int)getminor(dev) - DI_NODE_SPECIES; 553 554 major_t i; 555 char *drv_name; 556 size_t map_size, size; 557 struct di_mem *dcp; 558 int ndi_flags; 559 560 if (m < 0 || m >= di_max_opens) { 561 return (ENXIO); 562 } 563 564 st = di_states[m]; 565 ASSERT(st != NULL); 566 567 dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd)); 568 569 switch (cmd) { 570 case DINFOIDENT: 571 /* 572 * This is called from di_init to verify that the driver 573 * opened is indeed devinfo. The purpose is to guard against 574 * sending ioctl to an unknown driver in case of an 575 * unresolved major number conflict during bfu. 576 */ 577 *rvalp = DI_MAGIC; 578 return (0); 579 580 case DINFOLODRV: 581 /* 582 * Hold an installed driver and return the result 583 */ 584 if (DI_UNPRIVILEGED_NODE(m)) { 585 /* 586 * Only the fully enabled instances may issue 587 * DINFOLDDRV. 588 */ 589 return (EACCES); 590 } 591 592 drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); 593 if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) { 594 kmem_free(drv_name, MAXNAMELEN); 595 return (EFAULT); 596 } 597 598 /* 599 * Some 3rd party driver's _init() walks the device tree, 600 * so we load the driver module before configuring driver. 601 */ 602 i = ddi_name_to_major(drv_name); 603 if (ddi_hold_driver(i) == NULL) { 604 kmem_free(drv_name, MAXNAMELEN); 605 return (ENXIO); 606 } 607 608 ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT; 609 610 /* 611 * i_ddi_load_drvconf() below will trigger a reprobe 612 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't 613 * needed here. 
614 */ 615 modunload_disable(); 616 (void) i_ddi_load_drvconf(i); 617 (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i); 618 kmem_free(drv_name, MAXNAMELEN); 619 ddi_rele_driver(i); 620 rv = i_ddi_devs_attached(i); 621 modunload_enable(); 622 623 i_ddi_di_cache_invalidate(KM_SLEEP); 624 625 return ((rv == DDI_SUCCESS)? 0 : ENXIO); 626 627 case DINFOUSRLD: 628 /* 629 * The case for copying snapshot to userland 630 */ 631 if (di_setstate(st, IOC_COPY) == -1) 632 return (EBUSY); 633 634 map_size = ((struct di_all *)di_mem_addr(st, 0))->map_size; 635 if (map_size == 0) { 636 (void) di_setstate(st, IOC_DONE); 637 return (EFAULT); 638 } 639 640 /* 641 * copyout the snapshot 642 */ 643 map_size = (map_size + PAGEOFFSET) & PAGEMASK; 644 645 /* 646 * Return the map size, so caller may do a sanity 647 * check against the return value of snapshot ioctl() 648 */ 649 *rvalp = (int)map_size; 650 651 /* 652 * Copy one chunk at a time 653 */ 654 off = 0; 655 dcp = st->memlist; 656 while (map_size) { 657 size = dcp->buf_size; 658 if (map_size <= size) { 659 size = map_size; 660 } 661 662 if (ddi_copyout(di_mem_addr(st, off), 663 (void *)(arg + off), size, mode) != 0) { 664 (void) di_setstate(st, IOC_DONE); 665 return (EFAULT); 666 } 667 668 map_size -= size; 669 off += size; 670 dcp = dcp->next; 671 } 672 673 di_freemem(st); 674 (void) di_setstate(st, IOC_IDLE); 675 return (0); 676 677 default: 678 if ((cmd & ~DIIOC_MASK) != DIIOC) { 679 /* 680 * Invalid ioctl command 681 */ 682 return (ENOTTY); 683 } 684 /* 685 * take a snapshot 686 */ 687 st->command = cmd & DIIOC_MASK; 688 /*FALLTHROUGH*/ 689 } 690 691 /* 692 * Obtain enough memory to hold header + rootpath. We prevent kernel 693 * memory exhaustion by freeing any previously allocated snapshot and 694 * refusing the operation; otherwise we would be allowing ioctl(), 695 * ioctl(), ioctl(), ..., panic. 696 */ 697 if (di_setstate(st, IOC_SNAP) == -1) 698 return (EBUSY); 699 700 size = sizeof (struct di_all) + 701 sizeof (((struct dinfo_io *)(NULL))->root_path); 702 if (size < PAGESIZE) 703 size = PAGESIZE; 704 di_allocmem(st, size); 705 706 all = (struct di_all *)di_mem_addr(st, 0); 707 all->devcnt = devcnt; 708 all->command = st->command; 709 all->version = DI_SNAPSHOT_VERSION; 710 711 /* 712 * Note the endianness in case we need to transport snapshot 713 * over the network. 714 */ 715 #if defined(_LITTLE_ENDIAN) 716 all->endianness = DI_LITTLE_ENDIAN; 717 #else 718 all->endianness = DI_BIG_ENDIAN; 719 #endif 720 721 /* Copyin ioctl args, store in the snapshot. */ 722 if (copyinstr((void *)arg, all->root_path, 723 sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) { 724 di_freemem(st); 725 (void) di_setstate(st, IOC_IDLE); 726 return (EFAULT); 727 } 728 729 error = 0; 730 if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) { 731 di_freemem(st); 732 (void) di_setstate(st, IOC_IDLE); 733 return (error); 734 } 735 736 off = DI_ALIGN(sizeof (struct di_all) + size); 737 738 /* 739 * Only the fully enabled version may force load drivers or read 740 * the parent private data from a driver. 741 */ 742 if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 && 743 DI_UNPRIVILEGED_NODE(m)) { 744 di_freemem(st); 745 (void) di_setstate(st, IOC_IDLE); 746 return (EACCES); 747 } 748 749 /* Do we need private data? 
	default:
		if ((cmd & ~DIIOC_MASK) != DIIOC) {
			/*
			 * Invalid ioctl command
			 */
			return (ENOTTY);
		}
		/*
		 * take a snapshot
		 */
		st->command = cmd & DIIOC_MASK;
		/*FALLTHROUGH*/
	}

	/*
	 * Obtain enough memory to hold header + rootpath.  We prevent kernel
	 * memory exhaustion by freeing any previously allocated snapshot and
	 * refusing the operation; otherwise we would be allowing ioctl(),
	 * ioctl(), ioctl(), ..., panic.
	 */
	if (di_setstate(st, IOC_SNAP) == -1)
		return (EBUSY);

	size = sizeof (struct di_all) +
	    sizeof (((struct dinfo_io *)(NULL))->root_path);
	if (size < PAGESIZE)
		size = PAGESIZE;
	di_allocmem(st, size);

	all = (struct di_all *)di_mem_addr(st, 0);
	all->devcnt = devcnt;
	all->command = st->command;
	all->version = DI_SNAPSHOT_VERSION;

	/*
	 * Note the endianness in case we need to transport snapshot
	 * over the network.
	 */
#if defined(_LITTLE_ENDIAN)
	all->endianness = DI_LITTLE_ENDIAN;
#else
	all->endianness = DI_BIG_ENDIAN;
#endif

	/* Copyin ioctl args, store in the snapshot. */
	if (copyinstr((void *)arg, all->root_path,
	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	}

	error = 0;
	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (error);
	}

	off = DI_ALIGN(sizeof (struct di_all) + size);

	/*
	 * Only the fully enabled version may force load drivers or read
	 * the parent private data from a driver.
	 */
	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	    DI_UNPRIVILEGED_NODE(m)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EACCES);
	}

	/* Do we need private data? */
	if (st->command & DINFOPRIVDATA) {
		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			/*
			 * Cannot copy private data from 64-bit kernel
			 * to 32-bit app
			 */
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EINVAL);
		}
		case DDI_MODEL_NONE:
			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
				di_freemem(st);
				(void) di_setstate(st, IOC_IDLE);
				return (EFAULT);
			}
			break;
		}
#else /* !_MULTI_DATAMODEL */
		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EFAULT);
		}
#endif /* _MULTI_DATAMODEL */
	}

	all->top_devinfo = DI_ALIGN(off);

	/*
	 * For cache lookups we reallocate memory from scratch,
	 * so the value of "all" is no longer valid.
	 */
	all = NULL;

	if (st->command & DINFOCACHE) {
		*rvalp = di_cache_lookup(st);
	} else if (snapshot_is_cacheable(st)) {
		DI_CACHE_LOCK(di_cache);
		*rvalp = di_cache_update(st);
		DI_CACHE_UNLOCK(di_cache);
	} else {
		modunload_disable();
		*rvalp = di_snapshot(st);
		modunload_enable();
	}

	if (*rvalp) {
		DI_ALL_PTR(st)->map_size = *rvalp;
		(void) di_setstate(st, IOC_DONE);
	} else {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
	}

	return (0);
}

/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
	struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem),
	    KM_SLEEP);
	/*
	 * Round up size to nearest power of 2. If it is less
	 * than st->mem_size, set it to st->mem_size (i.e.,
	 * the mem_size is doubled every time) to reduce the
	 * number of memory allocations.
	 */
	size_t tmp = 1;
	while (tmp < size) {
		tmp <<= 1;
	}
	size = (tmp > st->mem_size) ? tmp : st->mem_size;

	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
	mem->buf_size = size;

	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

	if (st->mem_size == 0) {	/* first chunk */
		st->memlist = mem;
	} else {
		/*
		 * locate end of linked list and add a chunk at the end
		 */
		struct di_mem *dcp = st->memlist;
		while (dcp->next != NULL) {
			dcp = dcp->next;
		}

		dcp->next = mem;
	}

	st->mem_size += size;
}
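
/*
 * Worked example (sketch): with repeated 4K requests the chunks come out
 * 4K, 4K, 8K, 16K, ... because each new chunk is at least mem_size bytes.
 * Total memory doubles per allocation after the second one, so a snapshot
 * of any final size costs only a logarithmic number of chunks.
 */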
/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
	struct di_mem	*dcp;
	size_t		copysz;

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		return;
	}

	copysz = 0;
	for (dcp = st->memlist; dcp; dcp = dcp->next) {

		ASSERT(bufsiz > 0);

		if (bufsiz <= dcp->buf_size)
			copysz = bufsiz;
		else
			copysz = dcp->buf_size;

		bcopy(dcp->buf, buf, copysz);

		buf += copysz;
		bufsiz -= copysz;

		if (bufsiz == 0)
			break;
	}
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
	struct di_mem	*dcp, *tmp;

	dcmn_err2((CE_CONT, "di_freemem\n"));

	if (st->mem_size) {
		dcp = st->memlist;
		while (dcp) {	/* traverse the linked list */
			tmp = dcp;
			dcp = dcp->next;
			ddi_umem_free(tmp->cook);
			kmem_free(tmp, sizeof (struct di_mem));
		}
		st->mem_size = 0;
		st->memlist = NULL;
	}

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
}

/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
	caddr_t	pa;

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
		return (0);
	}

	ASSERT(cache->cache_data);

	di_allocmem(st, cache->cache_size);

	pa = di_mem_addr(st, 0);

	ASSERT(pa);

	/*
	 * Verify that di_allocmem() allocates contiguous memory,
	 * so that it is safe to do straight bcopy()
	 */
	ASSERT(st->memlist != NULL);
	ASSERT(st->memlist->next == NULL);
	bcopy(cache->cache_data, pa, cache->cache_size);

	return (cache->cache_size);
}
/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *	- 0 on failure
 *	- size of copied data on success
 */
static int
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
	size_t	map_size;

	ASSERT(cache->cache_size == 0);
	ASSERT(cache->cache_data == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
		return (0);
	}

	ASSERT(st->memlist);

	/*
	 * The size of the memory list may be much larger than the
	 * size of valid data (map_size).  Cache only the valid data.
	 */
	map_size = DI_ALL_PTR(st)->map_size;
	if (map_size == 0 || map_size < sizeof (struct di_all) ||
	    map_size > st->mem_size) {
		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
		return (0);
	}

	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
	cache->cache_size = map_size;
	di_copymem(st, cache->cache_data, cache->cache_size);

	return (map_size);
}

/*
 * Make sure there is at least "size" bytes of memory left before
 * going on. Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
	    off, (int)size));

	/*
	 * di_checkmem() shouldn't be called with a size of zero.
	 * But in case it is, we want to make sure we return a valid
	 * offset within the memlist and not an offset that points us
	 * at the end of the memlist.
	 */
	if (size == 0) {
		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
		size = 1;
	}

	off = DI_ALIGN(off);
	if ((st->mem_size - off) < size) {
		off = st->mem_size;
		di_allocmem(st, size);
	}

	return (off);
}
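
/*
 * Canonical usage pattern (sketch), repeated throughout the copy
 * routines below:
 *
 *	off = di_checkmem(st, off, size);	(may start a new chunk)
 *	me->field = off;			(record snapshot offset)
 *	bcopy(src, di_mem_addr(st, off), size);
 *	off += size;
 *
 * Offsets rather than pointers are stored so that the snapshot stays
 * position independent when userland maps it.
 */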
/*
 * Copy the private data format from ioctl arg.
 * On success, the ending offset is returned. On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
	di_off_t		size;
	struct di_priv_data	*priv;
	struct di_all		*all = (struct di_all *)di_mem_addr(st, 0);

	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	    off, (void *)arg, mode));

	/*
	 * Copyin data and check version.
	 * We only handle private data version 0.
	 */
	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
		kmem_free(priv, sizeof (struct di_priv_data));
		return (0);
	}

	/*
	 * Save di_priv_data copied from userland in snapshot.
	 */
	all->pd_version = priv->version;
	all->n_ppdata = priv->n_parent;
	all->n_dpdata = priv->n_driver;

	/*
	 * copyin private data format, modify offset accordingly
	 */
	if (all->n_ppdata) {	/* parent private data format */
		/*
		 * check memory
		 */
		size = all->n_ppdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->ppdata_format = off;
		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	if (all->n_dpdata) {	/* driver private data format */
		/*
		 * check memory
		 */
		size = all->n_dpdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->dpdata_format = off;
		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	kmem_free(priv, sizeof (struct di_priv_data));
	return (off);
}

/*
 * Return the real address based on the offset (off) within snapshot
 */
static caddr_t
di_mem_addr(struct di_state *st, di_off_t off)
{
	struct di_mem *dcp = st->memlist;

	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	    (void *)dcp, off));

	ASSERT(off < st->mem_size);

	while (off >= dcp->buf_size) {
		off -= dcp->buf_size;
		dcp = dcp->next;
	}

	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	    off, (void *)(dcp->buf + off)));

	return (dcp->buf + off);
}

/*
 * Ideally we would use the whole key to derive the hash
 * value. However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
	struct di_key	*dik = key;
	size_t		rshift;
	void		*ptr;

	ASSERT(arg == NULL);

	switch (dik->k_type) {
	case DI_DKEY:
		ptr = dik->k_u.dkey.dk_dip;
		rshift = highbit(sizeof (struct dev_info));
		break;
	case DI_PKEY:
		ptr = dik->k_u.pkey.pk_pip;
		rshift = highbit(sizeof (struct mdi_pathinfo));
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
	return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
	char		*path_addr;
	struct di_key	*dik = key;

	switch (dik->k_type) {
	case DI_DKEY:
		break;
	case DI_PKEY:
		path_addr = dik->k_u.pkey.pk_path_addr;
		if (path_addr)
			kmem_free(path_addr, strlen(path_addr) + 1);
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}

	kmem_free(dik, sizeof (struct di_key));
}

static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
	if (dk1->dk_dip != dk2->dk_dip)
		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

	if (dk1->dk_major != -1 && dk2->dk_major != -1) {
		if (dk1->dk_major != dk2->dk_major)
			return (dk1->dk_major > dk2->dk_major ? 1 : -1);

		if (dk1->dk_inst != dk2->dk_inst)
			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
	}

	if (dk1->dk_nodeid != dk2->dk_nodeid)
		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

	return (0);
}
static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
	char	*p1, *p2;
	int	rv;

	if (pk1->pk_pip != pk2->pk_pip)
		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

	p1 = pk1->pk_path_addr;
	p2 = pk2->pk_path_addr;

	p1 = p1 ? p1 : "";
	p2 = p2 ? p2 : "";

	rv = strcmp(p1, p2);
	if (rv)
		return (rv > 0 ? 1 : -1);

	if (pk1->pk_client != pk2->pk_client)
		return (pk1->pk_client > pk2->pk_client ? 1 : -1);

	if (pk1->pk_phci != pk2->pk_phci)
		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

	return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	struct di_key	*dik1, *dik2;

	dik1 = key1;
	dik2 = key2;

	if (dik1->k_type != dik2->k_type) {
		panic("devinfo: mismatched keys");
		/*NOTREACHED*/
	}

	switch (dik1->k_type) {
	case DI_DKEY:
		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
	case DI_PKEY:
		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
}
/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
	di_off_t	off;
	struct di_all	*all;
	dev_info_t	*rootnode;
	char		buf[80];
	int		plen;
	char		*path;
	vnode_t		*vp;

	all = (struct di_all *)di_mem_addr(st, 0);
	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

	/*
	 * Verify path before entrusting it to e_ddi_hold_devi_by_path because
	 * some platforms have OBP bugs where executing the NDI_PROMNAME code
	 * path against an invalid path results in panic.  The lookupnameat
	 * is done relative to rootdir without a leading '/' on "devices/"
	 * to force the lookup to occur in the global zone.
	 */
	plen = strlen("devices/") + strlen(all->root_path) + 1;
	path = kmem_alloc(plen, KM_SLEEP);
	(void) snprintf(path, plen, "devices/%s", all->root_path);
	if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		kmem_free(path, plen);
		return (0);
	}
	kmem_free(path, plen);
	VN_RELE(vp);

	/*
	 * Hold the devinfo node referred by the path.
	 */
	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
	if (rootnode == NULL) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		return (0);
	}

	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered dips (statep=%p)", (void *)st);

	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);


	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered pips (statep=%p)", (void *)st);

	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);

	/*
	 * copy the device tree
	 */
	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

	ddi_release_devi(rootnode);

	/*
	 * copy the devnames array
	 */
	all->devnames = off;
	off = di_copydevnm(&all->devnames, st);


	/* initialize the hash tables */
	st->lnode_count = 0;
	st->link_count = 0;

	if (DINFOLYR & st->command) {
		off = di_getlink_data(off, st);
	}

	/*
	 * Free up hash tables
	 */
	mod_hash_destroy_hash(st->reg_dip_hash);
	mod_hash_destroy_hash(st->reg_pip_hash);

	/*
	 * Record the timestamp now that we are done with snapshot.
	 *
	 * We compute the checksum later and then only if we cache
	 * the snapshot, since checksumming adds some overhead.
	 * The checksum is checked later if we read the cache file
	 * from disk.
	 *
	 * Set checksum field to 0 as CRC is calculated with that
	 * field set to 0.
	 */
	all->snapshot_time = ddi_get_time();
	all->cache_checksum = 0;

	return (off);
}

/*
 * Assumes all devinfo nodes in device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *poff_p)
{
	struct dev_info	*node;
	struct di_node	*me;
	di_off_t	off;

	ASSERT(mutex_owned(&dnp->dn_lock));

	node = DEVI(dnp->dn_head);
	for (; node; node = node->devi_next) {
		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
			continue;

		ASSERT(off > 0);
		me = (struct di_node *)di_mem_addr(st, off);
		ASSERT(me->next == 0 || me->next == -1);
		/*
		 * Only nodes which were BOUND when they were
		 * snapshotted will be added to per-driver list.
		 */
		if (me->next != -1)
			continue;

		*poff_p = off;
		poff_p = &me->next;
	}

	*poff_p = 0;
}
/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * Also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
	int		i;
	di_off_t	off;
	size_t		size;
	struct di_devnm	*dnp;

	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

	/*
	 * make sure there is some allocated memory
	 */
	size = devcnt * sizeof (struct di_devnm);
	off = di_checkmem(st, *off_p, size);
	*off_p = off;

	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	    devcnt, off));

	dnp = (struct di_devnm *)di_mem_addr(st, off);
	off += size;

	for (i = 0; i < devcnt; i++) {
		if (devnamesp[i].dn_name == NULL) {
			continue;
		}

		/*
		 * dn_name is not freed during driver unload or removal.
		 *
		 * There is a race condition when make_devname() changes
		 * dn_name during our strcpy. This should be rare since
		 * only add_drv does this. At any rate, we never had a
		 * problem with ddi_name_to_major(), which should have
		 * the same problem.
		 */
		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
		    devnamesp[i].dn_name, devnamesp[i].dn_instance,
		    off));

		off = di_checkmem(st, off, strlen(devnamesp[i].dn_name) + 1);
		dnp[i].name = off;
		(void) strcpy((char *)di_mem_addr(st, off),
		    devnamesp[i].dn_name);
		off += DI_ALIGN(strlen(devnamesp[i].dn_name) + 1);

		mutex_enter(&devnamesp[i].dn_lock);

		/*
		 * Snapshot per-driver node list
		 */
		snap_driver_list(st, &devnamesp[i], &dnp[i].head);

		/*
		 * This is not used by libdevinfo, leave it for now
		 */
		dnp[i].flags = devnamesp[i].dn_flags;
		dnp[i].instance = devnamesp[i].dn_instance;

		/*
		 * get global properties
		 */
		if ((DINFOPROP & st->command) &&
		    devnamesp[i].dn_global_prop_ptr) {
			dnp[i].global_prop = off;
			off = di_getprop(
			    devnamesp[i].dn_global_prop_ptr->prop_list,
			    &dnp[i].global_prop, st, NULL, DI_PROP_GLB_LIST);
		}

		/*
		 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
		 */
		if (CB_DRV_INSTALLED(devopsp[i])) {
			if (devopsp[i]->devo_cb_ops) {
				dnp[i].ops |= DI_CB_OPS;
				if (devopsp[i]->devo_cb_ops->cb_str)
					dnp[i].ops |= DI_STREAM_OPS;
			}
			if (NEXUS_DRV(devopsp[i])) {
				dnp[i].ops |= DI_BUS_OPS;
			}
		}

		mutex_exit(&devnamesp[i].dn_lock);
	}

	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

	return (off);
}

/*
 * Copy the kernel devinfo tree. The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t	off;
	struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/* force attach drivers */
	if ((i_ddi_node_state((dev_info_t *)root) == DS_READY) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.
	 */
	while (!EMPTY_STACK(dsp)) {
		off = di_copynode(dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}
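
/*
 * Traversal sketch: di_copynode() visits nodes in preorder.  For a tree
 * root -> (A, B) where A has child A1, the copy order is
 *
 *	root, A, A1, B
 *
 * with PUSH_STACK/POP_STACK holding each ancestor via ndi_devi_enter()
 * for exactly as long as its subtree is being copied.
 */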
/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot. The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct di_stack *dsp, struct di_state *st)
{
	di_off_t	off;
	struct di_node	*me;
	struct dev_info	*node;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n",
	    dsp->depth));

	node = TOP_NODE(dsp);

	ASSERT(node != NULL);

	/*
	 * check memory usage, and fix offsets accordingly.
	 */
	off = di_checkmem(st, *(TOP_OFFSET(dsp)), sizeof (struct di_node));
	*(TOP_OFFSET(dsp)) = off;
	me = DI_NODE(di_mem_addr(st, off));

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state (CF1, CF2, etc.)
	 */
	me->self = off;
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->node_state = node->devi_node_state;
	me->user_private_data = NULL;

	/*
	 * Get parent's offset in snapshot from the stack
	 * and store it in the current node
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);

	/*
	 * increment offset
	 */
	off += sizeof (struct di_node);

#ifdef	DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid) {
		ddi_devid_t	devid;
		char		*devidstr;
		int		devid_size;

		/*
		 * The devid is now represented as a property.
		 * For micro release compatibility with di_devid interface
		 * in libdevinfo we must return it as a binary structure in
		 * the snapshot.  When di_devid is removed from libdevinfo
		 * in a future release (and devi_devid is deleted) then
		 * code related to DEVID_COMPATIBILITY can be removed.
		 */
		ASSERT(node->devi_devid == DEVID_COMPATIBILITY);
		/* XXX should be DDI_DEV_T_NONE! */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, (dev_info_t *)node,
		    DDI_PROP_DONTPASS, DEVID_PROP_NAME, &devidstr) ==
		    DDI_PROP_SUCCESS) {
			if (ddi_devid_str_decode(devidstr, &devid, NULL) ==
			    DDI_SUCCESS) {
				devid_size = ddi_devid_sizeof(devid);
				off = di_checkmem(st, off, devid_size);
				me->devid = off;
				bcopy(devid,
				    di_mem_addr(st, off), devid_size);
				off += devid_size;
				ddi_devid_free(devid);
			}
			ddi_prop_free(devidstr);
		}
	}
#endif	/* DEVID_COMPATIBILITY */
	if (node->devi_node_name) {
		off = di_checkmem(st, off, strlen(node->devi_node_name) + 1);
		me->node_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += strlen(node->devi_node_name) + 1;
	}

	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		off = di_checkmem(st, off, node->devi_compat_length);
		me->compat_names = off;
		me->compat_length = node->devi_compat_length;
		bcopy(node->devi_compat_names, di_mem_addr(st, off),
		    node->devi_compat_length);
		off += node->devi_compat_length;
	}

	if (node->devi_addr) {
		off = di_checkmem(st, off, strlen(node->devi_addr) + 1);
		me->address = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += strlen(node->devi_addr) + 1;
	}

	if (node->devi_binding_name) {
		off = di_checkmem(st, off,
		    strlen(node->devi_binding_name) + 1);
		me->bind_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += strlen(node->devi_binding_name) + 1;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-instance list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
		goto priv_data;
	}

	/*
	 * Grab current per dev_info node lock to
	 * get minor data and properties.
	 */
	mutex_enter(&(node->devi_lock));

	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	if (node->devi_minor) {		/* minor data */
		me->minor_data = DI_ALIGN(off);
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_client = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_client, me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}

	if (MDI_PHCI(node)) {
		me->multipath_phci = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_phci, me->self, st, 0);
		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_phci,
		    (void *)node, node->devi_mdi_component, off));
	}
property:
	if (!(DINFOPROP & st->command)) {
		goto unlock;
	}

	if (node->devi_drv_prop_ptr) {	/* driver property list */
		me->drv_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st,
		    node, DI_PROP_DRV_LIST);
	}

	if (node->devi_sys_prop_ptr) {	/* system property list */
		me->sys_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st,
		    node, DI_PROP_SYS_LIST);
	}

	if (node->devi_hw_prop_ptr) {	/* hardware property list */
		me->hw_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st,
		    node, DI_PROP_HW_LIST);
	}

	if (node->devi_global_prop_list == NULL) {
		me->glob_prop = (di_off_t)-1;	/* not global property */
	} else {
		/*
		 * Make a copy of the global property list if this devinfo
		 * refers to global properties different from what's on the
		 * devnames array. It can happen if there has been a forced
		 * driver.conf update. See mod_drv(1M).
		 */
		ASSERT(me->drv_major != -1);
		if (node->devi_global_prop_list !=
		    devnamesp[me->drv_major].dn_global_prop_ptr) {
			me->glob_prop = DI_ALIGN(off);
			off = di_getprop(
			    node->devi_global_prop_list->prop_list,
			    &me->glob_prop, st, node, DI_PROP_GLB_LIST);
		}
	}

unlock:
	/*
	 * release current per dev_info node lock
	 */
	mutex_exit(&(node->devi_lock));

priv_data:
	if (!(DINFOPRIVDATA & st->command)) {
		goto pm_info;
	}

	if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
		me->parent_data = DI_ALIGN(off);
		off = di_getppdata(node, &me->parent_data, st);
	}

	if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
		me->driver_data = DI_ALIGN(off);
		off = di_getdpdata(node, &me->driver_data, st);
	}

pm_info: /* NOT implemented */

subtree:
	if (!(DINFOSUBTREE & st->command)) {
		POP_STACK(dsp);
		return (DI_ALIGN(off));
	}

child:
	/*
	 * If there is a child--push child onto stack.
	 * Hold the parent busy while doing so.
	 */
	if (node->devi_child) {
		me->child = DI_ALIGN(off);
		PUSH_STACK(dsp, node->devi_child, &me->child);
		return (me->child);
	}
1798 */ 1799 if (node->devi_child) { 1800 me->child = DI_ALIGN(off); 1801 PUSH_STACK(dsp, node->devi_child, &me->child); 1802 return (me->child); 1803 } 1804 1805 sibling: 1806 /* 1807 * no child node, unroll the stack till a sibling of 1808 * a parent node is found or root node is reached 1809 */ 1810 POP_STACK(dsp); 1811 while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) { 1812 node = TOP_NODE(dsp); 1813 me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp)))); 1814 POP_STACK(dsp); 1815 } 1816 1817 if (!EMPTY_STACK(dsp)) { 1818 /* 1819 * a sibling is found, replace top of stack by its sibling 1820 */ 1821 me->sibling = DI_ALIGN(off); 1822 PUSH_STACK(dsp, node->devi_sibling, &me->sibling); 1823 return (me->sibling); 1824 } 1825 1826 /* 1827 * DONE with all nodes 1828 */ 1829 return (DI_ALIGN(off)); 1830 } 1831 1832 static i_lnode_t * 1833 i_lnode_alloc(int modid) 1834 { 1835 i_lnode_t *i_lnode; 1836 1837 i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP); 1838 1839 ASSERT(modid != -1); 1840 i_lnode->modid = modid; 1841 1842 return (i_lnode); 1843 } 1844 1845 static void 1846 i_lnode_free(i_lnode_t *i_lnode) 1847 { 1848 kmem_free(i_lnode, sizeof (i_lnode_t)); 1849 } 1850 1851 static void 1852 i_lnode_check_free(i_lnode_t *i_lnode) 1853 { 1854 /* This lnode and its dip must have been snapshotted */ 1855 ASSERT(i_lnode->self > 0); 1856 ASSERT(i_lnode->di_node->self > 0); 1857 1858 /* at least 1 link (in or out) must exist for this lnode */ 1859 ASSERT(i_lnode->link_in || i_lnode->link_out); 1860 1861 i_lnode_free(i_lnode); 1862 } 1863 1864 static i_link_t * 1865 i_link_alloc(int spec_type) 1866 { 1867 i_link_t *i_link; 1868 1869 i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP); 1870 i_link->spec_type = spec_type; 1871 1872 return (i_link); 1873 } 1874 1875 static void 1876 i_link_check_free(i_link_t *i_link) 1877 { 1878 /* This link must have been snapshotted */ 1879 ASSERT(i_link->self > 0); 1880 1881 /* Both endpoint lnodes must exist for this link */ 1882 ASSERT(i_link->src_lnode); 1883 ASSERT(i_link->tgt_lnode); 1884 1885 kmem_free(i_link, sizeof (i_link_t)); 1886 } 1887 1888 /*ARGSUSED*/ 1889 static uint_t 1890 i_lnode_hashfunc(void *arg, mod_hash_key_t key) 1891 { 1892 i_lnode_t *i_lnode = (i_lnode_t *)key; 1893 struct di_node *ptr; 1894 dev_t dev; 1895 1896 dev = i_lnode->devt; 1897 if (dev != DDI_DEV_T_NONE) 1898 return (i_lnode->modid + getminor(dev) + getmajor(dev)); 1899 1900 ptr = i_lnode->di_node; 1901 ASSERT(ptr->self > 0); 1902 if (ptr) { 1903 uintptr_t k = (uintptr_t)ptr; 1904 k >>= (int)highbit(sizeof (struct di_node)); 1905 return ((uint_t)k); 1906 } 1907 1908 return (i_lnode->modid); 1909 } 1910 1911 static int 1912 i_lnode_cmp(void *arg1, void *arg2) 1913 { 1914 i_lnode_t *i_lnode1 = (i_lnode_t *)arg1; 1915 i_lnode_t *i_lnode2 = (i_lnode_t *)arg2; 1916 1917 if (i_lnode1->modid != i_lnode2->modid) { 1918 return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1); 1919 } 1920 1921 if (i_lnode1->di_node != i_lnode2->di_node) 1922 return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1); 1923 1924 if (i_lnode1->devt != i_lnode2->devt) 1925 return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1); 1926 1927 return (0); 1928 } 1929 1930 /* 1931 * An lnode represents a {dip, dev_t} tuple. A link represents a 1932 * {src_lnode, tgt_lnode, spec_type} tuple. 1933 * The following callback assumes that LDI framework ref-counts the 1934 * src_dip and tgt_dip while invoking this callback. 
1935 */ 1936 static int 1937 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg) 1938 { 1939 struct di_state *st = (struct di_state *)arg; 1940 i_lnode_t *src_lnode, *tgt_lnode, *i_lnode; 1941 i_link_t **i_link_next, *i_link; 1942 di_off_t soff, toff; 1943 mod_hash_val_t nodep = NULL; 1944 int res; 1945 1946 /* 1947 * if the source or target of this device usage information doesn't 1948 * corrospond to a device node then we don't report it via 1949 * libdevinfo so return. 1950 */ 1951 if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL)) 1952 return (LDI_USAGE_CONTINUE); 1953 1954 ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip)); 1955 ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip)); 1956 1957 /* 1958 * Skip the ldi_usage if either src or tgt dip is not in the 1959 * snapshot. This saves us from pruning bad lnodes/links later. 1960 */ 1961 if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0) 1962 return (LDI_USAGE_CONTINUE); 1963 if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0) 1964 return (LDI_USAGE_CONTINUE); 1965 1966 ASSERT(soff > 0); 1967 ASSERT(toff > 0); 1968 1969 /* 1970 * allocate an i_lnode and add it to the lnode hash 1971 * if it is not already present. For this particular 1972 * link the lnode is a source, but it may 1973 * participate as tgt or src in any number of layered 1974 * operations - so it may already be in the hash. 1975 */ 1976 i_lnode = i_lnode_alloc(ldi_usage->src_modid); 1977 i_lnode->di_node = (struct di_node *)di_mem_addr(st, soff); 1978 i_lnode->devt = ldi_usage->src_devt; 1979 1980 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 1981 if (res == MH_ERR_NOTFOUND) { 1982 /* 1983 * new i_lnode 1984 * add it to the hash and increment the lnode count 1985 */ 1986 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 1987 ASSERT(res == 0); 1988 st->lnode_count++; 1989 src_lnode = i_lnode; 1990 } else { 1991 /* this i_lnode already exists in the lnode_hash */ 1992 i_lnode_free(i_lnode); 1993 src_lnode = (i_lnode_t *)nodep; 1994 } 1995 1996 /* 1997 * allocate a tgt i_lnode and add it to the lnode hash 1998 */ 1999 i_lnode = i_lnode_alloc(ldi_usage->tgt_modid); 2000 i_lnode->di_node = (struct di_node *)di_mem_addr(st, toff); 2001 i_lnode->devt = ldi_usage->tgt_devt; 2002 2003 res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); 2004 if (res == MH_ERR_NOTFOUND) { 2005 /* 2006 * new i_lnode 2007 * add it to the hash and increment the lnode count 2008 */ 2009 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); 2010 ASSERT(res == 0); 2011 st->lnode_count++; 2012 tgt_lnode = i_lnode; 2013 } else { 2014 /* this i_lnode already exists in the lnode_hash */ 2015 i_lnode_free(i_lnode); 2016 tgt_lnode = (i_lnode_t *)nodep; 2017 } 2018 2019 /* 2020 * allocate a i_link 2021 */ 2022 i_link = i_link_alloc(ldi_usage->tgt_spec_type); 2023 i_link->src_lnode = src_lnode; 2024 i_link->tgt_lnode = tgt_lnode; 2025 2026 /* 2027 * add this link onto the src i_lnodes outbound i_link list 2028 */ 2029 i_link_next = &(src_lnode->link_out); 2030 while (*i_link_next != NULL) { 2031 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) && 2032 (i_link->spec_type == (*i_link_next)->spec_type)) { 2033 /* this link already exists */ 2034 kmem_free(i_link, sizeof (i_link_t)); 2035 return (LDI_USAGE_CONTINUE); 2036 } 2037 i_link_next = &((*i_link_next)->src_link_next); 2038 } 2039 *i_link_next = i_link; 2040 2041 /* 2042 * add this link onto the tgt i_lnodes inbound i_link list 2043 */ 2044 i_link_next = &(tgt_lnode->link_in); 2045 while (*i_link_next != NULL) { 
struct i_layer_data {
	struct di_state	*st;
	int		lnode_count;
	int		link_count;
	di_off_t	lnode_off;
	di_off_t	link_off;
};

/*ARGSUSED*/
static uint_t
i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_link_t		*i_link = (i_link_t *)key;
	struct i_layer_data	*data = arg;
	struct di_link		*me;
	struct di_lnode		*melnode;
	struct di_node		*medinode;

	ASSERT(i_link->self == 0);

	i_link->self = data->link_off +
	    (data->link_count * sizeof (struct di_link));
	data->link_count++;

	ASSERT(data->link_off > 0 && data->link_count > 0);
	ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
	ASSERT(data->link_count <= data->st->link_count);

	/* fill in fields for the di_link snapshot */
	me = (struct di_link *)di_mem_addr(data->st, i_link->self);
	me->self = i_link->self;
	me->spec_type = i_link->spec_type;

	/*
	 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
	 * are created during the LDI table walk. Since we are
	 * walking the link hash, the lnode hash has already been
	 * walked and the lnodes have been snapshotted. Save lnode
	 * offsets.
	 */
	me->src_lnode = i_link->src_lnode->self;
	me->tgt_lnode = i_link->tgt_lnode->self;

	/*
	 * Save this link's offset in the src_lnode snapshot's link_out
	 * field
	 */
	melnode = (struct di_lnode *)di_mem_addr(data->st, me->src_lnode);
	me->src_link_next = melnode->link_out;
	melnode->link_out = me->self;

	/*
	 * Put this link on the tgt_lnode's link_in field
	 */
	melnode = (struct di_lnode *)di_mem_addr(data->st, me->tgt_lnode);
	me->tgt_link_next = melnode->link_in;
	melnode->link_in = me->self;

	/*
	 * An i_lnode_t is only created if the corresponding dip exists
	 * in the snapshot. A pointer to the di_node is saved in the
	 * i_lnode_t when it is allocated. For this link, get the di_node
	 * for the source lnode. Then put the link on the di_node's list
	 * of src links
	 */
	medinode = i_link->src_lnode->di_node;
	me->src_node_next = medinode->src_links;
	medinode->src_links = me->self;

	/*
	 * Put this link on the tgt_links list of the target
	 * dip.
	 */
	medinode = i_link->tgt_lnode->di_node;
	me->tgt_node_next = medinode->tgt_links;
	medinode->tgt_links = me->self;

	return (MH_WALK_CONTINUE);
}
	medinode = i_link->tgt_lnode->di_node;
	me->tgt_node_next = medinode->tgt_links;
	medinode->tgt_links = me->self;

	return (MH_WALK_CONTINUE);
}

/*ARGSUSED*/
static uint_t
i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_lnode_t *i_lnode = (i_lnode_t *)key;
	struct i_layer_data *data = arg;
	struct di_lnode *me;
	struct di_node *medinode;

	ASSERT(i_lnode->self == 0);

	i_lnode->self = data->lnode_off +
	    (data->lnode_count * sizeof (struct di_lnode));
	data->lnode_count++;

	ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
	ASSERT(data->link_count == 0); /* links not done yet */
	ASSERT(data->lnode_count <= data->st->lnode_count);

	/* fill in fields for the di_lnode snapshot */
	me = (struct di_lnode *)di_mem_addr(data->st, i_lnode->self);
	me->self = i_lnode->self;

	if (i_lnode->devt == DDI_DEV_T_NONE) {
		me->dev_major = (major_t)-1;
		me->dev_minor = (minor_t)-1;
	} else {
		me->dev_major = getmajor(i_lnode->devt);
		me->dev_minor = getminor(i_lnode->devt);
	}

	/*
	 * The dip corresponding to this lnode must exist in
	 * the snapshot or we wouldn't have created the i_lnode_t
	 * during LDI walk. Save the offset of the dip.
	 */
	ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
	me->node = i_lnode->di_node->self;

	/*
	 * There must be at least one link in or out of this lnode
	 * or we wouldn't have created it. These fields will be set
	 * during the link hash walk.
	 */
	ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));

	/*
	 * Set the offset of the devinfo node associated with this
	 * lnode. Also update the node_next pointer; it chains lnodes
	 * that are associated with the same devinfo node (this can
	 * occur when multiple minor nodes are open for one device,
	 * etc.)
	 */
	medinode = i_lnode->di_node;
	me->node_next = medinode->lnodes;
	medinode->lnodes = me->self;

	return (MH_WALK_CONTINUE);
}
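/*
 * Both walkers place records at fixed strides: the Nth record lands at
 * base_off + N * sizeof (record), and records chain to one another
 * through snapshot-relative offsets rather than pointers. A small
 * user-space model of that arithmetic (the sizes are made up):
 */
#if 0
#include <stdio.h>

#define EX_LNODE_SIZE	32	/* pretend sizeof (struct di_lnode) */

int
main(void)
{
	unsigned base = 0x1000;	/* pretend lnode_off */
	int count;

	/* the walker assigns self = base + count * size, then count++ */
	for (count = 0; count < 3; count++)
		printf("lnode %d: self = 0x%x\n",
		    count, base + count * EX_LNODE_SIZE);
	return (0);
}
/* Prints self = 0x1000, 0x1020, 0x1040 - densely packed records. */
#endif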

static di_off_t
di_getlink_data(di_off_t off, struct di_state *st)
{
	struct i_layer_data data = {0};
	size_t size;

	dcmn_err2((CE_CONT, "di_getlink_data: off = %x\n", off));

	st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
	    mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
	    i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);

	st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
	    (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));

	/* get driver layering information */
	(void) ldi_usage_walker(st, di_ldi_callback);

	/* check if there is any link data to include in the snapshot */
	if (st->lnode_count == 0) {
		ASSERT(st->link_count == 0);
		goto out;
	}

	ASSERT(st->link_count != 0);

	/* get a pointer to snapshot memory for all the di_lnodes */
	size = sizeof (struct di_lnode) * st->lnode_count;
	data.lnode_off = off = di_checkmem(st, off, size);
	off += DI_ALIGN(size);

	/* get a pointer to snapshot memory for all the di_links */
	size = sizeof (struct di_link) * st->link_count;
	data.link_off = off = di_checkmem(st, off, size);
	off += DI_ALIGN(size);

	data.lnode_count = data.link_count = 0;
	data.st = st;

	/*
	 * We have lnodes and links that will go into the
	 * snapshot, so let's walk the respective hashes
	 * and snapshot them. The various linkages are
	 * also set up during the walk.
	 */
	mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
	ASSERT(data.lnode_count == st->lnode_count);

	mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
	ASSERT(data.link_count == st->link_count);

out:
	/* free up the i_lnodes and i_links used to create the snapshot */
	mod_hash_destroy_hash(st->lnode_hash);
	mod_hash_destroy_hash(st->link_hash);
	st->lnode_count = 0;
	st->link_count = 0;

	return (off);
}

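/*
 * The reservation pattern above: di_checkmem() returns an offset with
 * room for "size" bytes, and the running offset then advances by the
 * 8-byte rounded size (DI_ALIGN keeps records aligned on any
 * platform). A user-space model of that rounding, assuming the
 * ((addr + 7) & ~7) definition of DI_ALIGN:
 */
#if 0
#include <stdio.h>

#define EX_ALIGN(a)	(((a) + 7UL) & ~7UL)

int
main(void)
{
	unsigned long off = 0x100;
	unsigned long lnode_bytes = 3 * 40;	/* e.g. 3 records, 40 bytes */

	printf("lnode_off = 0x%lx\n", off);	/* 0x100 */
	off += EX_ALIGN(lnode_bytes);		/* 120 is already 8-aligned */
	printf("link_off  = 0x%lx\n", off);	/* 0x178 */
	off += EX_ALIGN(17);			/* 17 rounds up to 24 */
	printf("next free = 0x%lx\n", off);	/* 0x190 */
	return (0);
}
#endif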
/*
 * Copy all minor data nodes attached to a devinfo node into the snapshot.
 * It is called from di_copynode with devi_lock held.
 */
static di_off_t
di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
	struct di_state *st)
{
	di_off_t off;
	struct di_minor *me;

	dcmn_err2((CE_CONT, "di_getmdata:\n"));

	/*
	 * check memory first
	 */
	off = di_checkmem(st, *off_p, sizeof (struct di_minor));
	*off_p = off;

	do {
		me = (struct di_minor *)di_mem_addr(st, off);
		me->self = off;
		me->type = mnode->type;
		me->node = node;
		me->user_private_data = NULL;

		off += DI_ALIGN(sizeof (struct di_minor));

		/*
		 * Split dev_t to major/minor, so it works for
		 * both ILP32 and LP64 model
		 */
		me->dev_major = getmajor(mnode->ddm_dev);
		me->dev_minor = getminor(mnode->ddm_dev);
		me->spec_type = mnode->ddm_spec_type;

		if (mnode->ddm_name) {
			off = di_checkmem(st, off,
			    strlen(mnode->ddm_name) + 1);
			me->name = off;
			(void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
			off += DI_ALIGN(strlen(mnode->ddm_name) + 1);
		}

		if (mnode->ddm_node_type) {
			off = di_checkmem(st, off,
			    strlen(mnode->ddm_node_type) + 1);
			me->node_type = off;
			(void) strcpy(di_mem_addr(st, off),
			    mnode->ddm_node_type);
			off += DI_ALIGN(strlen(mnode->ddm_node_type) + 1);
		}

		off = di_checkmem(st, off, sizeof (struct di_minor));
		me->next = off;
		mnode = mnode->next;
	} while (mnode);

	me->next = 0;

	return (off);
}

/*
 * di_register_dip(), di_dip_find(): The dip must be protected
 * from deallocation when using these routines - this can either
 * be a reference count, a busy hold or a per-driver lock.
 */

static void
di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
{
	struct dev_info *node = DEVI(dip);
	struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
	struct di_dkey *dk;

	ASSERT(dip);
	ASSERT(off > 0);

	key->k_type = DI_DKEY;
	dk = &(key->k_u.dkey);

	dk->dk_dip = dip;
	dk->dk_major = node->devi_major;
	dk->dk_inst = node->devi_instance;
	dk->dk_nodeid = node->devi_nodeid;

	if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)(uintptr_t)off) != 0) {
		panic(
		    "duplicate devinfo (%p) registered during device "
		    "tree walk", (void *)dip);
	}
}


static int
di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
{
	/*
	 * uintptr_t must be used because it matches the size of void *;
	 * mod_hash expects clients to place results into pointer-size
	 * containers; since di_off_t is always a 32-bit offset, alignment
	 * would otherwise be broken on 64-bit kernels.
	 */
	uintptr_t offset;
	struct di_key key = {0};
	struct di_dkey *dk;

	ASSERT(st->reg_dip_hash);
	ASSERT(dip);
	ASSERT(off_p);


	key.k_type = DI_DKEY;
	dk = &(key.k_u.dkey);

	dk->dk_dip = dip;
	dk->dk_major = DEVI(dip)->devi_major;
	dk->dk_inst = DEVI(dip)->devi_instance;
	dk->dk_nodeid = DEVI(dip)->devi_nodeid;

	if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
	    (mod_hash_val_t *)&offset) == 0) {
		*off_p = (di_off_t)offset;
		return (0);
	} else {
		return (-1);
	}
}
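/*
 * As the devfs comment earlier in this file notes, a dip pointer can
 * be recycled while a snapshot is in progress, so the registration
 * hash keys on (dip, major, instance, nodeid) rather than on the raw
 * pointer. A user-space model of why the extra fields matter (the
 * ex_* names are hypothetical):
 */
#if 0
typedef struct ex_dkey {
	void	*dk_dip;	/* recyclable address */
	int	dk_major;
	int	dk_inst;
	int	dk_nodeid;
} ex_dkey_t;

/* Equal only if every identity field matches, not just the address. */
static int
ex_dkey_equal(const ex_dkey_t *a, const ex_dkey_t *b)
{
	return (a->dk_dip == b->dk_dip && a->dk_major == b->dk_major &&
	    a->dk_inst == b->dk_inst && a->dk_nodeid == b->dk_nodeid);
}

/*
 * If a node is freed and its memory reused for a different device,
 * the addresses compare equal but major/instance/nodeid do not, so a
 * stale registration cannot be confused with the new node.
 */
#endif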

/*
 * di_register_pip(), di_pip_find(): The pip must be protected from
 * deallocation when using these routines. The caller must do this by
 * protecting the client (or phci) <-> pip linkage while traversing the
 * list and then holding the pip when it is found in the list.
 */

static void
di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
{
	struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
	char *path_addr;
	struct di_pkey *pk;

	ASSERT(pip);
	ASSERT(off > 0);

	key->k_type = DI_PKEY;
	pk = &(key->k_u.pkey);

	pk->pk_pip = pip;
	path_addr = mdi_pi_get_addr(pip);
	if (path_addr)
		pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
	pk->pk_client = mdi_pi_get_client(pip);
	pk->pk_phci = mdi_pi_get_phci(pip);

	if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)(uintptr_t)off) != 0) {
		panic(
		    "duplicate pathinfo (%p) registered during device "
		    "tree walk", (void *)pip);
	}
}

/*
 * As with di_register_pip, the caller must hold or lock the pip
 */
static int
di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
{
	/*
	 * uintptr_t must be used because it matches the size of void *;
	 * mod_hash expects clients to place results into pointer-size
	 * containers; since di_off_t is always a 32-bit offset, alignment
	 * would otherwise be broken on 64-bit kernels.
	 */
	uintptr_t offset;
	struct di_key key = {0};
	struct di_pkey *pk;

	ASSERT(st->reg_pip_hash);
	ASSERT(off_p);

	if (pip == NULL) {
		*off_p = 0;
		return (0);
	}

	key.k_type = DI_PKEY;
	pk = &(key.k_u.pkey);

	pk->pk_pip = pip;
	pk->pk_path_addr = mdi_pi_get_addr(pip);
	pk->pk_client = mdi_pi_get_client(pip);
	pk->pk_phci = mdi_pi_get_phci(pip);

	if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
	    (mod_hash_val_t *)&offset) == 0) {
		*off_p = (di_off_t)offset;
		return (0);
	} else {
		return (-1);
	}
}

static di_path_state_t
path_state_convert(mdi_pathinfo_state_t st)
{
	switch (st) {
	case MDI_PATHINFO_STATE_ONLINE:
		return (DI_PATH_STATE_ONLINE);
	case MDI_PATHINFO_STATE_STANDBY:
		return (DI_PATH_STATE_STANDBY);
	case MDI_PATHINFO_STATE_OFFLINE:
		return (DI_PATH_STATE_OFFLINE);
	case MDI_PATHINFO_STATE_FAULT:
		return (DI_PATH_STATE_FAULT);
	default:
		return (DI_PATH_STATE_UNKNOWN);
	}
}

static di_off_t
di_path_getprop(mdi_pathinfo_t *pip, di_off_t off, di_off_t *off_p,
	struct di_state *st)
{
	nvpair_t *prop = NULL;
	struct di_path_prop *me;

	if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
		*off_p = 0;
		return (off);
	}

	off = di_checkmem(st, off, sizeof (struct di_path_prop));
	*off_p = off;

	while (prop = mdi_pi_get_next_prop(pip, prop)) {
		int delta = 0;

		me = (struct di_path_prop *)di_mem_addr(st, off);
		me->self = off;
		off += sizeof (struct di_path_prop);

		/*
		 * property name
		 */
		off = di_checkmem(st, off, strlen(nvpair_name(prop)) + 1);
		me->prop_name = off;
		(void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
		off += strlen(nvpair_name(prop)) + 1;

		switch (nvpair_type(prop)) {
		case DATA_TYPE_BYTE:
		case DATA_TYPE_INT16:
		case DATA_TYPE_UINT16:
		case DATA_TYPE_INT32:
		case DATA_TYPE_UINT32:
			delta = sizeof (int32_t);
			me->prop_type = DDI_PROP_TYPE_INT;
			off = di_checkmem(st, off, delta);
			(void) nvpair_value_int32(prop,
			    (int32_t *)di_mem_addr(st, off));
			break;

		case DATA_TYPE_INT64:
		case DATA_TYPE_UINT64:
			delta = sizeof (int64_t);
			me->prop_type = DDI_PROP_TYPE_INT64;
			off = di_checkmem(st, off, delta);
			(void) nvpair_value_int64(prop,
			    (int64_t *)di_mem_addr(st, off));
			break;

		case DATA_TYPE_STRING:
		{
			char *str;
			(void) nvpair_value_string(prop, &str);
			delta = strlen(str) + 1;
			me->prop_type = DDI_PROP_TYPE_STRING;
			off = di_checkmem(st, off, delta);
			(void) strcpy(di_mem_addr(st, off), str);
			break;
		}
		case DATA_TYPE_BYTE_ARRAY:
		case DATA_TYPE_INT16_ARRAY:
		case DATA_TYPE_UINT16_ARRAY:
		case DATA_TYPE_INT32_ARRAY:
		case DATA_TYPE_UINT32_ARRAY:
		case DATA_TYPE_INT64_ARRAY:
		case DATA_TYPE_UINT64_ARRAY:
		{
			uchar_t *buf;
			uint_t nelems;
			(void) nvpair_value_byte_array(prop, &buf, &nelems);
			delta = nelems;
			me->prop_type = DDI_PROP_TYPE_BYTE;
			if (nelems != 0) {
				off = di_checkmem(st, off, delta);
				bcopy(buf, di_mem_addr(st, off), nelems);
			}
			break;
		}

		default:	/* Unknown or unhandled type; skip it */
			delta = 0;
			break;
		}

		if (delta > 0) {
			me->prop_data = off;
		}

		me->prop_len = delta;
		off += delta;

		off = di_checkmem(st, off, sizeof (struct di_path_prop));
		me->prop_next = off;
	}

	me->prop_next = 0;
	return (off);
}

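/*
 * Path properties arrive as nvpairs and are flattened into a name plus
 * a typed payload above. A minimal user-space sketch of the same
 * iteration, assuming libnvpair is available (cc ... -lnvpair); it
 * only prints each pair's name and the payload size the snapshot
 * would reserve. The property names are made up.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *nvl;
	nvpair_t *nvp = NULL;

	(void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
	(void) nvlist_add_int32(nvl, "load-balance", 1);
	(void) nvlist_add_string(nvl, "path-class", "primary");

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		size_t delta = 0;
		char *s;

		switch (nvpair_type(nvp)) {
		case DATA_TYPE_INT32:
			delta = sizeof (int32_t);
			break;
		case DATA_TYPE_STRING:
			(void) nvpair_value_string(nvp, &s);
			delta = strlen(s) + 1;
			break;
		default:
			break;	/* unhandled types reserve nothing */
		}
		printf("%s: %zu payload bytes\n", nvpair_name(nvp), delta);
	}
	nvlist_free(nvl);
	return (0);
}
#endif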

static void
di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
	int get_client)
{
	if (get_client) {
		ASSERT(me->path_client == 0);
		me->path_client = noff;
		ASSERT(me->path_c_link == 0);
		*off_pp = &me->path_c_link;
		me->path_snap_state &=
		    ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
	} else {
		ASSERT(me->path_phci == 0);
		me->path_phci = noff;
		ASSERT(me->path_p_link == 0);
		*off_pp = &me->path_p_link;
		me->path_snap_state &=
		    ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
	}
}

/*
 * poff_p: pointer to the linkage field. This links pips along the client|phci
 *	   linkage list.
 * noff  : Offset for the endpoint dip snapshot.
 */
static di_off_t
di_getpath_data(dev_info_t *dip, di_off_t *poff_p, di_off_t noff,
	struct di_state *st, int get_client)
{
	di_off_t off;
	mdi_pathinfo_t *pip;
	struct di_path *me;
	mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);

	dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));

	/*
	 * The naming of the following mdi_xyz() is unfortunately
	 * non-intuitive. mdi_get_next_phci_path() follows the
	 * client_link, i.e. the list of pips belonging to the
	 * given client dip.
	 */
	if (get_client)
		next_pip = &mdi_get_next_phci_path;
	else
		next_pip = &mdi_get_next_client_path;

	off = *poff_p;

	pip = NULL;
	while (pip = (*next_pip)(dip, pip)) {
		mdi_pathinfo_state_t state;
		di_off_t stored_offset;

		dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));

		mdi_pi_lock(pip);

		if (di_pip_find(st, pip, &stored_offset) != -1) {
			/*
			 * We've already seen this pathinfo node, so we need
			 * to take care not to snap it again; however, one
			 * endpoint and linkage will be set here. The other
			 * endpoint and linkage were already set when the pip
			 * was first snapshotted, i.e. when the other endpoint
			 * dip was snapshotted.
			 */
			me = (struct di_path *)di_mem_addr(st, stored_offset);

			*poff_p = stored_offset;

			di_path_one_endpoint(me, noff, &poff_p, get_client);

			/*
			 * The other endpoint and linkage were set when this
			 * pip was snapshotted. So we are done with both
			 * endpoints and linkages.
			 */
			ASSERT(!(me->path_snap_state &
			    (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
			ASSERT(!(me->path_snap_state &
			    (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));

			mdi_pi_unlock(pip);
			continue;
		}

		/*
		 * Now that we need to snapshot this pip, check memory
		 */
		off = di_checkmem(st, off, sizeof (struct di_path));
		me = (struct di_path *)di_mem_addr(st, off);
		me->self = off;
		*poff_p = off;
		off += sizeof (struct di_path);

		me->path_snap_state =
		    DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
		me->path_snap_state |=
		    DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;

		/*
		 * Zero out fields as di_checkmem() doesn't guarantee
		 * zero-filled memory
		 */
		me->path_client = me->path_phci = 0;
		me->path_c_link = me->path_p_link = 0;

		di_path_one_endpoint(me, noff, &poff_p, get_client);

		/*
		 * Note the existence of this pathinfo
		 */
		di_register_pip(st, pip, me->self);

		state = mdi_pi_get_state(pip);
		me->path_state = path_state_convert(state);

		/*
		 * Get intermediate addressing info.
		 */
		off = di_checkmem(st, off, strlen(mdi_pi_get_addr(pip)) + 1);
		me->path_addr = off;
		(void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
		off += strlen(mdi_pi_get_addr(pip)) + 1;

		/*
		 * Get path properties if props are to be included in the
		 * snapshot
		 */
		if (DINFOPROP & st->command) {
			off = di_path_getprop(pip, off, &me->path_prop, st);
		} else {
			me->path_prop = 0;
		}

		mdi_pi_unlock(pip);
	}

	*poff_p = 0;

	return (off);
}
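/*
 * di_getpath_data() threads pips onto the snapshot list by carrying a
 * pointer to the previous record's linkage field (poff_p) and letting
 * di_path_one_endpoint() re-aim it at the new record's link field. A
 * user-space model of that append-without-tail-pointer idiom:
 */
#if 0
#include <stdio.h>

typedef struct ex_path {
	unsigned self;		/* snapshot-relative offset */
	unsigned link;		/* offset of the next path; 0 ends the list */
} ex_path_t;

int
main(void)
{
	ex_path_t recs[3] = { { 0x10, 0 }, { 0x40, 0 }, { 0x70, 0 } };
	unsigned head = 0;
	unsigned *linkp = &head;	/* plays the role of poff_p */
	int i;

	for (i = 0; i < 3; i++) {
		*linkp = recs[i].self;	/* patch the predecessor's link */
		linkp = &recs[i].link;	/* the next append lands here */
	}
	*linkp = 0;			/* terminate, as "*poff_p = 0" */

	printf("head=0x%x links: 0x%x 0x%x 0x%x\n",
	    head, recs[0].link, recs[1].link, recs[2].link);
	return (0);
}
/* Prints head=0x10 links: 0x40 0x70 0x0. */
#endif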

/*
 * Copy a list of properties attached to a devinfo node. Called from
 * di_copynode with devi_lock held. The major number is passed in case
 * we need to call the driver's prop_op entry. The value of list indicates
 * which list we are copying. Possible values are:
 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
 */
static di_off_t
di_getprop(struct ddi_prop *prop, di_off_t *off_p, struct di_state *st,
	struct dev_info *dip, int list)
{
	dev_t dev;
	int (*prop_op)();
	int off, need_prop_op = 0;
	int prop_op_fail = 0;
	ddi_prop_t *propp = NULL;
	struct di_prop *pp;
	struct dev_ops *ops = NULL;
	int prop_len;
	caddr_t prop_val;


	dcmn_err2((CE_CONT, "di_getprop:\n"));

	ASSERT(st != NULL);

	dcmn_err((CE_CONT, "copy property list at addr %p\n", (void *)prop));

	/*
	 * Figure out if we need to call the driver's prop_op entry point.
	 * The conditions are:
	 * -- driver property list
	 * -- driver must be attached and held
	 * -- driver's cb_prop_op != ddi_prop_op
	 *    or parent's bus_prop_op != ddi_bus_prop_op
	 */

	if (list != DI_PROP_DRV_LIST) {
		goto getprop;
	}

	/*
	 * If the driver is not attached or if the major is -1, we ignore
	 * the driver property list. No one should rely on such
	 * properties.
	 */
	if (i_ddi_node_state((dev_info_t *)dip) < DS_ATTACHED) {
		off = *off_p;
		*off_p = 0;
		return (off);
	}

	/*
	 * Now we have a driver which is held. We can examine entry points
	 * and check the condition listed above.
	 */
	ops = dip->devi_ops;

	/*
	 * Some nexus drivers incorrectly set cb_prop_op to nodev,
	 * nulldev or even NULL.
	 */
	if (ops && ops->devo_cb_ops &&
	    (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
	    (ops->devo_cb_ops->cb_prop_op != nodev) &&
	    (ops->devo_cb_ops->cb_prop_op != nulldev) &&
	    (ops->devo_cb_ops->cb_prop_op != NULL)) {
		need_prop_op = 1;
	}

getprop:
	/*
	 * check memory availability
	 */
	off = di_checkmem(st, *off_p, sizeof (struct di_prop));
	*off_p = off;
	/*
	 * Now copy properties
	 */
	do {
		pp = (struct di_prop *)di_mem_addr(st, off);
		pp->self = off;
		/*
		 * Split dev_t to major/minor, so it works for
		 * both ILP32 and LP64 model
		 */
		pp->dev_major = getmajor(prop->prop_dev);
		pp->dev_minor = getminor(prop->prop_dev);
		pp->prop_flags = prop->prop_flags;
		pp->prop_list = list;

		/*
		 * property name
		 */
		off += sizeof (struct di_prop);
		if (prop->prop_name) {
			off = di_checkmem(st, off, strlen(prop->prop_name)
			    + 1);
			pp->prop_name = off;
			(void) strcpy(di_mem_addr(st, off), prop->prop_name);
			off += strlen(prop->prop_name) + 1;
		}

		/*
		 * Set prop_len here. This may change later
		 * if cb_prop_op returns a different length.
		 */
		pp->prop_len = prop->prop_len;
		if (!need_prop_op) {
			if (prop->prop_val == NULL) {
				dcmn_err((CE_WARN,
				    "devinfo: property fault at %p",
				    (void *)prop));
				pp->prop_data = -1;
			} else if (prop->prop_len != 0) {
				off = di_checkmem(st, off, prop->prop_len);
				pp->prop_data = off;
				bcopy(prop->prop_val, di_mem_addr(st, off),
				    prop->prop_len);
				off += DI_ALIGN(pp->prop_len);
			}
		}

		off = di_checkmem(st, off, sizeof (struct di_prop));
		pp->next = off;
		prop = prop->prop_next;
	} while (prop);

	pp->next = 0;

	if (!need_prop_op) {
		dcmn_err((CE_CONT, "finished property "
		    "list at offset 0x%x\n", off));
		return (off);
	}

	/*
	 * If there is a need to call the driver's prop_op entry,
	 * we must release the driver's devi_lock, because the
	 * cb_prop_op entry point will grab it.
	 *
	 * The snapshot memory has already been allocated above,
	 * which means the length of an active property should
	 * remain fixed for this implementation to work.
	 */


	prop_op = ops->devo_cb_ops->cb_prop_op;
	pp = (struct di_prop *)di_mem_addr(st, *off_p);

	mutex_exit(&dip->devi_lock);

	do {
		int err;
		struct di_prop *tmp;

		prop_op_fail = 0;	/* reset for each property */
		if (pp->next) {
			tmp = (struct di_prop *)
			    di_mem_addr(st, pp->next);
		} else {
			tmp = NULL;
		}

		/*
		 * call into the driver's prop_op entry point
		 *
		 * Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY
		 */
		dev = makedevice(pp->dev_major, pp->dev_minor);
		if (dev == DDI_DEV_T_NONE)
			dev = DDI_DEV_T_ANY;

		dcmn_err((CE_CONT, "call prop_op"
		    "(%lx, %p, PROP_LEN_AND_VAL_ALLOC, "
		    "DDI_PROP_DONTPASS, \"%s\", %p, &%d)\n",
		    dev,
		    (void *)dip,
		    (char *)di_mem_addr(st, pp->prop_name),
		    (void *)di_mem_addr(st, pp->prop_data),
		    pp->prop_len));

		if ((err = (*prop_op)(dev, (dev_info_t *)dip,
		    PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS,
		    (char *)di_mem_addr(st, pp->prop_name),
		    &prop_val, &prop_len)) != DDI_PROP_SUCCESS) {
			if ((propp = i_ddi_prop_search(dev,
			    (char *)di_mem_addr(st, pp->prop_name),
			    (uint_t)pp->prop_flags,
			    &(DEVI(dip)->devi_drv_prop_ptr))) != NULL) {
				pp->prop_len = propp->prop_len;
				if (pp->prop_len != 0) {
					off = di_checkmem(st, off,
					    pp->prop_len);
					pp->prop_data = off;
					bcopy(propp->prop_val, di_mem_addr(st,
					    pp->prop_data), propp->prop_len);
					off += DI_ALIGN(pp->prop_len);
				}
			} else {
				prop_op_fail = 1;
			}
		} else if (prop_len != 0) {
			pp->prop_len = prop_len;
			off = di_checkmem(st, off, prop_len);
			pp->prop_data = off;
			bcopy(prop_val, di_mem_addr(st, off), prop_len);
			off += DI_ALIGN(prop_len);
			kmem_free(prop_val, prop_len);
		}

		if (prop_op_fail) {
			pp->prop_data = -1;
			dcmn_err((CE_WARN, "devinfo: prop_op failure "
			    "for \"%s\" err %d",
			    di_mem_addr(st, pp->prop_name), err));
		}

		pp = tmp;

	} while (pp);

	mutex_enter(&dip->devi_lock);
	dcmn_err((CE_CONT, "finished property list at offset 0x%x\n", off));
	return (off);
}

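/*
 * The PROP_LEN_AND_VAL_ALLOC convention used above: the prop_op entry
 * point allocates a buffer, returns it through the value pointer, and
 * the caller must kmem_free() it after copying. A minimal fragment
 * illustrating that contract ("prop_op" stands for any cb_prop_op
 * entry point; "some-property" and "buf" are hypothetical):
 */
#if 0
	caddr_t val;
	int len;

	if ((*prop_op)(dev, dip, PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS,
	    "some-property", &val, &len) == DDI_PROP_SUCCESS && len != 0) {
		bcopy(val, buf, len);	/* consume the value */
		kmem_free(val, len);	/* ALLOC buffers are caller-freed */
	}
#endif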
/*
 * find private data format attached to a dip
 * parent = 1 to match driver name of parent dip (for parent private data)
 *          0 to match driver name of current dip (for driver private data)
 */
#define DI_MATCH_DRIVER	0
#define DI_MATCH_PARENT	1

struct di_priv_format *
di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
{
	int i, count, len;
	char *drv_name;
	major_t major;
	struct di_all *all;
	struct di_priv_format *form;

	dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
	    node->devi_node_name, match));

	if (match == DI_MATCH_PARENT) {
		node = DEVI(node->devi_parent);
	}

	if (node == NULL) {
		return (NULL);
	}

	major = ddi_name_to_major(node->devi_binding_name);
	if (major == (major_t)(-1)) {
		return (NULL);
	}

	/*
	 * Match the driver name.
	 */
	drv_name = ddi_major_to_name(major);
	if ((drv_name == NULL) || *drv_name == '\0') {
		return (NULL);
	}

	/* Now get the di_priv_format array */
	all = (struct di_all *)di_mem_addr(st, 0);

	if (match == DI_MATCH_PARENT) {
		count = all->n_ppdata;
		form = (struct di_priv_format *)
		    (di_mem_addr(st, 0) + all->ppdata_format);
	} else {
		count = all->n_dpdata;
		form = (struct di_priv_format *)
		    ((caddr_t)all + all->dpdata_format);
	}

	len = strlen(drv_name);
	for (i = 0; i < count; i++) {
		char *tmp;

		tmp = form[i].drv_name;
		while (tmp && (*tmp != '\0')) {
			if ((strncmp(drv_name, tmp, len) == 0) &&
			    (tmp[len] == '\0' || tmp[len] == ' ')) {
				/* match whole names, not prefixes */
				return (&form[i]);
			}
			/* move to the next driver name, past the space */
			if (tmp = strchr(tmp, ' ')) {
				tmp++;
			}
		}
	}

	return (NULL);
}
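/*
 * form[i].drv_name may hold several space-separated driver names; the
 * loop above scans each candidate in turn and must match whole names.
 * A user-space model of that scan (the ex_* names are hypothetical):
 */
#if 0
#include <stdio.h>
#include <string.h>

/* Return 1 if "name" appears as a whole word in space-separated "list". */
static int
ex_name_in_list(const char *name, const char *list)
{
	size_t len = strlen(name);

	while (list && *list != '\0') {
		if (strncmp(name, list, len) == 0 &&
		    (list[len] == '\0' || list[len] == ' '))
			return (1);
		if ((list = strchr(list, ' ')) != NULL)
			list++;		/* step past the separator */
	}
	return (0);
}

int
main(void)
{
	printf("%d\n", ex_name_in_list("sd", "ssd sdx sd"));	/* 1 */
	printf("%d\n", ex_name_in_list("sd", "ssd sdx"));	/* 0 */
	return (0);
}
#endif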

/*
 * The following functions copy data as specified by the format passed in.
 * To prevent an invalid format from panicking the system, we call
 * on_fault(). A return value of 0 indicates an error. Otherwise, the
 * total offset is returned.
 */
#define DI_MAX_PRIVDATA	(PAGESIZE >> 1)	/* max private data size */

static di_off_t
di_getprvdata(struct di_priv_format *pdp, void *data, di_off_t *off_p,
	struct di_state *st)
{
	caddr_t pa;
	void *ptr;
	int i, size, repeat;
	di_off_t off, off0, *tmp;

	label_t ljb;

	dcmn_err2((CE_CONT, "di_getprvdata:\n"));

	/*
	 * check memory availability. Private data size is
	 * limited to DI_MAX_PRIVDATA.
	 */
	off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);

	if ((pdp->bytes <= 0) || pdp->bytes > DI_MAX_PRIVDATA) {
		goto failure;
	}

	if (!on_fault(&ljb)) {
		/* copy the struct */
		bcopy(data, di_mem_addr(st, off), pdp->bytes);
		off0 = DI_ALIGN(pdp->bytes);

		/* dereferencing pointers */
		for (i = 0; i < MAX_PTR_IN_PRV; i++) {

			if (pdp->ptr[i].size == 0) {
				goto success;	/* no more ptrs */
			}

			/*
			 * first, get the pointer content
			 */
			if ((pdp->ptr[i].offset < 0) ||
			    (pdp->ptr[i].offset >
			    pdp->bytes - sizeof (char *)))
				goto failure;	/* wrong offset */

			pa = di_mem_addr(st, off + pdp->ptr[i].offset);
			tmp = (di_off_t *)pa;	/* to store off_t later */

			ptr = *((void **)pa);	/* get pointer value */
			if (ptr == NULL) {	/* if NULL pointer, go on */
				continue;
			}

			/*
			 * next, find the repeat count (array dimension)
			 */
			repeat = pdp->ptr[i].len_offset;

			/*
			 * A negative value indicates a fixed sized array
			 * of (-len_offset) elements; 0 or a positive value
			 * indicates a variable sized array.
			 *
			 * For a variable sized array, the count must be an
			 * int member of the structure, stored at an offset
			 * equal to the value of this field.
			 */
			if (repeat > pdp->bytes - sizeof (int)) {
				goto failure;	/* wrong offset */
			}

			if (repeat >= 0) {
				repeat = *((int *)((caddr_t)data + repeat));
			} else {
				repeat = -repeat;
			}

			/*
			 * next, get the size of the object to be copied
			 */
			size = pdp->ptr[i].size * repeat;

			/*
			 * Arbitrarily limit the total size of the object to
			 * be copied (1 byte up to the remaining private
			 * data space).
			 */
			if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
				goto failure;	/* wrong size or too big */
			}

			/*
			 * Now copy the data
			 */
			*tmp = off0;
			bcopy(ptr, di_mem_addr(st, off + off0), size);
			off0 += DI_ALIGN(size);
		}
	} else {
		goto failure;
	}

success:
	/*
	 * success if reached here
	 */
	no_fault();
	*off_p = off;

	return (off + off0);
	/*NOTREACHED*/

failure:
	/*
	 * fault occurred
	 */
	no_fault();
	cmn_err(CE_WARN, "devinfo: fault in private data at %p", data);
	*off_p = -1;	/* set private data to indicate error */

	return (off);
}

/*
 * get parent private data; on error, returns original offset
 */
static di_off_t
di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
	int off;
	struct di_priv_format *ppdp;

	dcmn_err2((CE_CONT, "di_getppdata:\n"));

	/* find the parent data format */
	if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
		off = *off_p;
		*off_p = 0;	/* set parent data to none */
		return (off);
	}

	return (di_getprvdata(ppdp, ddi_get_parent_data((dev_info_t *)node),
	    off_p, st));
}

/*
 * get driver private data; on error, returns original offset
 */
static di_off_t
di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
	int off;
	struct di_priv_format *dpdp;

	dcmn_err2((CE_CONT, "di_getdpdata:"));

	/* find the driver data format */
	if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
		off = *off_p;
		*off_p = 0;	/* set driver data to none */
		return (off);
	}

	return (di_getprvdata(dpdp, ddi_get_driver_private((dev_info_t *)node),
	    off_p, st));
}

/*
 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
 * This function encapsulates the state machine:
 *
 *	-> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
 *	|		SNAPSHOT		USRLD	 |
 *	--------------------------------------------------
 *
 * Returns 0 on success and -1 on failure
 */
static int
di_setstate(struct di_state *st, int new_state)
{
	int ret = 0;

	mutex_enter(&di_lock);
	switch (new_state) {
	case IOC_IDLE:
	case IOC_DONE:
		break;
	case IOC_SNAP:
		if (st->di_iocstate != IOC_IDLE)
			ret = -1;
		break;
	case IOC_COPY:
		if (st->di_iocstate != IOC_DONE)
			ret = -1;
		break;
	default:
		ret = -1;
	}

	if (ret == 0)
		st->di_iocstate = new_state;
	else
		cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
		    st->di_iocstate, new_state);
	mutex_exit(&di_lock);
	return (ret);
}
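/*
 * The diagram above describes the snapshot/copyout cycle as a strict
 * ring: SNAP only from IDLE, COPY only from DONE, with IDLE and DONE
 * always reachable. A user-space model of the same transition check
 * (states and names mirror the IOC_* values; ex_* is hypothetical):
 */
#if 0
#include <stdio.h>

enum { EX_IDLE, EX_SNAP, EX_DONE, EX_COPY };

static int ex_state = EX_IDLE;

static int
ex_setstate(int new_state)
{
	int ok;

	switch (new_state) {
	case EX_IDLE:
	case EX_DONE:
		ok = 1;				/* always allowed */
		break;
	case EX_SNAP:
		ok = (ex_state == EX_IDLE);	/* snapshot needs idle */
		break;
	case EX_COPY:
		ok = (ex_state == EX_DONE);	/* copyout needs done */
		break;
	default:
		ok = 0;
	}
	if (ok)
		ex_state = new_state;
	return (ok ? 0 : -1);
}

int
main(void)
{
	/* DINFOCPYALL: IDLE -> SNAP -> DONE; DINFOUSRLD: DONE -> COPY */
	printf("%d %d %d\n", ex_setstate(EX_SNAP),
	    ex_setstate(EX_DONE), ex_setstate(EX_COPY));	/* 0 0 0 */
	printf("%d\n", ex_setstate(EX_COPY));	/* -1: COPY needs DONE */
	return (0);
}
#endif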

/*
 * We cannot assume the presence of the entire
 * snapshot in this routine. All we are guaranteed
 * is the di_all struct + 1 byte (for root_path)
 */
static int
header_plus_one_ok(struct di_all *all)
{
	/*
	 * Refuse to read old versions
	 */
	if (all->version != DI_SNAPSHOT_VERSION) {
		CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
		return (0);
	}

	if (all->cache_magic != DI_CACHE_MAGIC) {
		CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
		return (0);
	}

	if (all->snapshot_time <= 0) {
		CACHE_DEBUG((DI_ERR, "bad timestamp: %ld",
		    all->snapshot_time));
		return (0);
	}

	if (all->top_devinfo == 0) {
		CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
		return (0);
	}

	if (all->map_size < sizeof (*all) + 1) {
		CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
		return (0);
	}

	if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
		CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
		    all->root_path[0], all->root_path[1]));
		return (0);
	}

	/*
	 * We can't check the checksum here as we just have the header
	 */

	return (1);
}

static int
chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
{
	rlim64_t rlimit;
	ssize_t resid;
	int error = 0;


	rlimit = RLIM64_INFINITY;

	while (len) {
		resid = 0;
		error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
		    UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);

		if (error || resid < 0) {
			error = error ? error : EIO;
			CACHE_DEBUG((DI_ERR, "write error: %d", error));
			break;
		}

		/*
		 * Check if we are making progress
		 */
		if (resid >= len) {
			error = ENOSPC;
			break;
		}
		buf += len - resid;
		off += len - resid;
		len = resid;
	}

	return (error);
}
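/*
 * chunk_write() treats a short write as progress and retries with the
 * remainder, failing with ENOSPC only when nothing moves. The same
 * loop in user-space terms, assuming a POSIX write(2) that may be
 * short:
 */
#if 0
#include <errno.h>
#include <unistd.h>

static int
ex_write_all(int fd, const char *buf, size_t len)
{
	while (len > 0) {
		ssize_t n = write(fd, buf, len);

		if (n < 0)
			return (errno);		/* hard error */
		if (n == 0)
			return (ENOSPC);	/* no progress made */
		buf += n;			/* advance past what stuck */
		len -= n;
	}
	return (0);
}
#endif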
3382 " Skipping write", all->map_size, cache->cache_size)); 3383 return; 3384 } 3385 3386 /* 3387 * First unlink the temp file 3388 */ 3389 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3390 if (error && error != ENOENT) { 3391 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3392 DI_CACHE_TEMP, error)); 3393 } 3394 3395 if (error == EROFS) { 3396 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3397 return; 3398 } 3399 3400 vp = NULL; 3401 oflags = (FCREAT|FWRITE); 3402 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3403 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3404 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3405 DI_CACHE_TEMP, error)); 3406 return; 3407 } 3408 3409 ASSERT(vp); 3410 3411 /* 3412 * Paranoid: Check if the file is on a read-only FS 3413 */ 3414 if (vn_is_readonly(vp)) { 3415 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3416 goto fail; 3417 } 3418 3419 /* 3420 * Note that we only write map_size bytes to disk - this saves 3421 * space as the actual cache size may be larger than size of 3422 * valid data in the cache. 3423 * Another advantage is that it makes verification of size 3424 * easier when the file is read later. 3425 */ 3426 map_size = all->map_size; 3427 off = 0; 3428 buf = cache->cache_data; 3429 3430 while (map_size) { 3431 ASSERT(map_size > 0); 3432 /* 3433 * Write in chunks so that VM system 3434 * is not overwhelmed 3435 */ 3436 if (map_size > di_chunk * PAGESIZE) 3437 chunk = di_chunk * PAGESIZE; 3438 else 3439 chunk = map_size; 3440 3441 error = chunk_write(vp, off, buf, chunk); 3442 if (error) { 3443 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3444 off, error)); 3445 goto fail; 3446 } 3447 3448 off += chunk; 3449 buf += chunk; 3450 map_size -= chunk; 3451 3452 /* Give pageout a chance to run */ 3453 delay(1); 3454 } 3455 3456 /* 3457 * Now sync the file and close it 3458 */ 3459 if (error = VOP_FSYNC(vp, FSYNC, kcred)) { 3460 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3461 } 3462 3463 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred)) { 3464 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3465 VN_RELE(vp); 3466 return; 3467 } 3468 3469 VN_RELE(vp); 3470 3471 /* 3472 * Now do the rename 3473 */ 3474 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3475 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3476 return; 3477 } 3478 3479 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3480 3481 return; 3482 3483 fail: 3484 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred); 3485 VN_RELE(vp); 3486 } 3487 3488 3489 /* 3490 * Since we could be called early in boot, 3491 * use kobj_read_file() 3492 */ 3493 static void 3494 di_cache_read(struct di_cache *cache) 3495 { 3496 struct _buf *file; 3497 struct di_all *all; 3498 int n; 3499 size_t map_size, sz, chunk; 3500 offset_t off; 3501 caddr_t buf; 3502 uint32_t saved_crc, crc; 3503 3504 ASSERT(modrootloaded); 3505 ASSERT(DI_CACHE_LOCKED(*cache)); 3506 ASSERT(cache->cache_data == NULL); 3507 ASSERT(cache->cache_size == 0); 3508 ASSERT(!servicing_interrupt()); 3509 3510 file = kobj_open_file(DI_CACHE_FILE); 3511 if (file == (struct _buf *)-1) { 3512 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3513 DI_CACHE_FILE, ENOENT)); 3514 return; 3515 } 3516 3517 /* 3518 * Read in the header+root_path first. 

/*
 * Since we could be called early in boot,
 * use kobj_read_file()
 */
static void
di_cache_read(struct di_cache *cache)
{
	struct _buf *file;
	struct di_all *all;
	int n;
	size_t map_size, sz, chunk;
	offset_t off;
	caddr_t buf;
	uint32_t saved_crc, crc;

	ASSERT(modrootloaded);
	ASSERT(DI_CACHE_LOCKED(*cache));
	ASSERT(cache->cache_data == NULL);
	ASSERT(cache->cache_size == 0);
	ASSERT(!servicing_interrupt());

	file = kobj_open_file(DI_CACHE_FILE);
	if (file == (struct _buf *)-1) {
		CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
		    DI_CACHE_FILE, ENOENT));
		return;
	}

	/*
	 * Read in the header+root_path first. The root_path must be "/"
	 */
	all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
	n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);

	if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
		kmem_free(all, sizeof (*all) + 1);
		kobj_close_file(file);
		CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
		return;
	}

	map_size = all->map_size;

	kmem_free(all, sizeof (*all) + 1);

	ASSERT(map_size >= sizeof (*all) + 1);

	buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
	sz = map_size;
	off = 0;
	while (sz) {
		/* Don't overload VM with large reads */
		chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
		n = kobj_read_file(file, buf, chunk, off);
		if (n != chunk) {
			CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
			    DI_CACHE_FILE, off));
			goto fail;
		}
		off += chunk;
		buf += chunk;
		sz -= chunk;
	}

	ASSERT(off == map_size);

	/*
	 * Read past expected EOF to verify size.
	 */
	if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
		CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
		goto fail;
	}

	all = (struct di_all *)di_cache.cache_data;
	if (!header_plus_one_ok(all)) {
		CACHE_DEBUG((DI_ERR, "%s: file header changed",
		    DI_CACHE_FILE));
		goto fail;
	}

	/*
	 * Compute CRC with checksum field in the cache data set to 0
	 */
	saved_crc = all->cache_checksum;
	all->cache_checksum = 0;
	CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
	all->cache_checksum = saved_crc;

	if (crc != all->cache_checksum) {
		CACHE_DEBUG((DI_ERR,
		    "%s: checksum error: expected=0x%x actual=0x%x",
		    DI_CACHE_FILE, all->cache_checksum, crc));
		goto fail;
	}

	if (all->map_size != map_size) {
		CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
		goto fail;
	}

	kobj_close_file(file);

	di_cache.cache_size = map_size;

	return;

fail:
	kmem_free(di_cache.cache_data, map_size);
	kobj_close_file(file);
	di_cache.cache_data = NULL;
	di_cache.cache_size = 0;
}

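/*
 * The checksum dance above: the stored checksum was computed with the
 * cache_checksum field zeroed, so the reader must zero it again before
 * recomputing. A user-space model using zlib's crc32() as a stand-in
 * for the kernel CRC32 macro (cc ... -lz; the parameters differ, but
 * the zero-then-recompute procedure is the point):
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <zlib.h>

struct ex_hdr {
	unsigned int checksum;	/* plays cache_checksum */
	char payload[32];
};

int
main(void)
{
	struct ex_hdr h;
	unsigned int saved, crc;

	memset(&h, 0, sizeof (h));
	strcpy(h.payload, "snapshot bytes");
	h.checksum = crc32(0, (const Bytef *)&h, sizeof (h));

	/* verification: stash, zero, recompute, restore, compare */
	saved = h.checksum;
	h.checksum = 0;
	crc = crc32(0, (const Bytef *)&h, sizeof (h));
	h.checksum = saved;
	printf("%s\n", crc == saved ? "match" : "corrupt");
	return (0);
}
#endif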

/*
 * Checks if arguments are valid for using the cache.
 */
static int
cache_args_valid(struct di_state *st, int *error)
{
	ASSERT(error);
	ASSERT(st->mem_size > 0);
	ASSERT(st->memlist != NULL);

	if (!modrootloaded || !i_ddi_io_initialized()) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: I/O subsystem not inited"));
		*error = ENOTACTIVE;
		return (0);
	}

	/*
	 * No other flags allowed with DINFOCACHE
	 */
	if (st->command != (DINFOCACHE & DIIOC_MASK)) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: bad flags: 0x%x",
		    st->command));
		*error = EINVAL;
		return (0);
	}

	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
		CACHE_DEBUG((DI_ERR,
		    "cache lookup failure: bad root: %s",
		    DI_ALL_PTR(st)->root_path));
		*error = EINVAL;
		return (0);
	}

	CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));

	*error = 0;

	return (1);
}

static int
snapshot_is_cacheable(struct di_state *st)
{
	ASSERT(st->mem_size > 0);
	ASSERT(st->memlist != NULL);

	if (st->command != (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
		CACHE_DEBUG((DI_INFO,
		    "not cacheable: incompatible flags: 0x%x",
		    st->command));
		return (0);
	}

	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
		CACHE_DEBUG((DI_INFO,
		    "not cacheable: incompatible root path: %s",
		    DI_ALL_PTR(st)->root_path));
		return (0);
	}

	CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x",
	    st->command));

	return (1);
}
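/*
 * Both gates above compare the full command word against a single
 * expected flag set; equality, not a bit test, is how "no other flags
 * allowed" is enforced. A tiny user-space model of that style of
 * check (the EX_* flag values are made up):
 */
#if 0
#include <stdio.h>

#define EX_DINFOCACHE	0x100
#define EX_DINFOPROP	0x010
#define EX_MASK		0xfff

int
main(void)
{
	unsigned cmd = EX_DINFOCACHE | EX_DINFOPROP;

	/* any extra bit disqualifies the request */
	printf("%s\n", (cmd & EX_MASK) == EX_DINFOCACHE ?
	    "cacheable" : "not cacheable");	/* not cacheable */
	return (0);
}
#endif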

static int
di_cache_lookup(struct di_state *st)
{
	size_t rval;
	int cache_valid;

	ASSERT(cache_args_valid(st, &cache_valid));
	ASSERT(modrootloaded);

	DI_CACHE_LOCK(di_cache);

	/*
	 * The following assignment determines the validity
	 * of the cache as far as this snapshot is concerned.
	 */
	cache_valid = di_cache.cache_valid;

	if (cache_valid && di_cache.cache_data == NULL) {
		di_cache_read(&di_cache);
		/* check for read or file error */
		if (di_cache.cache_data == NULL)
			cache_valid = 0;
	}

	if (cache_valid) {
		/*
		 * Ok, the cache was valid as of this particular
		 * snapshot. Copy the cached snapshot. This is safe
		 * to do as the cache cannot be freed (we hold the
		 * cache lock). Free the memory allocated in di_state
		 * up until this point - we will simply copy everything
		 * in the cache.
		 */

		ASSERT(di_cache.cache_data != NULL);
		ASSERT(di_cache.cache_size > 0);

		di_freemem(st);

		rval = 0;
		if (di_cache2mem(&di_cache, st) > 0) {

			ASSERT(DI_ALL_PTR(st));

			/*
			 * map_size is the size of valid data in the
			 * cached snapshot and may be less than the
			 * size of the cache.
			 */
			rval = DI_ALL_PTR(st)->map_size;

			ASSERT(rval >= sizeof (struct di_all));
			ASSERT(rval <= di_cache.cache_size);
		}
	} else {
		/*
		 * The cache isn't valid, so we need to take a snapshot.
		 * Set the command flags appropriately.
		 */
		ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
		st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
		rval = di_cache_update(st);
		st->command = (DINFOCACHE & DIIOC_MASK);
	}

	DI_CACHE_UNLOCK(di_cache);

	/*
	 * For cached snapshots, the devinfo driver always returns
	 * a snapshot rooted at "/".
	 */
	ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);

	return (rval);
}

/*
 * This is a forced update of the cache - the previous state of the cache
 * may be:
 *	- unpopulated
 *	- populated and invalid
 *	- populated and valid
 */
static int
di_cache_update(struct di_state *st)
{
	int rval;
	uint32_t crc;
	struct di_all *all;

	ASSERT(DI_CACHE_LOCKED(di_cache));
	ASSERT(snapshot_is_cacheable(st));

	/*
	 * Free the in-core cache and the on-disk file (if they exist)
	 */
	i_ddi_di_cache_free(&di_cache);

	/*
	 * Set the valid flag before taking the snapshot,
	 * so that any invalidations that arrive
	 * during or after the snapshot are not
	 * removed by us.
	 */
	atomic_or_32(&di_cache.cache_valid, 1);

	modunload_disable();
	rval = di_snapshot(st);
	modunload_enable();

	if (rval == 0) {
		CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
		return (0);
	}

	DI_ALL_PTR(st)->map_size = rval;

	if (di_mem2cache(st, &di_cache) == 0) {
		CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
		return (0);
	}

	ASSERT(di_cache.cache_data);
	ASSERT(di_cache.cache_size > 0);

	/*
	 * Now that we have cached the snapshot, compute its checksum.
	 * The checksum is only computed over the valid data in the
	 * cache, not the entire cache.
	 * Also, set all the fields (except the checksum) before computing
	 * the checksum.
	 */
	all = (struct di_all *)di_cache.cache_data;
	all->cache_magic = DI_CACHE_MAGIC;
	all->map_size = rval;

	ASSERT(all->cache_checksum == 0);
	CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
	all->cache_checksum = crc;

	di_cache_write(&di_cache);

	return (rval);
}

static void
di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
{
	va_list ap;

	if (di_cache_debug <= DI_QUIET)
		return;

	if (di_cache_debug < msglevel)
		return;

	switch (msglevel) {
	case DI_ERR:
		msglevel = CE_WARN;
		break;
	case DI_INFO:
	case DI_TRACE:
	default:
		msglevel = CE_NOTE;
		break;
	}

	va_start(ap, fmt);
	vcmn_err(msglevel, fmt, ap);
	va_end(ap);
}
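/*
 * di_cache_print() maps its own debug levels onto cmn_err() severities
 * and forwards the variadic arguments via vcmn_err(). The same
 * forwarding idiom in portable C, with vfprintf standing in for
 * vcmn_err (ex_print is a hypothetical name):
 */
#if 0
#include <stdarg.h>
#include <stdio.h>

static void
ex_print(int level, const char *fmt, ...)
{
	va_list ap;

	if (level <= 0)
		return;			/* quiet */

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);	/* forward the list, don't re-format */
	va_end(ap);
}
#endif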