/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * driver for accessing kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args) if (di_debug >= 1) cmn_err args
#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver. The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move snapshot to different platforms
 */
#define	DI_ALIGN(addr)	((addr + 7l) & ~7l)
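
/*
 * For example (illustrative only): DI_ALIGN(0x39) is 0x40, and
 * DI_ALIGN(0x40) stays 0x40. Every offset handed out for the snapshot is
 * rounded up to an 8-byte boundary this way, which satisfies the strictest
 * natural alignment we expect on any platform that may consume the
 * snapshot.
 */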

/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
	struct di_mem *next;	/* link to next chunk */
	char *buf;		/* contiguous kernel memory */
	size_t buf_size;	/* size of buf in bytes */
	devmap_cookie_t cook;	/* cookie from ddi_umem_alloc */
};

/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info	*dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, offp)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(offp); \
	((stack)->depth)++; \
}

#define	DI_ALL_PTR(s)	((struct di_all *)di_mem_addr((s), 0))

/*
 * With devfs, the device tree has no global locks. The device tree is
 * dynamic and dips may come and go if they are not locked locally. Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree. This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};
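
/*
 * A minimal sketch (illustrative only, not a function in this driver) of
 * how a devinfo key would be built from a dip; di_register_dip(), declared
 * below, is presumably where keys of this shape are actually produced:
 *
 *	struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
 *	key->k_type = DI_DKEY;
 *	key->k_u.dkey.dk_dip = dip;
 *	key->k_u.dkey.dk_major = DEVI(dip)->devi_major;
 *	key->k_u.dkey.dk_inst = DEVI(dip)->devi_instance;
 *	key->k_u.dkey.dk_nodeid = DEVI(dip)->devi_nodeid;
 *
 * The fields beyond dk_dip are what make the key robust against a freed
 * dip being reallocated for a different device during the snapshot.
 */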

struct i_lnode;

typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link w/ same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link w/ same i_lnode */
} i_link_t;

typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;

/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

/*
 * Check that whole device tree is being configured as a pre-condition for
 * cleaning up /etc/devices files.
 */
#define	DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

static struct phci_walk_arg {
	di_off_t	off;
	struct di_state	*st;
} phci_walk_arg_t;

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct di_stack *, struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(struct ddi_prop *, di_off_t *,
    struct di_state *, struct dev_info *, int);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static caddr_t di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
int build_phci_list(dev_info_t *ph_devinfo, void *arg);

static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver %I%",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int	error;

	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error != 0) {
		mutex_destroy(&di_lock);
		return (error);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	error;

	error = mod_remove(&modlinkage);
	if (error != 0) {
		return (error);
	}

	mutex_destroy(&di_lock);
	return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int	error = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)di_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * All dev_t's map to the same, single instance.
		 */
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		break;
	}

	return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_ATTACH:
		di_states = kmem_zalloc(
		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
			kmem_free(di_states,
			    di_max_opens * sizeof (struct di_state *));
			ddi_remove_minor_node(dip, NULL);
			error = DDI_FAILURE;
		} else {
			di_dip = dip;
			ddi_report_dev(dip);

			error = DDI_SUCCESS;
		}
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_DETACH:
		ddi_remove_minor_node(dip, NULL);
		di_dip = NULL;
		kmem_free(di_states,
		    di_max_opens * sizeof (struct di_state *));

		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device. Each minor gets a separate
 * entry in the di_states[] table. Based on the original minor number, we
 * discriminate opens of the full and read-only nodes. If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int	m;
	minor_t	minor_parent = getminor(*devp);

	if (minor_parent != DI_FULL_PARENT &&
	    minor_parent != DI_READONLY_PARENT)
		return (ENXIO);

	mutex_enter(&di_lock);

	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
		if (di_states[m] != NULL)
			continue;

		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
		break;	/* It's ours. */
	}

	if (m >= di_max_opens) {
		/*
		 * maximum open instance for device reached
		 */
		mutex_exit(&di_lock);
		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
		return (EAGAIN);
	}
	mutex_exit(&di_lock);

	ASSERT(m < di_max_opens);
	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));

	return (0);
}
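
/*
 * Worked example (illustrative): the first open of the full minor node
 * (DI_FULL_PARENT, minor 0) claims slot m = 0 and the caller is handed
 * minor 0 + DI_NODE_SPECIES = 2; the next full open claims slot 2 and is
 * handed minor 4. Read-only opens claim the odd slots 1, 3, ... and are
 * handed minors 3, 5, ... Both di_close() and di_ioctl() recover the slot
 * with getminor(dev) - DI_NODE_SPECIES, and DI_UNPRIVILEGED_NODE() applied
 * to the slot number distinguishes the two species.
 */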

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
	struct di_state *st;
	int m = (int)getminor(dev) - DI_NODE_SPECIES;

	if (m < 0) {
		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
		    m + DI_NODE_SPECIES);
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(m < di_max_opens && st != NULL);

	di_freemem(st);
	kmem_free(st, sizeof (struct di_state));

	/*
	 * empty slot in state table
	 */
	mutex_enter(&di_lock);
	di_states[m] = NULL;
	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));
	mutex_exit(&di_lock);

	return (0);
}


/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int rv, error;
	di_off_t off;
	struct di_all *all;
	struct di_state *st;
	int m = (int)getminor(dev) - DI_NODE_SPECIES;

	major_t i;
	char *drv_name;
	size_t map_size, size;
	struct di_mem *dcp;
	int ndi_flags;

	if (m < 0 || m >= di_max_opens) {
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(st != NULL);

	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

	switch (cmd) {
	case DINFOIDENT:
		/*
		 * This is called from di_init to verify that the driver
		 * opened is indeed devinfo. The purpose is to guard against
		 * sending ioctl to an unknown driver in case of an
		 * unresolved major number conflict during bfu.
		 */
		*rvalp = DI_MAGIC;
		return (0);

	case DINFOLODRV:
		/*
		 * Hold an installed driver and return the result
		 */
		if (DI_UNPRIVILEGED_NODE(m)) {
			/*
			 * Only the fully enabled instances may issue
			 * DINFOLODRV.
			 */
			return (EACCES);
		}

		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
			kmem_free(drv_name, MAXNAMELEN);
			return (EFAULT);
		}

		/*
		 * Some third-party drivers' _init() routines walk the device
		 * tree, so we load the driver module before configuring the
		 * driver.
		 */
		i = ddi_name_to_major(drv_name);
		if (ddi_hold_driver(i) == NULL) {
			kmem_free(drv_name, MAXNAMELEN);
			return (ENXIO);
		}

		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

		/*
		 * i_ddi_load_drvconf() below will trigger a reprobe
		 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
		 * needed here.
		 */
		modunload_disable();
		(void) i_ddi_load_drvconf(i);
		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
		kmem_free(drv_name, MAXNAMELEN);
		ddi_rele_driver(i);
		rv = i_ddi_devs_attached(i);
		modunload_enable();

		i_ddi_di_cache_invalidate(KM_SLEEP);

		return ((rv == DDI_SUCCESS) ? 0 : ENXIO);

	case DINFOUSRLD:
		/*
		 * The case for copying snapshot to userland
		 */
		if (di_setstate(st, IOC_COPY) == -1)
			return (EBUSY);

		map_size = ((struct di_all *)di_mem_addr(st, 0))->map_size;
		if (map_size == 0) {
			(void) di_setstate(st, IOC_DONE);
			return (EFAULT);
		}

		/*
		 * copyout the snapshot
		 */
		map_size = (map_size + PAGEOFFSET) & PAGEMASK;

		/*
		 * Return the map size, so caller may do a sanity
		 * check against the return value of snapshot ioctl()
		 */
		*rvalp = (int)map_size;

		/*
		 * Copy one chunk at a time
		 */
		off = 0;
		dcp = st->memlist;
		while (map_size) {
			size = dcp->buf_size;
			if (map_size <= size) {
				size = map_size;
			}

			if (ddi_copyout(di_mem_addr(st, off),
			    (void *)(arg + off), size, mode) != 0) {
				(void) di_setstate(st, IOC_DONE);
				return (EFAULT);
			}

			map_size -= size;
			off += size;
			dcp = dcp->next;
		}

		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (0);

	default:
		if ((cmd & ~DIIOC_MASK) != DIIOC) {
			/*
			 * Invalid ioctl command
			 */
			return (ENOTTY);
		}
		/*
		 * take a snapshot
		 */
		st->command = cmd & DIIOC_MASK;
		/*FALLTHROUGH*/
	}

	/*
	 * Obtain enough memory to hold header + rootpath. We prevent kernel
	 * memory exhaustion by freeing any previously allocated snapshot and
	 * refusing the operation; otherwise we would be allowing ioctl(),
	 * ioctl(), ioctl(), ..., panic.
	 */
	if (di_setstate(st, IOC_SNAP) == -1)
		return (EBUSY);

	size = sizeof (struct di_all) +
	    sizeof (((struct dinfo_io *)(NULL))->root_path);
	if (size < PAGESIZE)
		size = PAGESIZE;
	di_allocmem(st, size);

	all = (struct di_all *)di_mem_addr(st, 0);
	all->devcnt = devcnt;
	all->command = st->command;
	all->version = DI_SNAPSHOT_VERSION;
	all->top_vhci_devinfo = 0;	/* filled up by build_vhci_list. */

	/*
	 * Note the endianness in case we need to transport snapshot
	 * over the network.
	 */
#if defined(_LITTLE_ENDIAN)
	all->endianness = DI_LITTLE_ENDIAN;
#else
	all->endianness = DI_BIG_ENDIAN;
#endif

	/* Copyin ioctl args, store in the snapshot. */
	if (copyinstr((void *)arg, all->root_path,
	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	}

	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EINVAL);
	}

	error = 0;
	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (error);
	}

	off = DI_ALIGN(sizeof (struct di_all) + size);

	/*
	 * Only the fully enabled version may force load drivers or read
	 * the parent private data from a driver.
	 */
	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	    DI_UNPRIVILEGED_NODE(m)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EACCES);
	}

	/* Do we need private data? */
	if (st->command & DINFOPRIVDATA) {
		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			/*
			 * Cannot copy private data from 64-bit kernel
			 * to 32-bit app
			 */
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EINVAL);
		}
		case DDI_MODEL_NONE:
			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
				di_freemem(st);
				(void) di_setstate(st, IOC_IDLE);
				return (EFAULT);
			}
			break;
		}
#else /* !_MULTI_DATAMODEL */
		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EFAULT);
		}
#endif /* _MULTI_DATAMODEL */
	}

	all->top_devinfo = DI_ALIGN(off);

	/*
	 * For cache lookups we reallocate memory from scratch,
	 * so the value of "all" is no longer valid.
	 */
	all = NULL;

	if (st->command & DINFOCACHE) {
		*rvalp = di_cache_lookup(st);
	} else if (snapshot_is_cacheable(st)) {
		DI_CACHE_LOCK(di_cache);
		*rvalp = di_cache_update(st);
		DI_CACHE_UNLOCK(di_cache);
	} else
		*rvalp = di_snapshot_and_clean(st);

	if (*rvalp) {
		DI_ALL_PTR(st)->map_size = *rvalp;
		(void) di_setstate(st, IOC_DONE);
	} else {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
	}

	return (0);
}
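
/*
 * Illustrative userland sequence for the ioctl protocol above. This is a
 * sketch only: the device path, the use of DINFOCPYALL, and the layout of
 * struct dinfo_io (whose root_path member is what di_ioctl() copies in)
 * are assumptions about how a libdevinfo-style consumer drives this
 * driver; error handling is omitted.
 *
 *	struct dinfo_io dinfo_io;
 *	char *buf;
 *	int map_size;
 *	int fd = open("/devices/pseudo/devinfo@0:devinfo", O_RDONLY);
 *
 *	if (ioctl(fd, DINFOIDENT, NULL) != DI_MAGIC)
 *		return (-1);
 *	(void) strcpy(dinfo_io.root_path, "/");
 *	map_size = ioctl(fd, DINFOCPYALL, &dinfo_io);
 *	buf = malloc(map_size);
 *	(void) ioctl(fd, DINFOUSRLD, buf);
 *
 * The first snapshot ioctl takes the snapshot and returns its size via
 * rvalp; DINFOUSRLD then copies it out, one di_mem chunk at a time.
 */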

/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
	struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem),
	    KM_SLEEP);
	/*
	 * Round up size to nearest power of 2. If it is less
	 * than st->mem_size, set it to st->mem_size (i.e.,
	 * the mem_size is doubled every time) to reduce the
	 * number of memory allocations.
	 */
	size_t tmp = 1;
	while (tmp < size) {
		tmp <<= 1;
	}
	size = (tmp > st->mem_size) ? tmp : st->mem_size;

	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
	mem->buf_size = size;

	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

	if (st->mem_size == 0) {	/* first chunk */
		st->memlist = mem;
	} else {
		/*
		 * locate end of linked list and add a chunk at the end
		 */
		struct di_mem *dcp = st->memlist;
		while (dcp->next != NULL) {
			dcp = dcp->next;
		}

		dcp->next = mem;
	}

	st->mem_size += size;
}
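
/*
 * Worked example (illustrative): three successive 4K requests against an
 * empty state yield chunks of 4K (mem_size 0 -> 4K), 4K (4K -> 8K) and
 * then 8K (8K -> 16K), since each request is rounded up to a power of two
 * and then raised to the current mem_size. Chunk sizes therefore grow
 * geometrically and the number of allocations stays logarithmic in the
 * final snapshot size.
 */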

/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
	struct di_mem *dcp;
	size_t copysz;

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		return;
	}

	copysz = 0;
	for (dcp = st->memlist; dcp; dcp = dcp->next) {

		ASSERT(bufsiz > 0);

		if (bufsiz <= dcp->buf_size)
			copysz = bufsiz;
		else
			copysz = dcp->buf_size;

		bcopy(dcp->buf, buf, copysz);

		buf += copysz;
		bufsiz -= copysz;

		if (bufsiz == 0)
			break;
	}
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
	struct di_mem *dcp, *tmp;

	dcmn_err2((CE_CONT, "di_freemem\n"));

	if (st->mem_size) {
		dcp = st->memlist;
		while (dcp) {	/* traverse the linked list */
			tmp = dcp;
			dcp = dcp->next;
			ddi_umem_free(tmp->cook);
			kmem_free(tmp, sizeof (struct di_mem));
		}
		st->mem_size = 0;
		st->memlist = NULL;
	}

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
}

/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
	caddr_t	pa;

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
		return (0);
	}

	ASSERT(cache->cache_data);

	di_allocmem(st, cache->cache_size);

	pa = di_mem_addr(st, 0);

	ASSERT(pa);

	/*
	 * Verify that di_allocmem() allocates contiguous memory,
	 * so that it is safe to do straight bcopy()
	 */
	ASSERT(st->memlist != NULL);
	ASSERT(st->memlist->next == NULL);
	bcopy(cache->cache_data, pa, cache->cache_size);

	return (cache->cache_size);
}

/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *	- 0 on failure
 *	- size of copied data on success
 */
static int
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
	size_t map_size;

	ASSERT(cache->cache_size == 0);
	ASSERT(cache->cache_data == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
		return (0);
	}

	ASSERT(st->memlist);

	/*
	 * The size of the memory list may be much larger than the
	 * size of valid data (map_size). Cache only the valid data.
	 */
	map_size = DI_ALL_PTR(st)->map_size;
	if (map_size == 0 || map_size < sizeof (struct di_all) ||
	    map_size > st->mem_size) {
		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
		return (0);
	}

	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
	cache->cache_size = map_size;
	di_copymem(st, cache->cache_data, cache->cache_size);

	return (map_size);
}

/*
 * Make sure there are at least "size" bytes of memory left before
 * going on. Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
	    off, (int)size));

	/*
	 * di_checkmem() shouldn't be called with a size of zero.
	 * But in case it is, we want to make sure we return a valid
	 * offset within the memlist and not an offset that points us
	 * at the end of the memlist.
	 */
	if (size == 0) {
		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
		size = 1;
	}

	off = DI_ALIGN(off);
	if ((st->mem_size - off) < size) {
		off = st->mem_size;
		di_allocmem(st, size);
	}

	return (off);
}
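
/*
 * Worked example (illustrative): with mem_size = 0x2000, a call
 * di_checkmem(st, 0x1f39, 0x400) first aligns the offset to 0x1f40; since
 * the 0xc0 bytes left (0x2000 - 0x1f40) cannot hold 0x400 bytes, the
 * offset is advanced to the old mem_size (0x2000) and a fresh chunk large
 * enough for the request is allocated. Callers must therefore always
 * store through the offset di_checkmem() returns, never the one they
 * passed in.
 */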

/*
 * Copy the private data format from ioctl arg.
 * On success, the ending offset is returned. On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
	di_off_t size;
	struct di_priv_data *priv;
	struct di_all *all = (struct di_all *)di_mem_addr(st, 0);

	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	    off, (void *)arg, mode));

	/*
	 * Copyin data and check version.
	 * We only handle private data version 0.
	 */
	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
		kmem_free(priv, sizeof (struct di_priv_data));
		return (0);
	}

	/*
	 * Save di_priv_data copied from userland in snapshot.
	 */
	all->pd_version = priv->version;
	all->n_ppdata = priv->n_parent;
	all->n_dpdata = priv->n_driver;

	/*
	 * copyin private data format, modify offset accordingly
	 */
	if (all->n_ppdata) {	/* parent private data format */
		/*
		 * check memory
		 */
		size = all->n_ppdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->ppdata_format = off;
		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	if (all->n_dpdata) {	/* driver private data format */
		/*
		 * check memory
		 */
		size = all->n_dpdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->dpdata_format = off;
		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	kmem_free(priv, sizeof (struct di_priv_data));
	return (off);
}

/*
 * Return the real address based on the offset (off) within snapshot
 */
static caddr_t
di_mem_addr(struct di_state *st, di_off_t off)
{
	struct di_mem *dcp = st->memlist;

	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	    (void *)dcp, off));

	ASSERT(off < st->mem_size);

	while (off >= dcp->buf_size) {
		off -= dcp->buf_size;
		dcp = dcp->next;
	}

	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	    off, (void *)(dcp->buf + off)));

	return (dcp->buf + off);
}
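
/*
 * Worked example (illustrative): with a memlist of a 4K chunk followed by
 * an 8K chunk, di_mem_addr(st, 5000) skips the first chunk (5000 >= 4096),
 * reduces the offset to 904, and returns buf + 904 within the second
 * chunk. Offsets are thus stable names for snapshot locations even though
 * the backing memory is discontiguous.
 */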

/*
 * Ideally we would use the whole key to derive the hash
 * value. However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
	struct di_key	*dik = key;
	size_t		rshift;
	void		*ptr;

	ASSERT(arg == NULL);

	switch (dik->k_type) {
	case DI_DKEY:
		ptr = dik->k_u.dkey.dk_dip;
		rshift = highbit(sizeof (struct dev_info));
		break;
	case DI_PKEY:
		ptr = dik->k_u.pkey.pk_pip;
		rshift = highbit(sizeof (struct mdi_pathinfo));
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
	return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
	char		*path_addr;
	struct di_key	*dik = key;

	switch (dik->k_type) {
	case DI_DKEY:
		break;
	case DI_PKEY:
		path_addr = dik->k_u.pkey.pk_path_addr;
		if (path_addr)
			kmem_free(path_addr, strlen(path_addr) + 1);
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}

	kmem_free(dik, sizeof (struct di_key));
}

static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
	if (dk1->dk_dip != dk2->dk_dip)
		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

	if (dk1->dk_major != -1 && dk2->dk_major != -1) {
		if (dk1->dk_major != dk2->dk_major)
			return (dk1->dk_major > dk2->dk_major ? 1 : -1);

		if (dk1->dk_inst != dk2->dk_inst)
			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
	}

	if (dk1->dk_nodeid != dk2->dk_nodeid)
		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

	return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
	char *p1, *p2;
	int rv;

	if (pk1->pk_pip != pk2->pk_pip)
		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

	p1 = pk1->pk_path_addr;
	p2 = pk2->pk_path_addr;

	p1 = p1 ? p1 : "";
	p2 = p2 ? p2 : "";

	rv = strcmp(p1, p2);
	if (rv)
		return (rv > 0 ? 1 : -1);

	if (pk1->pk_client != pk2->pk_client)
		return (pk1->pk_client > pk2->pk_client ? 1 : -1);

	if (pk1->pk_phci != pk2->pk_phci)
		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

	return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	struct di_key *dik1, *dik2;

	dik1 = key1;
	dik2 = key2;

	if (dik1->k_type != dik2->k_type) {
		panic("devinfo: mismatched keys");
		/*NOTREACHED*/
	}

	switch (dik1->k_type) {
	case DI_DKEY:
		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
	case DI_PKEY:
		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
}
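
/*
 * A note on the rshift used by di_hash_byptr() above: distinct dev_info
 * (or mdi_pathinfo) structures are at least sizeof (struct dev_info)
 * bytes apart, so the low-order highbit(sizeof (struct dev_info)) bits of
 * a dip carry almost no information. Passing that shift count to
 * mod_hash_byptr() discards those bits before the pointer is folded into
 * a bucket index, spreading structurally-aligned pointers across buckets.
 */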

/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
	di_off_t off;
	struct di_all *all;
	dev_info_t *rootnode;
	char buf[80];
	int plen;
	char *path;
	vnode_t *vp;

	all = (struct di_all *)di_mem_addr(st, 0);
	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

	/*
	 * Verify path before entrusting it to e_ddi_hold_devi_by_path because
	 * some platforms have OBP bugs where executing the NDI_PROMNAME code
	 * path against an invalid path results in panic. The lookupnameat
	 * is done relative to rootdir without a leading '/' on "devices/"
	 * to force the lookup to occur in the global zone.
	 */
	plen = strlen("devices/") + strlen(all->root_path) + 1;
	path = kmem_alloc(plen, KM_SLEEP);
	(void) snprintf(path, plen, "devices/%s", all->root_path);
	if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		kmem_free(path, plen);
		return (0);
	}
	kmem_free(path, plen);
	VN_RELE(vp);

	/*
	 * Hold the devinfo node referred by the path.
	 */
	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
	if (rootnode == NULL) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		return (0);
	}

	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered dips (statep=%p)", (void *)st);

	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);


	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered pips (statep=%p)", (void *)st);

	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);

	/*
	 * copy the device tree
	 */
	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

	if (DINFOPATH & st->command) {
		mdi_walk_vhcis(build_vhci_list, st);
	}

	ddi_release_devi(rootnode);

	/*
	 * copy the devnames array
	 */
	all->devnames = off;
	off = di_copydevnm(&all->devnames, st);


	/* initialize the hash tables */
	st->lnode_count = 0;
	st->link_count = 0;

	if (DINFOLYR & st->command) {
		off = di_getlink_data(off, st);
	}

	/*
	 * Free up hash tables
	 */
	mod_hash_destroy_hash(st->reg_dip_hash);
	mod_hash_destroy_hash(st->reg_pip_hash);

	/*
	 * Record the timestamp now that we are done with snapshot.
	 *
	 * We compute the checksum later and then only if we cache
	 * the snapshot, since checksumming adds some overhead.
	 * The checksum is checked later if we read the cache file
	 * from disk.
	 *
	 * Set checksum field to 0 as CRC is calculated with that
	 * field set to 0.
	 */
	all->snapshot_time = ddi_get_time();
	all->cache_checksum = 0;

	return (off);
}

/*
 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
 */
static di_off_t
di_snapshot_and_clean(struct di_state *st)
{
	di_off_t off;

	modunload_disable();
	off = di_snapshot(st);
	if (off != 0 && (st->command & DINFOCLEANUP)) {
		ASSERT(DEVICES_FILES_CLEANABLE(st));
		/*
		 * Cleanup /etc/devices files:
		 * In order to accurately account for the system configuration
		 * in /etc/devices files, the appropriate drivers must be
		 * fully configured before the cleanup starts.
		 * So enable modunload only after the cleanup.
		 */
		i_ddi_clean_devices_files();
	}
	modunload_enable();

	return (off);
}

/*
 * construct vhci linkage in the snapshot.
 */
int
build_vhci_list(dev_info_t *vh_devinfo, void *arg)
{
	struct di_all *all;
	struct di_node *me;
	struct di_state *st;
	di_off_t off;
	struct phci_walk_arg pwa;

	dcmn_err3((CE_CONT, "build_vhci list\n"));

	dcmn_err3((CE_CONT, "vhci node %s, instance #%d\n",
	    DEVI(vh_devinfo)->devi_node_name,
	    DEVI(vh_devinfo)->devi_instance));

	st = (struct di_state *)arg;
	if (di_dip_find(st, vh_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
	    st->mem_size, off));

	all = (struct di_all *)di_mem_addr(st, 0);
	if (all->top_vhci_devinfo == 0) {
		all->top_vhci_devinfo = off;
	} else {
		me = (struct di_node *)di_mem_addr(st, all->top_vhci_devinfo);

		while (me->next_vhci != 0) {
			me = (struct di_node *)di_mem_addr(st, me->next_vhci);
		}

		me->next_vhci = off;
	}

	pwa.off = off;
	pwa.st = st;
	mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);

	return (DDI_WALK_CONTINUE);
}

/*
 * construct phci linkage for the given vhci in the snapshot.
 */
int
build_phci_list(dev_info_t *ph_devinfo, void *arg)
{
	struct di_node *vh_di_node;
	struct di_node *me;
	struct phci_walk_arg *pwa;
	di_off_t off;

	pwa = (struct phci_walk_arg *)arg;

	dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
	    pwa->off));

	vh_di_node = (struct di_node *)di_mem_addr(pwa->st, pwa->off);

	if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "phci node %s, instance #%d, at offset 0x%x\n",
	    DEVI(ph_devinfo)->devi_node_name,
	    DEVI(ph_devinfo)->devi_instance, off));

	if (vh_di_node->top_phci == 0) {
		vh_di_node->top_phci = off;
		return (DDI_WALK_CONTINUE);
	}

	me = (struct di_node *)di_mem_addr(pwa->st, vh_di_node->top_phci);

	while (me->next_phci != 0) {
		me = (struct di_node *)di_mem_addr(pwa->st, me->next_phci);
	}
	me->next_phci = off;

	return (DDI_WALK_CONTINUE);
}
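
/*
 * The resulting multipath linkage in the snapshot (illustrative); every
 * arrow below is a di_off_t offset rather than a pointer, and 0 terminates
 * each list:
 *
 *	di_all.top_vhci_devinfo -> vhci0 --next_vhci--> vhci1 --> ... --> 0
 *	vhciN.top_phci -> phci0 --next_phci--> phci1 --> ... --> 0
 */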

/*
 * Assumes all devinfo nodes in device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *poff_p)
{
	struct dev_info *node;
	struct di_node *me;
	di_off_t off;

	ASSERT(mutex_owned(&dnp->dn_lock));

	node = DEVI(dnp->dn_head);
	for (; node; node = node->devi_next) {
		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
			continue;

		ASSERT(off > 0);
		me = (struct di_node *)di_mem_addr(st, off);
		ASSERT(me->next == 0 || me->next == -1);
		/*
		 * Only nodes which were BOUND when they were
		 * snapshotted will be added to per-driver list.
		 */
		if (me->next != -1)
			continue;

		*poff_p = off;
		poff_p = &me->next;
	}

	*poff_p = 0;
}

/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * Also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
	int i;
	di_off_t off;
	size_t size;
	struct di_devnm *dnp;

	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

	/*
	 * make sure there is some allocated memory
	 */
	size = devcnt * sizeof (struct di_devnm);
	off = di_checkmem(st, *off_p, size);
	*off_p = off;

	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	    devcnt, off));

	dnp = (struct di_devnm *)di_mem_addr(st, off);
	off += size;

	for (i = 0; i < devcnt; i++) {
		if (devnamesp[i].dn_name == NULL) {
			continue;
		}

		/*
		 * dn_name is not freed during driver unload or removal.
		 *
		 * There is a race condition when make_devname() changes
		 * dn_name during our strcpy. This should be rare since
		 * only add_drv does this. At any rate, we never had a
		 * problem with ddi_name_to_major(), which should have
		 * the same problem.
		 */
		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
		    devnamesp[i].dn_name, devnamesp[i].dn_instance,
		    off));

		off = di_checkmem(st, off, strlen(devnamesp[i].dn_name) + 1);
		dnp[i].name = off;
		(void) strcpy((char *)di_mem_addr(st, off),
		    devnamesp[i].dn_name);
		off += DI_ALIGN(strlen(devnamesp[i].dn_name) + 1);

		mutex_enter(&devnamesp[i].dn_lock);

		/*
		 * Snapshot per-driver node list
		 */
		snap_driver_list(st, &devnamesp[i], &dnp[i].head);

		/*
		 * This is not used by libdevinfo, leave it for now
		 */
		dnp[i].flags = devnamesp[i].dn_flags;
		dnp[i].instance = devnamesp[i].dn_instance;

		/*
		 * get global properties
		 */
		if ((DINFOPROP & st->command) &&
		    devnamesp[i].dn_global_prop_ptr) {
			dnp[i].global_prop = off;
			off = di_getprop(
			    devnamesp[i].dn_global_prop_ptr->prop_list,
			    &dnp[i].global_prop, st, NULL, DI_PROP_GLB_LIST);
		}

		/*
		 * Bit encode driver ops: bus_ops, cb_ops, and
		 * cb_ops->cb_str for STREAMS drivers.
		 */
		if (CB_DRV_INSTALLED(devopsp[i])) {
			if (devopsp[i]->devo_cb_ops) {
				dnp[i].ops |= DI_CB_OPS;
				if (devopsp[i]->devo_cb_ops->cb_str)
					dnp[i].ops |= DI_STREAM_OPS;
			}
			if (NEXUS_DRV(devopsp[i])) {
				dnp[i].ops |= DI_BUS_OPS;
			}
		}

		mutex_exit(&devnamesp[i].dn_lock);
	}

	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

	return (off);
}

/*
 * Copy the kernel devinfo tree. The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t off;
	struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/* force attach drivers */
	if ((i_ddi_node_state((dev_info_t *)root) == DS_READY) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.
	 */
	while (!EMPTY_STACK(dsp)) {
		off = di_copynode(dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}
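
/*
 * Traversal order (illustrative): for a tree root { a { a1, a2 }, b },
 * di_copynode() visits root, a, a1, a2, b -- a preorder walk. Descending
 * to a child PUSHes it (holding the parent busy via ndi_devi_enter), and
 * exhausting a subtree POPs frames until a node with an unvisited sibling
 * is found, which then replaces the top of the stack.
 */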

/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot. The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct di_stack *dsp, struct di_state *st)
{
	di_off_t off;
	struct di_node *me;
	struct dev_info *node;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n",
	    dsp->depth));

	node = TOP_NODE(dsp);

	ASSERT(node != NULL);

	/*
	 * check memory usage, and fix offsets accordingly.
	 */
	off = di_checkmem(st, *(TOP_OFFSET(dsp)), sizeof (struct di_node));
	*(TOP_OFFSET(dsp)) = off;
	me = DI_NODE(di_mem_addr(st, off));

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state (CF1, CF2, etc.)
	 */
	me->self = off;
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->node_state = node->devi_node_state;
	me->next_vhci = 0;	/* Filled up by build_vhci_list. */
	me->top_phci = 0;	/* Filled up by build_phci_list. */
	me->next_phci = 0;	/* Filled up by build_phci_list. */
	me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
	me->user_private_data = NULL;

	/*
	 * Get parent's offset in snapshot from the stack
	 * and store it in the current node
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);

	/*
	 * increment offset
	 */
	off += sizeof (struct di_node);

#ifdef	DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid) {
		ddi_devid_t	devid;
		char		*devidstr;
		int		devid_size;

		/*
		 * The devid is now represented as a property.
		 * For micro release compatibility with di_devid interface
		 * in libdevinfo we must return it as a binary structure in
		 * the snapshot. When di_devid is removed from libdevinfo
		 * in a future release (and devi_devid is deleted) then
		 * code related to DEVID_COMPATIBILITY can be removed.
		 */
		ASSERT(node->devi_devid == DEVID_COMPATIBILITY);
		/* XXX should be DDI_DEV_T_NONE! */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, (dev_info_t *)node,
		    DDI_PROP_DONTPASS, DEVID_PROP_NAME, &devidstr) ==
		    DDI_PROP_SUCCESS) {
			if (ddi_devid_str_decode(devidstr, &devid, NULL) ==
			    DDI_SUCCESS) {
				devid_size = ddi_devid_sizeof(devid);
				off = di_checkmem(st, off, devid_size);
				me->devid = off;
				bcopy(devid,
				    di_mem_addr(st, off), devid_size);
				off += devid_size;
				ddi_devid_free(devid);
			}
			ddi_prop_free(devidstr);
		}
	}
#endif	/* DEVID_COMPATIBILITY */

	if (node->devi_node_name) {
		off = di_checkmem(st, off, strlen(node->devi_node_name) + 1);
		me->node_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += strlen(node->devi_node_name) + 1;
	}

	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		off = di_checkmem(st, off, node->devi_compat_length);
		me->compat_names = off;
		me->compat_length = node->devi_compat_length;
		bcopy(node->devi_compat_names, di_mem_addr(st, off),
		    node->devi_compat_length);
		off += node->devi_compat_length;
	}

	if (node->devi_addr) {
		off = di_checkmem(st, off, strlen(node->devi_addr) + 1);
		me->address = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += strlen(node->devi_addr) + 1;
	}

	if (node->devi_binding_name) {
		off = di_checkmem(st, off,
		    strlen(node->devi_binding_name) + 1);
		me->bind_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += strlen(node->devi_binding_name) + 1;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-instance list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
		goto priv_data;
	}

	/*
	 * Grab current per dev_info node lock to
	 * get minor data and properties.
	 */
	mutex_enter(&(node->devi_lock));

	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	if (node->devi_minor) {		/* minor data */
		me->minor_data = DI_ALIGN(off);
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_VHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_VHCI;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
		me->multipath_client = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_client, me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d. off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}

	if (MDI_PHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_PHCI;
		me->multipath_phci = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_phci, me->self, st, 0);
		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
		    "component type = %d. off=%d",
		    me->multipath_phci,
		    (void *)node, node->devi_mdi_component, off));
	}
off=%d", 1868 me->multipath_phci, 1869 (void *)node, node->devi_mdi_component, off)); 1870 } 1871 1872 property: 1873 if (!(DINFOPROP & st->command)) { 1874 goto unlock; 1875 } 1876 1877 if (node->devi_drv_prop_ptr) { /* driver property list */ 1878 me->drv_prop = DI_ALIGN(off); 1879 off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st, 1880 node, DI_PROP_DRV_LIST); 1881 } 1882 1883 if (node->devi_sys_prop_ptr) { /* system property list */ 1884 me->sys_prop = DI_ALIGN(off); 1885 off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st, 1886 node, DI_PROP_SYS_LIST); 1887 } 1888 1889 if (node->devi_hw_prop_ptr) { /* hardware property list */ 1890 me->hw_prop = DI_ALIGN(off); 1891 off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st, 1892 node, DI_PROP_HW_LIST); 1893 } 1894 1895 if (node->devi_global_prop_list == NULL) { 1896 me->glob_prop = (di_off_t)-1; /* not global property */ 1897 } else { 1898 /* 1899 * Make copy of global property list if this devinfo refers 1900 * global properties different from what's on the devnames 1901 * array. It can happen if there has been a forced 1902 * driver.conf update. See mod_drv(1M). 1903 */ 1904 ASSERT(me->drv_major != -1); 1905 if (node->devi_global_prop_list != 1906 devnamesp[me->drv_major].dn_global_prop_ptr) { 1907 me->glob_prop = DI_ALIGN(off); 1908 off = di_getprop(node->devi_global_prop_list->prop_list, 1909 &me->glob_prop, st, node, DI_PROP_GLB_LIST); 1910 } 1911 } 1912 1913 unlock: 1914 /* 1915 * release current per dev_info node lock 1916 */ 1917 mutex_exit(&(node->devi_lock)); 1918 1919 priv_data: 1920 if (!(DINFOPRIVDATA & st->command)) { 1921 goto pm_info; 1922 } 1923 1924 if (ddi_get_parent_data((dev_info_t *)node) != NULL) { 1925 me->parent_data = DI_ALIGN(off); 1926 off = di_getppdata(node, &me->parent_data, st); 1927 } 1928 1929 if (ddi_get_driver_private((dev_info_t *)node) != NULL) { 1930 me->driver_data = DI_ALIGN(off); 1931 off = di_getdpdata(node, &me->driver_data, st); 1932 } 1933 1934 pm_info: /* NOT implemented */ 1935 1936 subtree: 1937 if (!(DINFOSUBTREE & st->command)) { 1938 POP_STACK(dsp); 1939 return (DI_ALIGN(off)); 1940 } 1941 1942 child: 1943 /* 1944 * If there is a child--push child onto stack. 1945 * Hold the parent busy while doing so. 

static i_lnode_t *
i_lnode_alloc(int modid)
{
	i_lnode_t	*i_lnode;

	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);

	ASSERT(modid != -1);
	i_lnode->modid = modid;

	return (i_lnode);
}

static void
i_lnode_free(i_lnode_t *i_lnode)
{
	kmem_free(i_lnode, sizeof (i_lnode_t));
}

static void
i_lnode_check_free(i_lnode_t *i_lnode)
{
	/* This lnode and its dip must have been snapshotted */
	ASSERT(i_lnode->self > 0);
	ASSERT(i_lnode->di_node->self > 0);

	/* at least 1 link (in or out) must exist for this lnode */
	ASSERT(i_lnode->link_in || i_lnode->link_out);

	i_lnode_free(i_lnode);
}

static i_link_t *
i_link_alloc(int spec_type)
{
	i_link_t	*i_link;

	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
	i_link->spec_type = spec_type;

	return (i_link);
}

static void
i_link_check_free(i_link_t *i_link)
{
	/* This link must have been snapshotted */
	ASSERT(i_link->self > 0);

	/* Both endpoint lnodes must exist for this link */
	ASSERT(i_link->src_lnode);
	ASSERT(i_link->tgt_lnode);

	kmem_free(i_link, sizeof (i_link_t));
}

/*ARGSUSED*/
static uint_t
i_lnode_hashfunc(void *arg, mod_hash_key_t key)
{
	i_lnode_t	*i_lnode = (i_lnode_t *)key;
	struct di_node	*ptr;
	dev_t		dev;

	dev = i_lnode->devt;
	if (dev != DDI_DEV_T_NONE)
		return (i_lnode->modid + getminor(dev) + getmajor(dev));

	ptr = i_lnode->di_node;
	if (ptr) {
		uintptr_t k = (uintptr_t)ptr;
		ASSERT(ptr->self > 0);
		k >>= (int)highbit(sizeof (struct di_node));
		return ((uint_t)k);
	}

	return (i_lnode->modid);
}

static int
i_lnode_cmp(void *arg1, void *arg2)
{
	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;

	if (i_lnode1->modid != i_lnode2->modid) {
		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
	}

	if (i_lnode1->di_node != i_lnode2->di_node)
		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);

	if (i_lnode1->devt != i_lnode2->devt)
		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);

	return (0);
}
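
/*
 * Illustrative example of the model used below: if a volume manager with
 * modid M1 opens an underlying disk via LDI, the walk produces a source
 * lnode {M1, volume dip, volume dev_t}, a target lnode {disk modid, disk
 * dip, disk dev_t}, and one link {src_lnode, tgt_lnode, spec_type}
 * recording whether the open was of the block or character device. The
 * names here are hypothetical; any LDI consumer/provider pair produces
 * the same shape.
 */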
 */
static int
di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
{
	struct di_state	*st = (struct di_state *)arg;
	i_lnode_t	*src_lnode, *tgt_lnode, *i_lnode;
	i_link_t	**i_link_next, *i_link;
	di_off_t	soff, toff;
	mod_hash_val_t	nodep = NULL;
	int		res;

	/*
	 * If the source or target of this device usage information doesn't
	 * correspond to a device node, then we don't report it via
	 * libdevinfo, so return.
	 */
	if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
		return (LDI_USAGE_CONTINUE);

	ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
	ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));

	/*
	 * Skip the ldi_usage if either src or tgt dip is not in the
	 * snapshot. This saves us from pruning bad lnodes/links later.
	 */
	if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
		return (LDI_USAGE_CONTINUE);
	if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
		return (LDI_USAGE_CONTINUE);

	ASSERT(soff > 0);
	ASSERT(toff > 0);

	/*
	 * Allocate an i_lnode and add it to the lnode hash
	 * if it is not already present. For this particular
	 * link the lnode is a source, but it may
	 * participate as tgt or src in any number of layered
	 * operations - so it may already be in the hash.
	 */
	i_lnode = i_lnode_alloc(ldi_usage->src_modid);
	i_lnode->di_node = (struct di_node *)di_mem_addr(st, soff);
	i_lnode->devt = ldi_usage->src_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		src_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		src_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate a tgt i_lnode and add it to the lnode hash
	 */
	i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
	i_lnode->di_node = (struct di_node *)di_mem_addr(st, toff);
	i_lnode->devt = ldi_usage->tgt_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		tgt_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		tgt_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate an i_link
	 */
	i_link = i_link_alloc(ldi_usage->tgt_spec_type);
	i_link->src_lnode = src_lnode;
	i_link->tgt_lnode = tgt_lnode;

	/*
	 * add this link onto the src i_lnode's outbound i_link list
	 */
	i_link_next = &(src_lnode->link_out);
	while (*i_link_next != NULL) {
		if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
		    (i_link->spec_type == (*i_link_next)->spec_type)) {
			/* this link already exists */
			kmem_free(i_link, sizeof (i_link_t));
			return (LDI_USAGE_CONTINUE);
		}
		i_link_next = &((*i_link_next)->src_link_next);
	}
	*i_link_next = i_link;

	/*
	 * add this link onto the tgt i_lnode's inbound i_link list
	 */
	i_link_next = &(tgt_lnode->link_in);
	while (*i_link_next != NULL) {
2194 ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); 2195 i_link_next = &((*i_link_next)->tgt_link_next); 2196 } 2197 *i_link_next = i_link; 2198 2199 /* 2200 * add this i_link to the link hash 2201 */ 2202 res = mod_hash_insert(st->link_hash, i_link, i_link); 2203 ASSERT(res == 0); 2204 st->link_count++; 2205 2206 return (LDI_USAGE_CONTINUE); 2207 } 2208 2209 struct i_layer_data { 2210 struct di_state *st; 2211 int lnode_count; 2212 int link_count; 2213 di_off_t lnode_off; 2214 di_off_t link_off; 2215 }; 2216 2217 /*ARGSUSED*/ 2218 static uint_t 2219 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2220 { 2221 i_link_t *i_link = (i_link_t *)key; 2222 struct i_layer_data *data = arg; 2223 struct di_link *me; 2224 struct di_lnode *melnode; 2225 struct di_node *medinode; 2226 2227 ASSERT(i_link->self == 0); 2228 2229 i_link->self = data->link_off + 2230 (data->link_count * sizeof (struct di_link)); 2231 data->link_count++; 2232 2233 ASSERT(data->link_off > 0 && data->link_count > 0); 2234 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ 2235 ASSERT(data->link_count <= data->st->link_count); 2236 2237 /* fill in fields for the di_link snapshot */ 2238 me = (struct di_link *)di_mem_addr(data->st, i_link->self); 2239 me->self = i_link->self; 2240 me->spec_type = i_link->spec_type; 2241 2242 /* 2243 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t 2244 * are created during the LDI table walk. Since we are 2245 * walking the link hash, the lnode hash has already been 2246 * walked and the lnodes have been snapshotted. Save lnode 2247 * offsets. 2248 */ 2249 me->src_lnode = i_link->src_lnode->self; 2250 me->tgt_lnode = i_link->tgt_lnode->self; 2251 2252 /* 2253 * Save this link's offset in the src_lnode snapshot's link_out 2254 * field 2255 */ 2256 melnode = (struct di_lnode *)di_mem_addr(data->st, me->src_lnode); 2257 me->src_link_next = melnode->link_out; 2258 melnode->link_out = me->self; 2259 2260 /* 2261 * Put this link on the tgt_lnode's link_in field 2262 */ 2263 melnode = (struct di_lnode *)di_mem_addr(data->st, me->tgt_lnode); 2264 me->tgt_link_next = melnode->link_in; 2265 melnode->link_in = me->self; 2266 2267 /* 2268 * An i_lnode_t is only created if the corresponding dip exists 2269 * in the snapshot. A pointer to the di_node is saved in the 2270 * i_lnode_t when it is allocated. For this link, get the di_node 2271 * for the source lnode. Then put the link on the di_node's list 2272 * of src links 2273 */ 2274 medinode = i_link->src_lnode->di_node; 2275 me->src_node_next = medinode->src_links; 2276 medinode->src_links = me->self; 2277 2278 /* 2279 * Put this link on the tgt_links list of the target 2280 * dip. 
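 *
 * (Editorial note: like the three lists above, this one is built by
 * prepending, so the snapshot's src_links/tgt_links and
 * link_in/link_out chains come out in reverse discovery order;
 * consumers should not rely on their ordering.)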
 */
	medinode = i_link->tgt_lnode->di_node;
	me->tgt_node_next = medinode->tgt_links;
	medinode->tgt_links = me->self;

	return (MH_WALK_CONTINUE);
}

/*ARGSUSED*/
static uint_t
i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_lnode_t		*i_lnode = (i_lnode_t *)key;
	struct i_layer_data	*data = arg;
	struct di_lnode		*me;
	struct di_node		*medinode;

	ASSERT(i_lnode->self == 0);

	i_lnode->self = data->lnode_off +
	    (data->lnode_count * sizeof (struct di_lnode));
	data->lnode_count++;

	ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
	ASSERT(data->link_count == 0);	/* links not done yet */
	ASSERT(data->lnode_count <= data->st->lnode_count);

	/* fill in fields for the di_lnode snapshot */
	me = (struct di_lnode *)di_mem_addr(data->st, i_lnode->self);
	me->self = i_lnode->self;

	if (i_lnode->devt == DDI_DEV_T_NONE) {
		me->dev_major = (major_t)-1;
		me->dev_minor = (minor_t)-1;
	} else {
		me->dev_major = getmajor(i_lnode->devt);
		me->dev_minor = getminor(i_lnode->devt);
	}

	/*
	 * The dip corresponding to this lnode must exist in
	 * the snapshot or we wouldn't have created the i_lnode_t
	 * during LDI walk. Save the offset of the dip.
	 */
	ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
	me->node = i_lnode->di_node->self;

	/*
	 * There must be at least one link in or out of this lnode
	 * or we wouldn't have created it. These fields will be set
	 * during the link hash walk.
	 */
	ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));

	/*
	 * Set the offset of the devinfo node associated with this
	 * lnode. Also update the node_next pointer; it is non-zero
	 * when multiple lnodes are associated with the same devinfo
	 * node (this can occur when, for example, multiple minor
	 * nodes are open for one device).
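 *
 * A reader-side sketch (assumption: consumer pseudo-code, not code
 * in this driver) of following that offset-based list in a mapped
 * snapshot whose base address is "all":
 *
 *	for (loff = dinode->lnodes; loff != 0; loff = lnp->node_next)
 *		lnp = (struct di_lnode *)((caddr_t)all + loff);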
2341 */ 2342 medinode = i_lnode->di_node; 2343 me->node_next = medinode->lnodes; 2344 medinode->lnodes = me->self; 2345 2346 return (MH_WALK_CONTINUE); 2347 } 2348 2349 static di_off_t 2350 di_getlink_data(di_off_t off, struct di_state *st) 2351 { 2352 struct i_layer_data data = {0}; 2353 size_t size; 2354 2355 dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); 2356 2357 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, 2358 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, 2359 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); 2360 2361 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, 2362 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); 2363 2364 /* get driver layering information */ 2365 (void) ldi_usage_walker(st, di_ldi_callback); 2366 2367 /* check if there is any link data to include in the snapshot */ 2368 if (st->lnode_count == 0) { 2369 ASSERT(st->link_count == 0); 2370 goto out; 2371 } 2372 2373 ASSERT(st->link_count != 0); 2374 2375 /* get a pointer to snapshot memory for all the di_lnodes */ 2376 size = sizeof (struct di_lnode) * st->lnode_count; 2377 data.lnode_off = off = di_checkmem(st, off, size); 2378 off += DI_ALIGN(size); 2379 2380 /* get a pointer to snapshot memory for all the di_links */ 2381 size = sizeof (struct di_link) * st->link_count; 2382 data.link_off = off = di_checkmem(st, off, size); 2383 off += DI_ALIGN(size); 2384 2385 data.lnode_count = data.link_count = 0; 2386 data.st = st; 2387 2388 /* 2389 * We have lnodes and links that will go into the 2390 * snapshot, so let's walk the respective hashes 2391 * and snapshot them. The various linkages are 2392 * also set up during the walk. 2393 */ 2394 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); 2395 ASSERT(data.lnode_count == st->lnode_count); 2396 2397 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); 2398 ASSERT(data.link_count == st->link_count); 2399 2400 out: 2401 /* free up the i_lnodes and i_links used to create the snapshot */ 2402 mod_hash_destroy_hash(st->lnode_hash); 2403 mod_hash_destroy_hash(st->link_hash); 2404 st->lnode_count = 0; 2405 st->link_count = 0; 2406 2407 return (off); 2408 } 2409 2410 2411 /* 2412 * Copy all minor data nodes attached to a devinfo node into the snapshot. 2413 * It is called from di_copynode with devi_lock held. 
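 *
 * (Editorial layout note: each di_minor is written at its own
 * di_checkmem()-aligned offset and chained through me->next; the
 * loop optimistically points next at the following slot, and the
 * last node's next is patched back to 0 once the list is exhausted.)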
2414 */ 2415 static di_off_t 2416 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, 2417 struct di_state *st) 2418 { 2419 di_off_t off; 2420 struct di_minor *me; 2421 2422 dcmn_err2((CE_CONT, "di_getmdata:\n")); 2423 2424 /* 2425 * check memory first 2426 */ 2427 off = di_checkmem(st, *off_p, sizeof (struct di_minor)); 2428 *off_p = off; 2429 2430 do { 2431 me = (struct di_minor *)di_mem_addr(st, off); 2432 me->self = off; 2433 me->type = mnode->type; 2434 me->node = node; 2435 me->user_private_data = NULL; 2436 2437 off += DI_ALIGN(sizeof (struct di_minor)); 2438 2439 /* 2440 * Split dev_t to major/minor, so it works for 2441 * both ILP32 and LP64 model 2442 */ 2443 me->dev_major = getmajor(mnode->ddm_dev); 2444 me->dev_minor = getminor(mnode->ddm_dev); 2445 me->spec_type = mnode->ddm_spec_type; 2446 2447 if (mnode->ddm_name) { 2448 off = di_checkmem(st, off, 2449 strlen(mnode->ddm_name) + 1); 2450 me->name = off; 2451 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name); 2452 off += DI_ALIGN(strlen(mnode->ddm_name) + 1); 2453 } 2454 2455 if (mnode->ddm_node_type) { 2456 off = di_checkmem(st, off, 2457 strlen(mnode->ddm_node_type) + 1); 2458 me->node_type = off; 2459 (void) strcpy(di_mem_addr(st, off), 2460 mnode->ddm_node_type); 2461 off += DI_ALIGN(strlen(mnode->ddm_node_type) + 1); 2462 } 2463 2464 off = di_checkmem(st, off, sizeof (struct di_minor)); 2465 me->next = off; 2466 mnode = mnode->next; 2467 } while (mnode); 2468 2469 me->next = 0; 2470 2471 return (off); 2472 } 2473 2474 /* 2475 * di_register_dip(), di_find_dip(): The dip must be protected 2476 * from deallocation when using these routines - this can either 2477 * be a reference count, a busy hold or a per-driver lock. 2478 */ 2479 2480 static void 2481 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) 2482 { 2483 struct dev_info *node = DEVI(dip); 2484 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2485 struct di_dkey *dk; 2486 2487 ASSERT(dip); 2488 ASSERT(off > 0); 2489 2490 key->k_type = DI_DKEY; 2491 dk = &(key->k_u.dkey); 2492 2493 dk->dk_dip = dip; 2494 dk->dk_major = node->devi_major; 2495 dk->dk_inst = node->devi_instance; 2496 dk->dk_nodeid = node->devi_nodeid; 2497 2498 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, 2499 (mod_hash_val_t)(uintptr_t)off) != 0) { 2500 panic( 2501 "duplicate devinfo (%p) registered during device " 2502 "tree walk", (void *)dip); 2503 } 2504 } 2505 2506 2507 static int 2508 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) 2509 { 2510 /* 2511 * uintptr_t must be used because it matches the size of void *; 2512 * mod_hash expects clients to place results into pointer-size 2513 * containers; since di_off_t is always a 32-bit offset, alignment 2514 * would otherwise be broken on 64-bit kernels. 2515 */ 2516 uintptr_t offset; 2517 struct di_key key = {0}; 2518 struct di_dkey *dk; 2519 2520 ASSERT(st->reg_dip_hash); 2521 ASSERT(dip); 2522 ASSERT(off_p); 2523 2524 2525 key.k_type = DI_DKEY; 2526 dk = &(key.k_u.dkey); 2527 2528 dk->dk_dip = dip; 2529 dk->dk_major = DEVI(dip)->devi_major; 2530 dk->dk_inst = DEVI(dip)->devi_instance; 2531 dk->dk_nodeid = DEVI(dip)->devi_nodeid; 2532 2533 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, 2534 (mod_hash_val_t *)&offset) == 0) { 2535 *off_p = (di_off_t)offset; 2536 return (0); 2537 } else { 2538 return (-1); 2539 } 2540 } 2541 2542 /* 2543 * di_register_pip(), di_find_pip(): The pip must be protected from deallocation 2544 * when using these routines. 
The caller must do this by protecting the 2545 * client(or phci)<->pip linkage while traversing the list and then holding the 2546 * pip when it is found in the list. 2547 */ 2548 2549 static void 2550 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) 2551 { 2552 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2553 char *path_addr; 2554 struct di_pkey *pk; 2555 2556 ASSERT(pip); 2557 ASSERT(off > 0); 2558 2559 key->k_type = DI_PKEY; 2560 pk = &(key->k_u.pkey); 2561 2562 pk->pk_pip = pip; 2563 path_addr = mdi_pi_get_addr(pip); 2564 if (path_addr) 2565 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); 2566 pk->pk_client = mdi_pi_get_client(pip); 2567 pk->pk_phci = mdi_pi_get_phci(pip); 2568 2569 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, 2570 (mod_hash_val_t)(uintptr_t)off) != 0) { 2571 panic( 2572 "duplicate pathinfo (%p) registered during device " 2573 "tree walk", (void *)pip); 2574 } 2575 } 2576 2577 /* 2578 * As with di_register_pip, the caller must hold or lock the pip 2579 */ 2580 static int 2581 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) 2582 { 2583 /* 2584 * uintptr_t must be used because it matches the size of void *; 2585 * mod_hash expects clients to place results into pointer-size 2586 * containers; since di_off_t is always a 32-bit offset, alignment 2587 * would otherwise be broken on 64-bit kernels. 2588 */ 2589 uintptr_t offset; 2590 struct di_key key = {0}; 2591 struct di_pkey *pk; 2592 2593 ASSERT(st->reg_pip_hash); 2594 ASSERT(off_p); 2595 2596 if (pip == NULL) { 2597 *off_p = 0; 2598 return (0); 2599 } 2600 2601 key.k_type = DI_PKEY; 2602 pk = &(key.k_u.pkey); 2603 2604 pk->pk_pip = pip; 2605 pk->pk_path_addr = mdi_pi_get_addr(pip); 2606 pk->pk_client = mdi_pi_get_client(pip); 2607 pk->pk_phci = mdi_pi_get_phci(pip); 2608 2609 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, 2610 (mod_hash_val_t *)&offset) == 0) { 2611 *off_p = (di_off_t)offset; 2612 return (0); 2613 } else { 2614 return (-1); 2615 } 2616 } 2617 2618 static di_path_state_t 2619 path_state_convert(mdi_pathinfo_state_t st) 2620 { 2621 switch (st) { 2622 case MDI_PATHINFO_STATE_ONLINE: 2623 return (DI_PATH_STATE_ONLINE); 2624 case MDI_PATHINFO_STATE_STANDBY: 2625 return (DI_PATH_STATE_STANDBY); 2626 case MDI_PATHINFO_STATE_OFFLINE: 2627 return (DI_PATH_STATE_OFFLINE); 2628 case MDI_PATHINFO_STATE_FAULT: 2629 return (DI_PATH_STATE_FAULT); 2630 default: 2631 return (DI_PATH_STATE_UNKNOWN); 2632 } 2633 } 2634 2635 2636 static di_off_t 2637 di_path_getprop(mdi_pathinfo_t *pip, di_off_t off, di_off_t *off_p, 2638 struct di_state *st) 2639 { 2640 nvpair_t *prop = NULL; 2641 struct di_path_prop *me; 2642 2643 if (mdi_pi_get_next_prop(pip, NULL) == NULL) { 2644 *off_p = 0; 2645 return (off); 2646 } 2647 2648 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2649 *off_p = off; 2650 2651 while (prop = mdi_pi_get_next_prop(pip, prop)) { 2652 int delta = 0; 2653 2654 me = (struct di_path_prop *)di_mem_addr(st, off); 2655 me->self = off; 2656 off += sizeof (struct di_path_prop); 2657 2658 /* 2659 * property name 2660 */ 2661 off = di_checkmem(st, off, strlen(nvpair_name(prop)) + 1); 2662 me->prop_name = off; 2663 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); 2664 off += strlen(nvpair_name(prop)) + 1; 2665 2666 switch (nvpair_type(prop)) { 2667 case DATA_TYPE_BYTE: 2668 case DATA_TYPE_INT16: 2669 case DATA_TYPE_UINT16: 2670 case DATA_TYPE_INT32: 2671 case DATA_TYPE_UINT32: 2672 delta = sizeof (int32_t); 2673 
me->prop_type = DDI_PROP_TYPE_INT; 2674 off = di_checkmem(st, off, delta); 2675 (void) nvpair_value_int32(prop, 2676 (int32_t *)di_mem_addr(st, off)); 2677 break; 2678 2679 case DATA_TYPE_INT64: 2680 case DATA_TYPE_UINT64: 2681 delta = sizeof (int64_t); 2682 me->prop_type = DDI_PROP_TYPE_INT64; 2683 off = di_checkmem(st, off, delta); 2684 (void) nvpair_value_int64(prop, 2685 (int64_t *)di_mem_addr(st, off)); 2686 break; 2687 2688 case DATA_TYPE_STRING: 2689 { 2690 char *str; 2691 (void) nvpair_value_string(prop, &str); 2692 delta = strlen(str) + 1; 2693 me->prop_type = DDI_PROP_TYPE_STRING; 2694 off = di_checkmem(st, off, delta); 2695 (void) strcpy(di_mem_addr(st, off), str); 2696 break; 2697 } 2698 case DATA_TYPE_BYTE_ARRAY: 2699 case DATA_TYPE_INT16_ARRAY: 2700 case DATA_TYPE_UINT16_ARRAY: 2701 case DATA_TYPE_INT32_ARRAY: 2702 case DATA_TYPE_UINT32_ARRAY: 2703 case DATA_TYPE_INT64_ARRAY: 2704 case DATA_TYPE_UINT64_ARRAY: 2705 { 2706 uchar_t *buf; 2707 uint_t nelems; 2708 (void) nvpair_value_byte_array(prop, &buf, &nelems); 2709 delta = nelems; 2710 me->prop_type = DDI_PROP_TYPE_BYTE; 2711 if (nelems != 0) { 2712 off = di_checkmem(st, off, delta); 2713 bcopy(buf, di_mem_addr(st, off), nelems); 2714 } 2715 break; 2716 } 2717 2718 default: /* Unknown or unhandled type; skip it */ 2719 delta = 0; 2720 break; 2721 } 2722 2723 if (delta > 0) { 2724 me->prop_data = off; 2725 } 2726 2727 me->prop_len = delta; 2728 off += delta; 2729 2730 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2731 me->prop_next = off; 2732 } 2733 2734 me->prop_next = 0; 2735 return (off); 2736 } 2737 2738 2739 static void 2740 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp, 2741 int get_client) 2742 { 2743 if (get_client) { 2744 ASSERT(me->path_client == 0); 2745 me->path_client = noff; 2746 ASSERT(me->path_c_link == 0); 2747 *off_pp = &me->path_c_link; 2748 me->path_snap_state &= 2749 ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK); 2750 } else { 2751 ASSERT(me->path_phci == 0); 2752 me->path_phci = noff; 2753 ASSERT(me->path_p_link == 0); 2754 *off_pp = &me->path_p_link; 2755 me->path_snap_state &= 2756 ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK); 2757 } 2758 } 2759 2760 /* 2761 * poff_p: pointer to the linkage field. This links pips along the client|phci 2762 * linkage list. 2763 * noff : Offset for the endpoint dip snapshot. 2764 */ 2765 static di_off_t 2766 di_getpath_data(dev_info_t *dip, di_off_t *poff_p, di_off_t noff, 2767 struct di_state *st, int get_client) 2768 { 2769 di_off_t off; 2770 mdi_pathinfo_t *pip; 2771 struct di_path *me; 2772 mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *); 2773 2774 dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client)); 2775 2776 /* 2777 * The naming of the following mdi_xyz() is unfortunately 2778 * non-intuitive. mdi_get_next_phci_path() follows the 2779 * client_link i.e. the list of pip's belonging to the 2780 * given client dip. 2781 */ 2782 if (get_client) 2783 next_pip = &mdi_get_next_phci_path; 2784 else 2785 next_pip = &mdi_get_next_client_path; 2786 2787 off = *poff_p; 2788 2789 pip = NULL; 2790 while (pip = (*next_pip)(dip, pip)) { 2791 mdi_pathinfo_state_t state; 2792 di_off_t stored_offset; 2793 2794 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip)); 2795 2796 mdi_pi_lock(pip); 2797 2798 if (di_pip_find(st, pip, &stored_offset) != -1) { 2799 /* 2800 * We've already seen this pathinfo node so we need to 2801 * take care not to snap it again; However, one endpoint 2802 * and linkage will be set here. 
The other endpoint 2803 * and linkage has already been set when the pip was 2804 * first snapshotted i.e. when the other endpoint dip 2805 * was snapshotted. 2806 */ 2807 me = (struct di_path *)di_mem_addr(st, stored_offset); 2808 2809 *poff_p = stored_offset; 2810 2811 di_path_one_endpoint(me, noff, &poff_p, get_client); 2812 2813 /* 2814 * The other endpoint and linkage were set when this 2815 * pip was snapshotted. So we are done with both 2816 * endpoints and linkages. 2817 */ 2818 ASSERT(!(me->path_snap_state & 2819 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI))); 2820 ASSERT(!(me->path_snap_state & 2821 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK))); 2822 2823 mdi_pi_unlock(pip); 2824 continue; 2825 } 2826 2827 /* 2828 * Now that we need to snapshot this pip, check memory 2829 */ 2830 off = di_checkmem(st, off, sizeof (struct di_path)); 2831 me = (struct di_path *)di_mem_addr(st, off); 2832 me->self = off; 2833 *poff_p = off; 2834 off += sizeof (struct di_path); 2835 2836 me->path_snap_state = 2837 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK; 2838 me->path_snap_state |= 2839 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI; 2840 2841 /* 2842 * Zero out fields as di_checkmem() doesn't guarantee 2843 * zero-filled memory 2844 */ 2845 me->path_client = me->path_phci = 0; 2846 me->path_c_link = me->path_p_link = 0; 2847 2848 di_path_one_endpoint(me, noff, &poff_p, get_client); 2849 2850 /* 2851 * Note the existence of this pathinfo 2852 */ 2853 di_register_pip(st, pip, me->self); 2854 2855 state = mdi_pi_get_state(pip); 2856 me->path_state = path_state_convert(state); 2857 2858 /* 2859 * Get intermediate addressing info. 2860 */ 2861 off = di_checkmem(st, off, strlen(mdi_pi_get_addr(pip)) + 1); 2862 me->path_addr = off; 2863 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip)); 2864 off += strlen(mdi_pi_get_addr(pip)) + 1; 2865 2866 /* 2867 * Get path properties if props are to be included in the 2868 * snapshot 2869 */ 2870 if (DINFOPROP & st->command) { 2871 off = di_path_getprop(pip, off, &me->path_prop, st); 2872 } else { 2873 me->path_prop = 0; 2874 } 2875 2876 mdi_pi_unlock(pip); 2877 } 2878 2879 *poff_p = 0; 2880 2881 return (off); 2882 } 2883 2884 /* 2885 * Copy a list of properties attached to a devinfo node. Called from 2886 * di_copynode with devi_lock held. The major number is passed in case 2887 * we need to call driver's prop_op entry. The value of list indicates 2888 * which list we are copying. Possible values are: 2889 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST 2890 */ 2891 static di_off_t 2892 di_getprop(struct ddi_prop *prop, di_off_t *off_p, struct di_state *st, 2893 struct dev_info *dip, int list) 2894 { 2895 dev_t dev; 2896 int (*prop_op)(); 2897 int off, need_prop_op = 0; 2898 int prop_op_fail = 0; 2899 ddi_prop_t *propp = NULL; 2900 struct di_prop *pp; 2901 struct dev_ops *ops = NULL; 2902 int prop_len; 2903 caddr_t prop_val; 2904 2905 2906 dcmn_err2((CE_CONT, "di_getprop:\n")); 2907 2908 ASSERT(st != NULL); 2909 2910 dcmn_err((CE_CONT, "copy property list at addr %p\n", (void *)prop)); 2911 2912 /* 2913 * Figure out if we need to call driver's prop_op entry point. 2914 * The conditions are: 2915 * -- driver property list 2916 * -- driver must be attached and held 2917 * -- driver's cb_prop_op != ddi_prop_op 2918 * or parent's bus_prop_op != ddi_bus_prop_op 2919 */ 2920 2921 if (list != DI_PROP_DRV_LIST) { 2922 goto getprop; 2923 } 2924 2925 /* 2926 * If driver is not attached or if major is -1, we ignore 2927 * the driver property list. 
No one should rely on such 2928 * properties. 2929 */ 2930 if (i_ddi_node_state((dev_info_t *)dip) < DS_ATTACHED) { 2931 off = *off_p; 2932 *off_p = 0; 2933 return (off); 2934 } 2935 2936 /* 2937 * Now we have a driver which is held. We can examine entry points 2938 * and check the condition listed above. 2939 */ 2940 ops = dip->devi_ops; 2941 2942 /* 2943 * Some nexus drivers incorrectly set cb_prop_op to nodev, 2944 * nulldev or even NULL. 2945 */ 2946 if (ops && ops->devo_cb_ops && 2947 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) && 2948 (ops->devo_cb_ops->cb_prop_op != nodev) && 2949 (ops->devo_cb_ops->cb_prop_op != nulldev) && 2950 (ops->devo_cb_ops->cb_prop_op != NULL)) { 2951 need_prop_op = 1; 2952 } 2953 2954 getprop: 2955 /* 2956 * check memory availability 2957 */ 2958 off = di_checkmem(st, *off_p, sizeof (struct di_prop)); 2959 *off_p = off; 2960 /* 2961 * Now copy properties 2962 */ 2963 do { 2964 pp = (struct di_prop *)di_mem_addr(st, off); 2965 pp->self = off; 2966 /* 2967 * Split dev_t to major/minor, so it works for 2968 * both ILP32 and LP64 model 2969 */ 2970 pp->dev_major = getmajor(prop->prop_dev); 2971 pp->dev_minor = getminor(prop->prop_dev); 2972 pp->prop_flags = prop->prop_flags; 2973 pp->prop_list = list; 2974 2975 /* 2976 * property name 2977 */ 2978 off += sizeof (struct di_prop); 2979 if (prop->prop_name) { 2980 off = di_checkmem(st, off, strlen(prop->prop_name) 2981 + 1); 2982 pp->prop_name = off; 2983 (void) strcpy(di_mem_addr(st, off), prop->prop_name); 2984 off += strlen(prop->prop_name) + 1; 2985 } 2986 2987 /* 2988 * Set prop_len here. This may change later 2989 * if cb_prop_op returns a different length. 2990 */ 2991 pp->prop_len = prop->prop_len; 2992 if (!need_prop_op) { 2993 if (prop->prop_val == NULL) { 2994 dcmn_err((CE_WARN, 2995 "devinfo: property fault at %p", 2996 (void *)prop)); 2997 pp->prop_data = -1; 2998 } else if (prop->prop_len != 0) { 2999 off = di_checkmem(st, off, prop->prop_len); 3000 pp->prop_data = off; 3001 bcopy(prop->prop_val, di_mem_addr(st, off), 3002 prop->prop_len); 3003 off += DI_ALIGN(pp->prop_len); 3004 } 3005 } 3006 3007 off = di_checkmem(st, off, sizeof (struct di_prop)); 3008 pp->next = off; 3009 prop = prop->prop_next; 3010 } while (prop); 3011 3012 pp->next = 0; 3013 3014 if (!need_prop_op) { 3015 dcmn_err((CE_CONT, "finished property " 3016 "list at offset 0x%x\n", off)); 3017 return (off); 3018 } 3019 3020 /* 3021 * If there is a need to call driver's prop_op entry, 3022 * we must release driver's devi_lock, because the 3023 * cb_prop_op entry point will grab it. 3024 * 3025 * The snapshot memory has already been allocated above, 3026 * which means the length of an active property should 3027 * remain fixed for this implementation to work. 
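 *
 * (Editorial note: the loop below therefore runs without devi_lock,
 * walking only the di_prop records already copied into the snapshot
 * via their pp->next offsets; the live list is consulted solely
 * through i_ddi_prop_search() when a cb_prop_op call fails.)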
 */


	prop_op = ops->devo_cb_ops->cb_prop_op;
	pp = (struct di_prop *)di_mem_addr(st, *off_p);

	mutex_exit(&dip->devi_lock);

	do {
		int err;
		struct di_prop *tmp;

		/*
		 * Reset the failure flag for each property; otherwise a
		 * single cb_prop_op failure would mark every subsequent
		 * property as faulted as well.
		 */
		prop_op_fail = 0;

		if (pp->next) {
			tmp = (struct di_prop *)
			    di_mem_addr(st, pp->next);
		} else {
			tmp = NULL;
		}

		/*
		 * call into driver's prop_op entry point
		 *
		 * Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY
		 */
		dev = makedevice(pp->dev_major, pp->dev_minor);
		if (dev == DDI_DEV_T_NONE)
			dev = DDI_DEV_T_ANY;

		dcmn_err((CE_CONT, "call prop_op"
		    "(%lx, %p, PROP_LEN_AND_VAL_ALLOC, "
		    "DDI_PROP_DONTPASS, \"%s\", %p, &%d)\n",
		    dev,
		    (void *)dip,
		    (char *)di_mem_addr(st, pp->prop_name),
		    (void *)di_mem_addr(st, pp->prop_data),
		    pp->prop_len));

		if ((err = (*prop_op)(dev, (dev_info_t *)dip,
		    PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS,
		    (char *)di_mem_addr(st, pp->prop_name),
		    &prop_val, &prop_len)) != DDI_PROP_SUCCESS) {
			if ((propp = i_ddi_prop_search(dev,
			    (char *)di_mem_addr(st, pp->prop_name),
			    (uint_t)pp->prop_flags,
			    &(DEVI(dip)->devi_drv_prop_ptr))) != NULL) {
				pp->prop_len = propp->prop_len;
				if (pp->prop_len != 0) {
					off = di_checkmem(st, off,
					    pp->prop_len);
					pp->prop_data = off;
					bcopy(propp->prop_val, di_mem_addr(st,
					    pp->prop_data), propp->prop_len);
					off += DI_ALIGN(pp->prop_len);
				}
			} else {
				prop_op_fail = 1;
			}
		} else if (prop_len != 0) {
			pp->prop_len = prop_len;
			off = di_checkmem(st, off, prop_len);
			pp->prop_data = off;
			bcopy(prop_val, di_mem_addr(st, off), prop_len);
			off += DI_ALIGN(prop_len);
			kmem_free(prop_val, prop_len);
		}

		if (prop_op_fail) {
			pp->prop_data = -1;
			dcmn_err((CE_WARN, "devinfo: prop_op failure "
			    "for \"%s\" err %d",
			    di_mem_addr(st, pp->prop_name), err));
		}

		pp = tmp;

	} while (pp);

	mutex_enter(&dip->devi_lock);
	dcmn_err((CE_CONT, "finished property list at offset 0x%x\n", off));
	return (off);
}

/*
 * find the private data format attached to a dip
 * parent = 1 to match the driver name of the parent dip
 *	    (for parent private data)
 *	    0 to match the driver name of the current dip
 *	    (for driver private data)
 */
#define	DI_MATCH_DRIVER	0
#define	DI_MATCH_PARENT	1

struct di_priv_format *
di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
{
	int			i, count, len;
	char			*drv_name;
	major_t			major;
	struct di_all		*all;
	struct di_priv_format	*form;

	dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
	    node->devi_node_name, match));

	if (match == DI_MATCH_PARENT) {
		node = DEVI(node->devi_parent);
	}

	if (node == NULL) {
		return (NULL);
	}

	major = ddi_name_to_major(node->devi_binding_name);
	if (major == (major_t)(-1)) {
		return (NULL);
	}

	/*
	 * Match the driver name.
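 *
 * (Added example: a di_priv_format entry may list several driver
 * names in a single space-separated string; the loop below
 * prefix-matches each name in turn, so a hypothetical entry of
 * "mpt glm" would match a node bound to either driver.)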
 */
	drv_name = ddi_major_to_name(major);
	if ((drv_name == NULL) || *drv_name == '\0') {
		return (NULL);
	}

	/* Now get the di_priv_format array */
	all = (struct di_all *)di_mem_addr(st, 0);

	if (match == DI_MATCH_PARENT) {
		count = all->n_ppdata;
		form = (struct di_priv_format *)
		    (di_mem_addr(st, 0) + all->ppdata_format);
	} else {
		count = all->n_dpdata;
		form = (struct di_priv_format *)
		    ((caddr_t)all + all->dpdata_format);
	}

	len = strlen(drv_name);
	for (i = 0; i < count; i++) {
		char *tmp;

		tmp = form[i].drv_name;
		while (tmp && (*tmp != '\0')) {
			if (strncmp(drv_name, tmp, len) == 0) {
				return (&form[i]);
			}
			/*
			 * Move to the next driver name, skipping a
			 * white space
			 */
			if (tmp = strchr(tmp, ' ')) {
				tmp++;
			}
		}
	}

	return (NULL);
}

/*
 * The following functions copy data as specified by the format passed in.
 * To prevent an invalid format from panicking the system, we call
 * on_fault(). A return value of 0 indicates an error. Otherwise, the
 * total offset is returned.
 */
#define	DI_MAX_PRIVDATA	(PAGESIZE >> 1)	/* max private data size */

static di_off_t
di_getprvdata(struct di_priv_format *pdp, void *data, di_off_t *off_p,
	struct di_state *st)
{
	caddr_t		pa;
	void		*ptr;
	int		i, size, repeat;
	di_off_t	off, off0, *tmp;

	label_t		ljb;

	dcmn_err2((CE_CONT, "di_getprvdata:\n"));

	/*
	 * check memory availability. Private data size is
	 * limited to DI_MAX_PRIVDATA.
	 */
	off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);

	if ((pdp->bytes <= 0) || pdp->bytes > DI_MAX_PRIVDATA) {
		goto failure;
	}

	if (!on_fault(&ljb)) {
		/* copy the struct */
		bcopy(data, di_mem_addr(st, off), pdp->bytes);
		off0 = DI_ALIGN(pdp->bytes);

		/* dereferencing pointers */
		for (i = 0; i < MAX_PTR_IN_PRV; i++) {

			if (pdp->ptr[i].size == 0) {
				goto success;	/* no more ptrs */
			}

			/*
			 * first, get the pointer content
			 */
			if ((pdp->ptr[i].offset < 0) ||
			    (pdp->ptr[i].offset >
			    pdp->bytes - sizeof (char *)))
				goto failure;	/* wrong offset */

			pa = di_mem_addr(st, off + pdp->ptr[i].offset);
			tmp = (di_off_t *)pa;	/* to store off_t later */

			ptr = *((void **)pa);	/* get pointer value */
			if (ptr == NULL) {	/* if NULL pointer, go on */
				continue;
			}

			/*
			 * next, find the repeat count (array dimension)
			 */
			repeat = pdp->ptr[i].len_offset;

			/*
			 * A zero or positive value is the offset of an
			 * int member of the structure that holds the
			 * array dimension (a variable sized array).
			 *
			 * A negative value indicates a fixed sized array,
			 * whose dimension is the absolute value of the
			 * field.
			 */
			if (repeat > pdp->bytes - sizeof (int)) {
				goto failure;	/* wrong offset */
			}

			if (repeat >= 0) {
				repeat = *((int *)((caddr_t)data + repeat));
			} else {
				repeat = -repeat;
			}

			/*
			 * next, get the size of the object to be copied
			 */
			size = pdp->ptr[i].size * repeat;

			/*
			 * Arbitrarily limit the total size of the object
			 * to be copied (1 byte to 1/4 page).
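 *
 * Illustration (hypothetical format, field names as used here): an
 * entry with bytes = sizeof (struct xx), ptr[0].offset =
 * offsetof(struct xx, xx_buf), ptr[0].size = sizeof (xx_ent_t) and
 * ptr[0].len_offset = -4 copies the struct plus a fixed 4-element
 * array reached through xx_buf, after which the embedded pointer is
 * rewritten as a snapshot-relative offset (*tmp = off0 below).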
 */
			if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
				goto failure;	/* wrong size or too big */
			}

			/*
			 * Now copy the data
			 */
			*tmp = off0;
			bcopy(ptr, di_mem_addr(st, off + off0), size);
			off0 += DI_ALIGN(size);
		}
	} else {
		goto failure;
	}

success:
	/*
	 * success if reached here
	 */
	no_fault();
	*off_p = off;

	return (off + off0);
	/*NOTREACHED*/

failure:
	/*
	 * a fault occurred or the format was bad
	 */
	no_fault();
	cmn_err(CE_WARN, "devinfo: fault in private data at %p", data);
	*off_p = -1;	/* set private data to indicate error */

	return (off);
}

/*
 * get parent private data; on error, returns original offset
 */
static di_off_t
di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
	int off;
	struct di_priv_format *ppdp;

	dcmn_err2((CE_CONT, "di_getppdata:\n"));

	/* find the parent data format */
	if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
		off = *off_p;
		*off_p = 0;	/* set parent data to none */
		return (off);
	}

	return (di_getprvdata(ppdp, ddi_get_parent_data((dev_info_t *)node),
	    off_p, st));
}

/*
 * get driver private data; on error, returns original offset
 */
static di_off_t
di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
	int off;
	struct di_priv_format *dpdp;

	dcmn_err2((CE_CONT, "di_getdpdata:"));

	/* find the driver data format */
	if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
		off = *off_p;
		*off_p = 0;	/* set driver data to none */
		return (off);
	}

	return (di_getprvdata(dpdp, ddi_get_driver_private((dev_info_t *)node),
	    off_p, st));
}

/*
 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
 * This function encapsulates the state machine:
 *
 *	-> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
 *	|		SNAPSHOT		USRLD	 |
 *	--------------------------------------------------
 *
 * Returns 0 on success and -1 on failure
 */
static int
di_setstate(struct di_state *st, int new_state)
{
	int	ret = 0;

	mutex_enter(&di_lock);
	switch (new_state) {
	case IOC_IDLE:
	case IOC_DONE:
		break;
	case IOC_SNAP:
		if (st->di_iocstate != IOC_IDLE)
			ret = -1;
		break;
	case IOC_COPY:
		if (st->di_iocstate != IOC_DONE)
			ret = -1;
		break;
	default:
		ret = -1;
	}

	if (ret == 0)
		st->di_iocstate = new_state;
	else
		cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
		    st->di_iocstate, new_state);
	mutex_exit(&di_lock);
	return (ret);
}

/*
 * We cannot assume the presence of the entire
 * snapshot in this routine.
All we are guaranteed 3400 * is the di_all struct + 1 byte (for root_path) 3401 */ 3402 static int 3403 header_plus_one_ok(struct di_all *all) 3404 { 3405 /* 3406 * Refuse to read old versions 3407 */ 3408 if (all->version != DI_SNAPSHOT_VERSION) { 3409 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); 3410 return (0); 3411 } 3412 3413 if (all->cache_magic != DI_CACHE_MAGIC) { 3414 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); 3415 return (0); 3416 } 3417 3418 if (all->snapshot_time <= 0) { 3419 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); 3420 return (0); 3421 } 3422 3423 if (all->top_devinfo == 0) { 3424 CACHE_DEBUG((DI_ERR, "NULL top devinfo")); 3425 return (0); 3426 } 3427 3428 if (all->map_size < sizeof (*all) + 1) { 3429 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); 3430 return (0); 3431 } 3432 3433 if (all->root_path[0] != '/' || all->root_path[1] != '\0') { 3434 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", 3435 all->root_path[0], all->root_path[1])); 3436 return (0); 3437 } 3438 3439 /* 3440 * We can't check checksum here as we just have the header 3441 */ 3442 3443 return (1); 3444 } 3445 3446 static int 3447 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) 3448 { 3449 rlim64_t rlimit; 3450 ssize_t resid; 3451 int error = 0; 3452 3453 3454 rlimit = RLIM64_INFINITY; 3455 3456 while (len) { 3457 resid = 0; 3458 error = vn_rdwr(UIO_WRITE, vp, buf, len, off, 3459 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 3460 3461 if (error || resid < 0) { 3462 error = error ? error : EIO; 3463 CACHE_DEBUG((DI_ERR, "write error: %d", error)); 3464 break; 3465 } 3466 3467 /* 3468 * Check if we are making progress 3469 */ 3470 if (resid >= len) { 3471 error = ENOSPC; 3472 break; 3473 } 3474 buf += len - resid; 3475 off += len - resid; 3476 len = resid; 3477 } 3478 3479 return (error); 3480 } 3481 3482 extern int modrootloaded; 3483 extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *); 3484 extern void mdi_vhci_walk_phcis(dev_info_t *, 3485 int (*)(dev_info_t *, void *), void *); 3486 3487 static void 3488 di_cache_write(struct di_cache *cache) 3489 { 3490 struct di_all *all; 3491 struct vnode *vp; 3492 int oflags; 3493 size_t map_size; 3494 size_t chunk; 3495 offset_t off; 3496 int error; 3497 char *buf; 3498 3499 ASSERT(DI_CACHE_LOCKED(*cache)); 3500 ASSERT(!servicing_interrupt()); 3501 3502 if (cache->cache_size == 0) { 3503 ASSERT(cache->cache_data == NULL); 3504 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); 3505 return; 3506 } 3507 3508 ASSERT(cache->cache_size > 0); 3509 ASSERT(cache->cache_data); 3510 3511 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { 3512 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); 3513 return; 3514 } 3515 3516 all = (struct di_all *)cache->cache_data; 3517 3518 if (!header_plus_one_ok(all)) { 3519 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); 3520 return; 3521 } 3522 3523 ASSERT(strcmp(all->root_path, "/") == 0); 3524 3525 /* 3526 * The cache_size is the total allocated memory for the cache. 3527 * The map_size is the actual size of valid data in the cache. 3528 * map_size may be smaller than cache_size but cannot exceed 3529 * cache_size. 3530 */ 3531 if (all->map_size > cache->cache_size) { 3532 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." 
3533 " Skipping write", all->map_size, cache->cache_size)); 3534 return; 3535 } 3536 3537 /* 3538 * First unlink the temp file 3539 */ 3540 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3541 if (error && error != ENOENT) { 3542 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3543 DI_CACHE_TEMP, error)); 3544 } 3545 3546 if (error == EROFS) { 3547 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3548 return; 3549 } 3550 3551 vp = NULL; 3552 oflags = (FCREAT|FWRITE); 3553 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3554 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3555 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3556 DI_CACHE_TEMP, error)); 3557 return; 3558 } 3559 3560 ASSERT(vp); 3561 3562 /* 3563 * Paranoid: Check if the file is on a read-only FS 3564 */ 3565 if (vn_is_readonly(vp)) { 3566 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3567 goto fail; 3568 } 3569 3570 /* 3571 * Note that we only write map_size bytes to disk - this saves 3572 * space as the actual cache size may be larger than size of 3573 * valid data in the cache. 3574 * Another advantage is that it makes verification of size 3575 * easier when the file is read later. 3576 */ 3577 map_size = all->map_size; 3578 off = 0; 3579 buf = cache->cache_data; 3580 3581 while (map_size) { 3582 ASSERT(map_size > 0); 3583 /* 3584 * Write in chunks so that VM system 3585 * is not overwhelmed 3586 */ 3587 if (map_size > di_chunk * PAGESIZE) 3588 chunk = di_chunk * PAGESIZE; 3589 else 3590 chunk = map_size; 3591 3592 error = chunk_write(vp, off, buf, chunk); 3593 if (error) { 3594 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3595 off, error)); 3596 goto fail; 3597 } 3598 3599 off += chunk; 3600 buf += chunk; 3601 map_size -= chunk; 3602 3603 /* Give pageout a chance to run */ 3604 delay(1); 3605 } 3606 3607 /* 3608 * Now sync the file and close it 3609 */ 3610 if (error = VOP_FSYNC(vp, FSYNC, kcred)) { 3611 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3612 } 3613 3614 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred)) { 3615 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3616 VN_RELE(vp); 3617 return; 3618 } 3619 3620 VN_RELE(vp); 3621 3622 /* 3623 * Now do the rename 3624 */ 3625 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3626 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3627 return; 3628 } 3629 3630 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3631 3632 return; 3633 3634 fail: 3635 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred); 3636 VN_RELE(vp); 3637 } 3638 3639 3640 /* 3641 * Since we could be called early in boot, 3642 * use kobj_read_file() 3643 */ 3644 static void 3645 di_cache_read(struct di_cache *cache) 3646 { 3647 struct _buf *file; 3648 struct di_all *all; 3649 int n; 3650 size_t map_size, sz, chunk; 3651 offset_t off; 3652 caddr_t buf; 3653 uint32_t saved_crc, crc; 3654 3655 ASSERT(modrootloaded); 3656 ASSERT(DI_CACHE_LOCKED(*cache)); 3657 ASSERT(cache->cache_data == NULL); 3658 ASSERT(cache->cache_size == 0); 3659 ASSERT(!servicing_interrupt()); 3660 3661 file = kobj_open_file(DI_CACHE_FILE); 3662 if (file == (struct _buf *)-1) { 3663 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3664 DI_CACHE_FILE, ENOENT)); 3665 return; 3666 } 3667 3668 /* 3669 * Read in the header+root_path first. 
The root_path must be "/" 3670 */ 3671 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); 3672 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); 3673 3674 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { 3675 kmem_free(all, sizeof (*all) + 1); 3676 kobj_close_file(file); 3677 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); 3678 return; 3679 } 3680 3681 map_size = all->map_size; 3682 3683 kmem_free(all, sizeof (*all) + 1); 3684 3685 ASSERT(map_size >= sizeof (*all) + 1); 3686 3687 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); 3688 sz = map_size; 3689 off = 0; 3690 while (sz) { 3691 /* Don't overload VM with large reads */ 3692 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; 3693 n = kobj_read_file(file, buf, chunk, off); 3694 if (n != chunk) { 3695 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", 3696 DI_CACHE_FILE, off)); 3697 goto fail; 3698 } 3699 off += chunk; 3700 buf += chunk; 3701 sz -= chunk; 3702 } 3703 3704 ASSERT(off == map_size); 3705 3706 /* 3707 * Read past expected EOF to verify size. 3708 */ 3709 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { 3710 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); 3711 goto fail; 3712 } 3713 3714 all = (struct di_all *)di_cache.cache_data; 3715 if (!header_plus_one_ok(all)) { 3716 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); 3717 goto fail; 3718 } 3719 3720 /* 3721 * Compute CRC with checksum field in the cache data set to 0 3722 */ 3723 saved_crc = all->cache_checksum; 3724 all->cache_checksum = 0; 3725 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); 3726 all->cache_checksum = saved_crc; 3727 3728 if (crc != all->cache_checksum) { 3729 CACHE_DEBUG((DI_ERR, 3730 "%s: checksum error: expected=0x%x actual=0x%x", 3731 DI_CACHE_FILE, all->cache_checksum, crc)); 3732 goto fail; 3733 } 3734 3735 if (all->map_size != map_size) { 3736 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); 3737 goto fail; 3738 } 3739 3740 kobj_close_file(file); 3741 3742 di_cache.cache_size = map_size; 3743 3744 return; 3745 3746 fail: 3747 kmem_free(di_cache.cache_data, map_size); 3748 kobj_close_file(file); 3749 di_cache.cache_data = NULL; 3750 di_cache.cache_size = 0; 3751 } 3752 3753 3754 /* 3755 * Checks if arguments are valid for using the cache. 
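 *
 * (Editorial note: DINFOCACHE must be the only flag present in
 * st->command; combining it with any other snapshot flag (e.g.
 * DINFOFORCE) fails the equality check below and returns EINVAL
 * to the caller.)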
3756 */ 3757 static int 3758 cache_args_valid(struct di_state *st, int *error) 3759 { 3760 ASSERT(error); 3761 ASSERT(st->mem_size > 0); 3762 ASSERT(st->memlist != NULL); 3763 3764 if (!modrootloaded || !i_ddi_io_initialized()) { 3765 CACHE_DEBUG((DI_ERR, 3766 "cache lookup failure: I/O subsystem not inited")); 3767 *error = ENOTACTIVE; 3768 return (0); 3769 } 3770 3771 /* 3772 * No other flags allowed with DINFOCACHE 3773 */ 3774 if (st->command != (DINFOCACHE & DIIOC_MASK)) { 3775 CACHE_DEBUG((DI_ERR, 3776 "cache lookup failure: bad flags: 0x%x", 3777 st->command)); 3778 *error = EINVAL; 3779 return (0); 3780 } 3781 3782 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3783 CACHE_DEBUG((DI_ERR, 3784 "cache lookup failure: bad root: %s", 3785 DI_ALL_PTR(st)->root_path)); 3786 *error = EINVAL; 3787 return (0); 3788 } 3789 3790 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); 3791 3792 *error = 0; 3793 3794 return (1); 3795 } 3796 3797 static int 3798 snapshot_is_cacheable(struct di_state *st) 3799 { 3800 ASSERT(st->mem_size > 0); 3801 ASSERT(st->memlist != NULL); 3802 3803 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) != 3804 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { 3805 CACHE_DEBUG((DI_INFO, 3806 "not cacheable: incompatible flags: 0x%x", 3807 st->command)); 3808 return (0); 3809 } 3810 3811 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3812 CACHE_DEBUG((DI_INFO, 3813 "not cacheable: incompatible root path: %s", 3814 DI_ALL_PTR(st)->root_path)); 3815 return (0); 3816 } 3817 3818 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); 3819 3820 return (1); 3821 } 3822 3823 static int 3824 di_cache_lookup(struct di_state *st) 3825 { 3826 size_t rval; 3827 int cache_valid; 3828 3829 ASSERT(cache_args_valid(st, &cache_valid)); 3830 ASSERT(modrootloaded); 3831 3832 DI_CACHE_LOCK(di_cache); 3833 3834 /* 3835 * The following assignment determines the validity 3836 * of the cache as far as this snapshot is concerned. 3837 */ 3838 cache_valid = di_cache.cache_valid; 3839 3840 if (cache_valid && di_cache.cache_data == NULL) { 3841 di_cache_read(&di_cache); 3842 /* check for read or file error */ 3843 if (di_cache.cache_data == NULL) 3844 cache_valid = 0; 3845 } 3846 3847 if (cache_valid) { 3848 /* 3849 * Ok, the cache was valid as of this particular 3850 * snapshot. Copy the cached snapshot. This is safe 3851 * to do as the cache cannot be freed (we hold the 3852 * cache lock). Free the memory allocated in di_state 3853 * up until this point - we will simply copy everything 3854 * in the cache. 3855 */ 3856 3857 ASSERT(di_cache.cache_data != NULL); 3858 ASSERT(di_cache.cache_size > 0); 3859 3860 di_freemem(st); 3861 3862 rval = 0; 3863 if (di_cache2mem(&di_cache, st) > 0) { 3864 3865 ASSERT(DI_ALL_PTR(st)); 3866 3867 /* 3868 * map_size is size of valid data in the 3869 * cached snapshot and may be less than 3870 * size of the cache. 3871 */ 3872 rval = DI_ALL_PTR(st)->map_size; 3873 3874 ASSERT(rval >= sizeof (struct di_all)); 3875 ASSERT(rval <= di_cache.cache_size); 3876 } 3877 } else { 3878 /* 3879 * The cache isn't valid, we need to take a snapshot. 3880 * Set the command flags appropriately 3881 */ 3882 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); 3883 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); 3884 rval = di_cache_update(st); 3885 st->command = (DINFOCACHE & DIIOC_MASK); 3886 } 3887 3888 DI_CACHE_UNLOCK(di_cache); 3889 3890 /* 3891 * For cached snapshots, the devinfo driver always returns 3892 * a snapshot rooted at "/". 
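 *
 * (Editorial outline of the lookup above: a valid in-core cache is
 * copied directly into di_state; a valid but unread cache is first
 * loaded from disk by di_cache_read(); and an invalid cache forces a
 * rebuild through di_cache_update() with DI_CACHE_SNAPSHOT_FLAGS,
 * all under the cache lock.)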
3893 */ 3894 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); 3895 3896 return (rval); 3897 } 3898 3899 /* 3900 * This is a forced update of the cache - the previous state of the cache 3901 * may be: 3902 * - unpopulated 3903 * - populated and invalid 3904 * - populated and valid 3905 */ 3906 static int 3907 di_cache_update(struct di_state *st) 3908 { 3909 int rval; 3910 uint32_t crc; 3911 struct di_all *all; 3912 3913 ASSERT(DI_CACHE_LOCKED(di_cache)); 3914 ASSERT(snapshot_is_cacheable(st)); 3915 3916 /* 3917 * Free the in-core cache and the on-disk file (if they exist) 3918 */ 3919 i_ddi_di_cache_free(&di_cache); 3920 3921 /* 3922 * Set valid flag before taking the snapshot, 3923 * so that any invalidations that arrive 3924 * during or after the snapshot are not 3925 * removed by us. 3926 */ 3927 atomic_or_32(&di_cache.cache_valid, 1); 3928 3929 rval = di_snapshot_and_clean(st); 3930 3931 if (rval == 0) { 3932 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); 3933 return (0); 3934 } 3935 3936 DI_ALL_PTR(st)->map_size = rval; 3937 3938 if (di_mem2cache(st, &di_cache) == 0) { 3939 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); 3940 return (0); 3941 } 3942 3943 ASSERT(di_cache.cache_data); 3944 ASSERT(di_cache.cache_size > 0); 3945 3946 /* 3947 * Now that we have cached the snapshot, compute its checksum. 3948 * The checksum is only computed over the valid data in the 3949 * cache, not the entire cache. 3950 * Also, set all the fields (except checksum) before computing 3951 * checksum. 3952 */ 3953 all = (struct di_all *)di_cache.cache_data; 3954 all->cache_magic = DI_CACHE_MAGIC; 3955 all->map_size = rval; 3956 3957 ASSERT(all->cache_checksum == 0); 3958 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); 3959 all->cache_checksum = crc; 3960 3961 di_cache_write(&di_cache); 3962 3963 return (rval); 3964 } 3965 3966 static void 3967 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 3968 { 3969 va_list ap; 3970 3971 if (di_cache_debug <= DI_QUIET) 3972 return; 3973 3974 if (di_cache_debug < msglevel) 3975 return; 3976 3977 switch (msglevel) { 3978 case DI_ERR: 3979 msglevel = CE_WARN; 3980 break; 3981 case DI_INFO: 3982 case DI_TRACE: 3983 default: 3984 msglevel = CE_NOTE; 3985 break; 3986 } 3987 3988 va_start(ap, fmt); 3989 vcmn_err(msglevel, fmt, ap); 3990 va_end(ap); 3991 } 3992
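
/*
 * Editorial usage note: the CACHE_DEBUG() invocations throughout this
 * file are assumed to expand to di_cache_print() above, e.g.
 *
 *	CACHE_DEBUG((DI_INFO, "Cache write successful."));
 *
 * prints via vcmn_err() only when di_cache_debug is DI_INFO or more
 * verbose; DI_ERR messages map to CE_WARN and all others to CE_NOTE.
 */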