/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * driver for accessing kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args) if (di_debug >= 1) cmn_err args
#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver. The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move snapshot to different platforms
 */
#define	DI_ALIGN(addr)	((addr + 7l) & ~7l)

/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};
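/*
 * Illustrative note: DI_ALIGN rounds an offset up to the next 8-byte
 * boundary, e.g. DI_ALIGN(0x123) == 0x128, so records in the snapshot
 * stay aligned regardless of the platform that later maps the buffer.
 * Offsets (di_off_t) are relative to the logical start of the memlist;
 * di_mem_addr() below translates an offset into an address within the
 * proper chunk.
 */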
/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info	*dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
	    (stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, offp)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(offp); \
	((stack)->depth)++; \
}

#define	DI_ALL_PTR(s)	((struct di_all *)(intptr_t)di_mem_addr((s), 0))

/*
 * With devfs, the device tree has no global locks. The device tree is
 * dynamic and dips may come and go if they are not locked locally. Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree. This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};


struct i_lnode;

typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link w/ same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link w/ same i_lnode */
} i_link_t;

typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;
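/*
 * Sketch of the layering graph these structures build: each i_lnode_t
 * is one {dip, dev_t} endpoint, and each i_link_t is a directed edge
 * from a consumer endpoint (src) to the device it opened (tgt).  Each
 * endpoint keeps its edges on two chains:
 *
 *	src_lnode->link_out -> i_link -> src_link_next -> ...
 *	tgt_lnode->link_in  -> i_link -> tgt_link_next -> ...
 *
 * di_ldi_callback() below populates the graph; hash walkers such as
 * i_link_walker() then flatten it into the di_link/di_lnode records
 * of the snapshot.
 */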
/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

/*
 * Check that whole device tree is being configured as a pre-condition for
 * cleaning up /etc/devices files.
 */
#define	DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

typedef struct phci_walk_arg {
	di_off_t	off;
	struct di_state	*st;
} phci_walk_arg_t;

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct di_stack *, struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(struct ddi_prop *, di_off_t *,
    struct di_state *, struct dev_info *, int);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static caddr_t di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
int build_phci_list(dev_info_t *ph_devinfo, void *arg);

static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver %I%",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int	error;

	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error != 0) {
		mutex_destroy(&di_lock);
		return (error);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	error;

	error = mod_remove(&modlinkage);
	if (error != 0) {
		return (error);
	}

	mutex_destroy(&di_lock);
	return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int	error = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)di_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * All dev_t's map to the same, single instance.
		 */
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		break;
	}

	return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_ATTACH:
		di_states = kmem_zalloc(
		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
			kmem_free(di_states,
			    di_max_opens * sizeof (struct di_state *));
			ddi_remove_minor_node(dip, NULL);
			error = DDI_FAILURE;
		} else {
			di_dip = dip;
			ddi_report_dev(dip);

			error = DDI_SUCCESS;
		}
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_DETACH:
		ddi_remove_minor_node(dip, NULL);
		di_dip = NULL;
		kmem_free(di_states, di_max_opens * sizeof (struct di_state *));

		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device. Each minor gets a separate
 * entry in the di_states[] table. Based on the original minor number, we
 * discriminate opens of the full and read-only nodes. If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int	m;
	minor_t	minor_parent = getminor(*devp);

	if (minor_parent != DI_FULL_PARENT &&
	    minor_parent != DI_READONLY_PARENT)
		return (ENXIO);

	mutex_enter(&di_lock);

	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
		if (di_states[m] != NULL)
			continue;

		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
		break;	/* It's ours. */
	}
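	/*
	 * For illustration, with the default di_max_opens of 32: an open
	 * of the full node (minor 0) claims the first free even slot
	 * (m = 0, 2, ..., 30), while the read-only node (minor 1) claims
	 * odd slots (m = 1, 3, ..., 31).  The dev_t handed back below
	 * carries minor (m + DI_NODE_SPECIES), so di_close()/di_ioctl()
	 * can recover the slot as getminor(dev) - DI_NODE_SPECIES.
	 */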
	if (m >= di_max_opens) {
		/*
		 * maximum open instance for device reached
		 */
		mutex_exit(&di_lock);
		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
		return (EAGAIN);
	}
	mutex_exit(&di_lock);

	ASSERT(m < di_max_opens);
	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));

	return (0);
}

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	if (m < 0) {
		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
		    m + DI_NODE_SPECIES);
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(m < di_max_opens && st != NULL);

	di_freemem(st);
	kmem_free(st, sizeof (struct di_state));

	/*
	 * empty slot in state table
	 */
	mutex_enter(&di_lock);
	di_states[m] = NULL;
	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));
	mutex_exit(&di_lock);

	return (0);
}


/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int		rv, error;
	di_off_t	off;
	struct di_all	*all;
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	major_t		i;
	char		*drv_name;
	size_t		map_size, size;
	struct di_mem	*dcp;
	int		ndi_flags;

	if (m < 0 || m >= di_max_opens) {
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(st != NULL);

	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

	switch (cmd) {
	case DINFOIDENT:
		/*
		 * This is called from di_init to verify that the driver
		 * opened is indeed devinfo. The purpose is to guard against
		 * sending ioctl to an unknown driver in case of an
		 * unresolved major number conflict during bfu.
		 */
		*rvalp = DI_MAGIC;
		return (0);

	case DINFOLODRV:
		/*
		 * Hold an installed driver and return the result
		 */
		if (DI_UNPRIVILEGED_NODE(m)) {
			/*
			 * Only the fully enabled instances may issue
			 * DINFOLODRV.
			 */
			return (EACCES);
		}

		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
			kmem_free(drv_name, MAXNAMELEN);
			return (EFAULT);
		}

		/*
		 * Some 3rd party drivers' _init() walks the device tree,
		 * so we load the driver module before configuring driver.
		 */
		i = ddi_name_to_major(drv_name);
		if (ddi_hold_driver(i) == NULL) {
			kmem_free(drv_name, MAXNAMELEN);
			return (ENXIO);
		}

		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

		/*
		 * i_ddi_load_drvconf() below will trigger a reprobe
		 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
		 * needed here.
		 */
		modunload_disable();
		(void) i_ddi_load_drvconf(i);
		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
		kmem_free(drv_name, MAXNAMELEN);
		ddi_rele_driver(i);
		rv = i_ddi_devs_attached(i);
		modunload_enable();

		i_ddi_di_cache_invalidate(KM_SLEEP);

		return ((rv == DDI_SUCCESS)? 0 : ENXIO);
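	/*
	 * The remaining commands drive the snapshot state machine kept
	 * in st->di_iocstate (see the IOC_* defines above).  The expected
	 * flow is roughly:
	 *
	 *	IOC_IDLE --(snapshot ioctl)--> IOC_SNAP --> IOC_DONE
	 *	IOC_DONE --(DINFOUSRLD)--> IOC_COPY --> IOC_IDLE
	 *
	 * di_setstate() fails (and we return EBUSY) on transitions that
	 * do not follow this order.
	 */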

	case DINFOUSRLD:
		/*
		 * The case for copying snapshot to userland
		 */
		if (di_setstate(st, IOC_COPY) == -1)
			return (EBUSY);

		map_size = ((struct di_all *)
		    (intptr_t)di_mem_addr(st, 0))->map_size;
		if (map_size == 0) {
			(void) di_setstate(st, IOC_DONE);
			return (EFAULT);
		}

		/*
		 * copyout the snapshot
		 */
		map_size = (map_size + PAGEOFFSET) & PAGEMASK;

		/*
		 * Return the map size, so caller may do a sanity
		 * check against the return value of snapshot ioctl()
		 */
		*rvalp = (int)map_size;

		/*
		 * Copy one chunk at a time
		 */
		off = 0;
		dcp = st->memlist;
		while (map_size) {
			size = dcp->buf_size;
			if (map_size <= size) {
				size = map_size;
			}

			if (ddi_copyout(di_mem_addr(st, off),
			    (void *)(arg + off), size, mode) != 0) {
				(void) di_setstate(st, IOC_DONE);
				return (EFAULT);
			}

			map_size -= size;
			off += size;
			dcp = dcp->next;
		}

		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (0);

	default:
		if ((cmd & ~DIIOC_MASK) != DIIOC) {
			/*
			 * Invalid ioctl command
			 */
			return (ENOTTY);
		}
		/*
		 * take a snapshot
		 */
		st->command = cmd & DIIOC_MASK;
		/*FALLTHROUGH*/
	}

	/*
	 * Obtain enough memory to hold header + rootpath. We prevent kernel
	 * memory exhaustion by freeing any previously allocated snapshot and
	 * refusing the operation; otherwise we would be allowing ioctl(),
	 * ioctl(), ioctl(), ..., panic.
	 */
	if (di_setstate(st, IOC_SNAP) == -1)
		return (EBUSY);

	size = sizeof (struct di_all) +
	    sizeof (((struct dinfo_io *)(NULL))->root_path);
	if (size < PAGESIZE)
		size = PAGESIZE;
	di_allocmem(st, size);

	all = (struct di_all *)(intptr_t)di_mem_addr(st, 0);
	all->devcnt = devcnt;
	all->command = st->command;
	all->version = DI_SNAPSHOT_VERSION;
	all->top_vhci_devinfo = 0;	/* filled up by build_vhci_list. */

	/*
	 * Note the endianness in case we need to transport snapshot
	 * over the network.
	 */
#if defined(_LITTLE_ENDIAN)
	all->endianness = DI_LITTLE_ENDIAN;
#else
	all->endianness = DI_BIG_ENDIAN;
#endif

	/* Copyin ioctl args, store in the snapshot. */
	if (copyinstr((void *)arg, all->root_path,
	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	}

	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EINVAL);
	}

	error = 0;
	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (error);
	}

	off = DI_ALIGN(sizeof (struct di_all) + size);

	/*
	 * Only the fully enabled version may force load drivers or read
	 * the parent private data from a driver.
	 */
	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	    DI_UNPRIVILEGED_NODE(m)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EACCES);
	}

	/* Do we need private data? */
	if (st->command & DINFOPRIVDATA) {
		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			/*
			 * Cannot copy private data from 64-bit kernel
			 * to 32-bit app
			 */
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EINVAL);
		}
		case DDI_MODEL_NONE:
			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
				di_freemem(st);
				(void) di_setstate(st, IOC_IDLE);
				return (EFAULT);
			}
			break;
		}
#else /* !_MULTI_DATAMODEL */
		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EFAULT);
		}
#endif /* _MULTI_DATAMODEL */
	}

	all->top_devinfo = DI_ALIGN(off);

	/*
	 * For cache lookups we reallocate memory from scratch,
	 * so the value of "all" is no longer valid.
	 */
	all = NULL;

	if (st->command & DINFOCACHE) {
		*rvalp = di_cache_lookup(st);
	} else if (snapshot_is_cacheable(st)) {
		DI_CACHE_LOCK(di_cache);
		*rvalp = di_cache_update(st);
		DI_CACHE_UNLOCK(di_cache);
	} else
		*rvalp = di_snapshot_and_clean(st);

	if (*rvalp) {
		DI_ALL_PTR(st)->map_size = *rvalp;
		(void) di_setstate(st, IOC_DONE);
	} else {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
	}

	return (0);
}
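/*
 * Note on the allocation policy implemented below: because each new chunk
 * is at least as large as the current total (mem_size), the chunk sizes
 * grow geometrically, e.g. successive requests may yield chunks of
 * 4K, 4K, 8K, 16K, ...  This keeps the number of chunks (and hence the
 * cost of the offset-to-address walk in di_mem_addr()) logarithmic in
 * the final snapshot size.
 */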

/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
	struct di_mem	*mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);

	/*
	 * Round up size to nearest power of 2. If it is less
	 * than st->mem_size, set it to st->mem_size (i.e.,
	 * the mem_size is doubled every time) to reduce the
	 * number of memory allocations.
	 */
	size_t tmp = 1;
	while (tmp < size) {
		tmp <<= 1;
	}
	size = (tmp > st->mem_size) ? tmp : st->mem_size;

	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
	mem->buf_size = size;

	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

	if (st->mem_size == 0) {	/* first chunk */
		st->memlist = mem;
	} else {
		/*
		 * locate end of linked list and add a chunk at the end
		 */
		struct di_mem *dcp = st->memlist;
		while (dcp->next != NULL) {
			dcp = dcp->next;
		}

		dcp->next = mem;
	}

	st->mem_size += size;
}

/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
	struct di_mem	*dcp;
	size_t		copysz;

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		return;
	}

	copysz = 0;
	for (dcp = st->memlist; dcp; dcp = dcp->next) {

		ASSERT(bufsiz > 0);

		if (bufsiz <= dcp->buf_size)
			copysz = bufsiz;
		else
			copysz = dcp->buf_size;

		bcopy(dcp->buf, buf, copysz);

		buf += copysz;
		bufsiz -= copysz;

		if (bufsiz == 0)
			break;
	}
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
	struct di_mem	*dcp, *tmp;

	dcmn_err2((CE_CONT, "di_freemem\n"));

	if (st->mem_size) {
		dcp = st->memlist;
		while (dcp) {	/* traverse the linked list */
			tmp = dcp;
			dcp = dcp->next;
			ddi_umem_free(tmp->cook);
			kmem_free(tmp, sizeof (struct di_mem));
		}
		st->mem_size = 0;
		st->memlist = NULL;
	}

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
}

/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
	caddr_t	pa;

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
		return (0);
	}

	ASSERT(cache->cache_data);

	di_allocmem(st, cache->cache_size);

	pa = di_mem_addr(st, 0);

	ASSERT(pa);

	/*
	 * Verify that di_allocmem() allocates contiguous memory,
	 * so that it is safe to do straight bcopy()
	 */
	ASSERT(st->memlist != NULL);
	ASSERT(st->memlist->next == NULL);
	bcopy(cache->cache_data, pa, cache->cache_size);

	return (cache->cache_size);
}

/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *	- 0 on failure
 *	- size of copied data on success
 */
static size_t
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
	size_t	map_size;

	ASSERT(cache->cache_size == 0);
	ASSERT(cache->cache_data == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
		return (0);
	}

	ASSERT(st->memlist);

	/*
	 * The size of the memory list may be much larger than the
	 * size of valid data (map_size). Cache only the valid data.
	 */
	map_size = DI_ALL_PTR(st)->map_size;
	if (map_size == 0 || map_size < sizeof (struct di_all) ||
	    map_size > st->mem_size) {
		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
		return (0);
	}

	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
	cache->cache_size = map_size;
	di_copymem(st, cache->cache_data, cache->cache_size);

	return (map_size);
}

/*
 * Make sure there is at least "size" bytes of memory left before
 * going on. Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
	    off, (int)size));

	/*
	 * di_checkmem() shouldn't be called with a size of zero.
	 * But in case it is, we want to make sure we return a valid
	 * offset within the memlist and not an offset that points us
	 * at the end of the memlist.
	 */
	if (size == 0) {
		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
		size = 1;
	}

	off = DI_ALIGN(off);
	if ((st->mem_size - off) < size) {
		off = st->mem_size;
		di_allocmem(st, size);
	}

	return (off);
}
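/*
 * The usual calling pattern for di_checkmem(), as used throughout this
 * file, is:
 *
 *	off = di_checkmem(st, off, size);	(reserve aligned room)
 *	record->field = off;
 *	bcopy(data, di_mem_addr(st, off), size);
 *	off += size;				(advance past the data)
 *
 * di_checkmem() may start a new chunk, so the offset it returns (not
 * the one passed in) must be used for the store that follows.
 */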

/*
 * Copy the private data format from ioctl arg.
 * On success, the ending offset is returned. On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
	di_off_t		size;
	struct di_priv_data	*priv;
	struct di_all		*all = (struct di_all *)
	    (intptr_t)di_mem_addr(st, 0);

	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	    off, (void *)arg, mode));

	/*
	 * Copyin data and check version.
	 * We only handle private data version 0.
	 */
	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
		kmem_free(priv, sizeof (struct di_priv_data));
		return (0);
	}

	/*
	 * Save di_priv_data copied from userland in snapshot.
	 */
	all->pd_version = priv->version;
	all->n_ppdata = priv->n_parent;
	all->n_dpdata = priv->n_driver;

	/*
	 * copyin private data format, modify offset accordingly
	 */
	if (all->n_ppdata) {	/* parent private data format */
		/*
		 * check memory
		 */
		size = all->n_ppdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->ppdata_format = off;
		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	if (all->n_dpdata) {	/* driver private data format */
		/*
		 * check memory
		 */
		size = all->n_dpdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->dpdata_format = off;
		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	kmem_free(priv, sizeof (struct di_priv_data));
	return (off);
}

/*
 * Return the real address based on the offset (off) within snapshot
 */
static caddr_t
di_mem_addr(struct di_state *st, di_off_t off)
{
	struct di_mem	*dcp = st->memlist;

	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	    (void *)dcp, off));

	ASSERT(off < st->mem_size);

	while (off >= dcp->buf_size) {
		off -= dcp->buf_size;
		dcp = dcp->next;
	}

	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	    off, (void *)(dcp->buf + off)));

	return (dcp->buf + off);
}
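/*
 * For example, with a memlist of two chunks of 0x8000 bytes each, an
 * offset of 0x9000 resolves to the second chunk: the loop above skips
 * the first chunk (off becomes 0x1000) and the second chunk's
 * buf + 0x1000 is returned.
 */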

/*
 * Ideally we would use the whole key to derive the hash
 * value. However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
	struct di_key	*dik = key;
	size_t		rshift;
	void		*ptr;

	ASSERT(arg == NULL);

	switch (dik->k_type) {
	case DI_DKEY:
		ptr = dik->k_u.dkey.dk_dip;
		rshift = highbit(sizeof (struct dev_info));
		break;
	case DI_PKEY:
		ptr = dik->k_u.pkey.pk_pip;
		rshift = highbit(sizeof (struct mdi_pathinfo));
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
	return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
	char		*path_addr;
	struct di_key	*dik = key;

	switch (dik->k_type) {
	case DI_DKEY:
		break;
	case DI_PKEY:
		path_addr = dik->k_u.pkey.pk_path_addr;
		if (path_addr)
			kmem_free(path_addr, strlen(path_addr) + 1);
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}

	kmem_free(dik, sizeof (struct di_key));
}

static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
	if (dk1->dk_dip != dk2->dk_dip)
		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

	if (dk1->dk_major != DDI_MAJOR_T_NONE &&
	    dk2->dk_major != DDI_MAJOR_T_NONE) {
		if (dk1->dk_major != dk2->dk_major)
			return (dk1->dk_major > dk2->dk_major ? 1 : -1);

		if (dk1->dk_inst != dk2->dk_inst)
			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
	}

	if (dk1->dk_nodeid != dk2->dk_nodeid)
		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

	return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
	char	*p1, *p2;
	int	rv;

	if (pk1->pk_pip != pk2->pk_pip)
		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

	p1 = pk1->pk_path_addr;
	p2 = pk2->pk_path_addr;

	p1 = p1 ? p1 : "";
	p2 = p2 ? p2 : "";

	rv = strcmp(p1, p2);
	if (rv)
		return (rv > 0 ? 1 : -1);

	if (pk1->pk_client != pk2->pk_client)
		return (pk1->pk_client > pk2->pk_client ? 1 : -1);

	if (pk1->pk_phci != pk2->pk_phci)
		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

	return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	struct di_key	*dik1, *dik2;

	dik1 = key1;
	dik2 = key2;

	if (dik1->k_type != dik2->k_type) {
		panic("devinfo: mismatched keys");
		/*NOTREACHED*/
	}

	switch (dik1->k_type) {
	case DI_DKEY:
		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
	case DI_PKEY:
		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
}

/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
	di_off_t	off;
	struct di_all	*all;
	dev_info_t	*rootnode;
	char		buf[80];
	int		plen;
	char		*path;
	vnode_t		*vp;

	all = (struct di_all *)(intptr_t)di_mem_addr(st, 0);
	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

	/*
	 * Verify path before entrusting it to e_ddi_hold_devi_by_path because
	 * some platforms have OBP bugs where executing the NDI_PROMNAME code
	 * path against an invalid path results in panic. The lookupnameat
	 * is done relative to rootdir without a leading '/' on "devices/"
	 * to force the lookup to occur in the global zone.
	 */
	plen = strlen("devices/") + strlen(all->root_path) + 1;
	path = kmem_alloc(plen, KM_SLEEP);
	(void) snprintf(path, plen, "devices/%s", all->root_path);
	if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		kmem_free(path, plen);
		return (0);
	}
	kmem_free(path, plen);
	VN_RELE(vp);

	/*
	 * Hold the devinfo node referred by the path.
	 */
	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
	if (rootnode == NULL) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		return (0);
	}

	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered dips (statep=%p)", (void *)st);

	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);


	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered pips (statep=%p)", (void *)st);

	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);

	/*
	 * copy the device tree
	 */
	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

	if (DINFOPATH & st->command) {
		mdi_walk_vhcis(build_vhci_list, st);
	}

	ddi_release_devi(rootnode);

	/*
	 * copy the devnames array
	 */
	all->devnames = off;
	off = di_copydevnm(&all->devnames, st);


	/* initialize the hash tables */
	st->lnode_count = 0;
	st->link_count = 0;

	if (DINFOLYR & st->command) {
		off = di_getlink_data(off, st);
	}

	/*
	 * Free up hash tables
	 */
	mod_hash_destroy_hash(st->reg_dip_hash);
	mod_hash_destroy_hash(st->reg_pip_hash);

	/*
	 * Record the timestamp now that we are done with snapshot.
	 *
	 * We compute the checksum later and then only if we cache
	 * the snapshot, since checksumming adds some overhead.
	 * The checksum is checked later if we read the cache file
	 * from disk.
	 *
	 * Set checksum field to 0 as CRC is calculated with that
	 * field set to 0.
	 */
	all->snapshot_time = ddi_get_time();
	all->cache_checksum = 0;

	ASSERT(all->snapshot_time != 0);

	return (off);
}

/*
 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
 */
static di_off_t
di_snapshot_and_clean(struct di_state *st)
{
	di_off_t	off;

	modunload_disable();
	off = di_snapshot(st);
	if (off != 0 && (st->command & DINFOCLEANUP)) {
		ASSERT(DEVICES_FILES_CLEANABLE(st));
		/*
		 * Cleanup /etc/devices files:
		 * In order to accurately account for the system configuration
		 * in /etc/devices files, the appropriate drivers must be
		 * fully configured before the cleanup starts.
		 * So enable modunload only after the cleanup.
		 */
		i_ddi_clean_devices_files();
		/*
		 * Remove backing store nodes for unused devices,
		 * which retain past permissions customizations
		 * and may be undesired for newly configured devices.
		 */
		dev_devices_cleanup();
	}
	modunload_enable();

	return (off);
}
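/*
 * The two walkers below thread multipath nodes into singly linked lists
 * inside the snapshot, using offsets rather than pointers: di_all's
 * top_vhci_devinfo heads a chain linked through di_node.next_vhci, and
 * each vhci's top_phci heads a chain linked through di_node.next_phci.
 * libdevinfo can then walk vHCIs and their pHCIs without relying on any
 * kernel pointer values.
 */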

/*
 * construct vhci linkage in the snapshot.
 */
int
build_vhci_list(dev_info_t *vh_devinfo, void *arg)
{
	struct di_all	*all;
	struct di_node	*me;
	struct di_state	*st;
	di_off_t	off;
	phci_walk_arg_t	pwa;

	dcmn_err3((CE_CONT, "build_vhci list\n"));

	dcmn_err3((CE_CONT, "vhci node %s, instance #%d\n",
	    DEVI(vh_devinfo)->devi_node_name,
	    DEVI(vh_devinfo)->devi_instance));

	st = (struct di_state *)arg;
	if (di_dip_find(st, vh_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
	    st->mem_size, off));

	all = (struct di_all *)(intptr_t)di_mem_addr(st, 0);
	if (all->top_vhci_devinfo == 0) {
		all->top_vhci_devinfo = off;
	} else {
		me = (struct di_node *)
		    (intptr_t)di_mem_addr(st, all->top_vhci_devinfo);

		while (me->next_vhci != 0) {
			me = (struct di_node *)
			    (intptr_t)di_mem_addr(st, me->next_vhci);
		}

		me->next_vhci = off;
	}

	pwa.off = off;
	pwa.st = st;
	mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);

	return (DDI_WALK_CONTINUE);
}

/*
 * construct phci linkage for the given vhci in the snapshot.
 */
int
build_phci_list(dev_info_t *ph_devinfo, void *arg)
{
	struct di_node	*vh_di_node;
	struct di_node	*me;
	phci_walk_arg_t	*pwa;
	di_off_t	off;

	pwa = (phci_walk_arg_t *)arg;

	dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
	    pwa->off));

	vh_di_node = (struct di_node *)(intptr_t)di_mem_addr(pwa->st, pwa->off);

	if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
		dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
		return (DDI_WALK_TERMINATE);
	}

	dcmn_err3((CE_CONT, "phci node %s, instance #%d, at offset 0x%x\n",
	    DEVI(ph_devinfo)->devi_node_name,
	    DEVI(ph_devinfo)->devi_instance, off));

	if (vh_di_node->top_phci == 0) {
		vh_di_node->top_phci = off;
		return (DDI_WALK_CONTINUE);
	}

	me = (struct di_node *)
	    (intptr_t)di_mem_addr(pwa->st, vh_di_node->top_phci);

	while (me->next_phci != 0) {
		me = (struct di_node *)
		    (intptr_t)di_mem_addr(pwa->st, me->next_phci);
	}
	me->next_phci = off;

	return (DDI_WALK_CONTINUE);
}

/*
 * Assumes all devinfo nodes in device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *poff_p)
{
	struct dev_info	*node;
	struct di_node	*me;
	di_off_t	off;

	ASSERT(mutex_owned(&dnp->dn_lock));

	node = DEVI(dnp->dn_head);
	for (; node; node = node->devi_next) {
		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
			continue;

		ASSERT(off > 0);
		me = (struct di_node *)(intptr_t)di_mem_addr(st, off);
		ASSERT(me->next == 0 || me->next == -1);
		/*
		 * Only nodes which were BOUND when they were
		 * snapshotted will be added to per-driver list.
		 */
		if (me->next != -1)
			continue;

		*poff_p = off;
		poff_p = &me->next;
	}

	*poff_p = 0;
}
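/*
 * A note on the sentinel above: di_copynode() stores -1 in di_node.next
 * for nodes that were bound to a driver when copied, and 0 otherwise.
 * snap_driver_list() then rewrites each -1 into the offset of the next
 * bound node for the same driver (or 0 at the end of the chain),
 * producing the per-driver list headed by di_devnm.head.
 */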

/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * Also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
	int		i;
	di_off_t	off;
	size_t		size;
	struct di_devnm	*dnp;

	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

	/*
	 * make sure there is some allocated memory
	 */
	size = devcnt * sizeof (struct di_devnm);
	off = di_checkmem(st, *off_p, size);
	*off_p = off;

	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	    devcnt, off));

	dnp = (struct di_devnm *)(intptr_t)di_mem_addr(st, off);
	off += size;

	for (i = 0; i < devcnt; i++) {
		if (devnamesp[i].dn_name == NULL) {
			continue;
		}

		/*
		 * dn_name is not freed during driver unload or removal.
		 *
		 * There is a race condition when make_devname() changes
		 * dn_name during our strcpy. This should be rare since
		 * only add_drv does this. At any rate, we never had a
		 * problem with ddi_name_to_major(), which should have
		 * the same problem.
		 */
		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
		    devnamesp[i].dn_name, devnamesp[i].dn_instance,
		    off));

		off = di_checkmem(st, off, strlen(devnamesp[i].dn_name) + 1);
		dnp[i].name = off;
		(void) strcpy((char *)di_mem_addr(st, off),
		    devnamesp[i].dn_name);
		off += DI_ALIGN(strlen(devnamesp[i].dn_name) + 1);

		mutex_enter(&devnamesp[i].dn_lock);

		/*
		 * Snapshot per-driver node list
		 */
		snap_driver_list(st, &devnamesp[i], &dnp[i].head);

		/*
		 * This is not used by libdevinfo, leave it for now
		 */
		dnp[i].flags = devnamesp[i].dn_flags;
		dnp[i].instance = devnamesp[i].dn_instance;

		/*
		 * get global properties
		 */
		if ((DINFOPROP & st->command) &&
		    devnamesp[i].dn_global_prop_ptr) {
			dnp[i].global_prop = off;
			off = di_getprop(
			    devnamesp[i].dn_global_prop_ptr->prop_list,
			    &dnp[i].global_prop, st, NULL, DI_PROP_GLB_LIST);
		}

		/*
		 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
		 */
		if (CB_DRV_INSTALLED(devopsp[i])) {
			if (devopsp[i]->devo_cb_ops) {
				dnp[i].ops |= DI_CB_OPS;
				if (devopsp[i]->devo_cb_ops->cb_str)
					dnp[i].ops |= DI_STREAM_OPS;
			}
			if (NEXUS_DRV(devopsp[i])) {
				dnp[i].ops |= DI_BUS_OPS;
			}
		}

		mutex_exit(&devnamesp[i].dn_lock);
	}

	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

	return (off);
}
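/*
 * Sketch of the iterative walk implemented by di_copytree()/di_copynode():
 * the root is pushed onto a di_stack, and each di_copynode() call copies
 * the node at the top, then either pushes its first child, replaces the
 * top with a sibling, or pops back toward the root.  PUSH_STACK/POP_STACK
 * also enter/exit the per-node ndi circular-dependency lock, so a node
 * stays held exactly while it or its descendants are being copied.
 */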

/*
 * Copy the kernel devinfo tree. The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t	off;
	struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/* force attach drivers */
	if (i_ddi_devi_attached((dev_info_t *)root) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.
	 */
	while (!EMPTY_STACK(dsp)) {
		off = di_copynode(dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}

/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot. The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct di_stack *dsp, struct di_state *st)
{
	di_off_t	off;
	struct di_node	*me;
	struct dev_info	*node;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));

	node = TOP_NODE(dsp);

	ASSERT(node != NULL);

	/*
	 * check memory usage, and fix offsets accordingly.
	 */
	off = di_checkmem(st, *(TOP_OFFSET(dsp)), sizeof (struct di_node));
	*(TOP_OFFSET(dsp)) = off;
	me = DI_NODE(di_mem_addr(st, off));

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state (CF1, CF2, etc.)
	 */
	me->self = off;
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->flags = node->devi_flags;
	me->node_state = node->devi_node_state;
	me->next_vhci = 0;	/* Filled up by build_vhci_list. */
	me->top_phci = 0;	/* Filled up by build_phci_list. */
	me->next_phci = 0;	/* Filled up by build_phci_list. */
	me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
	me->user_private_data = NULL;

	/*
	 * Get parent's offset in snapshot from the stack
	 * and store it in the current node
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);

	/*
	 * increment offset
	 */
	off += sizeof (struct di_node);

#ifdef	DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid_str) {
		ddi_devid_t	devid;
		int		devid_size;

		/*
		 * The devid is now represented as a property. For
		 * compatibility with di_devid() interface in libdevinfo we
		 * must return it as a binary structure in the snapshot. When
		 * (if) di_devid() is removed from libdevinfo then the code
		 * related to DEVID_COMPATIBILITY can be removed.
		 */
		if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
		    DDI_SUCCESS) {
			devid_size = ddi_devid_sizeof(devid);
			off = di_checkmem(st, off, devid_size);
			me->devid = off;
			bcopy(devid, di_mem_addr(st, off), devid_size);
			off += devid_size;
			ddi_devid_free(devid);
		}
	}
#endif	/* DEVID_COMPATIBILITY */

	if (node->devi_node_name) {
		off = di_checkmem(st, off, strlen(node->devi_node_name) + 1);
		me->node_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += strlen(node->devi_node_name) + 1;
	}

	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		off = di_checkmem(st, off, node->devi_compat_length);
		me->compat_names = off;
		me->compat_length = node->devi_compat_length;
		bcopy(node->devi_compat_names, di_mem_addr(st, off),
		    node->devi_compat_length);
		off += node->devi_compat_length;
	}

	if (node->devi_addr) {
		off = di_checkmem(st, off, strlen(node->devi_addr) + 1);
		me->address = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += strlen(node->devi_addr) + 1;
	}

	if (node->devi_binding_name) {
		off = di_checkmem(st, off, strlen(node->devi_binding_name) + 1);
		me->bind_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += strlen(node->devi_binding_name) + 1;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-instance list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
		goto priv_data;
	}

	/*
	 * Grab current per dev_info node lock to
	 * get minor data and properties.
	 */
	mutex_enter(&(node->devi_lock));

	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	if (node->devi_minor) {		/* minor data */
		me->minor_data = DI_ALIGN(off);
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_VHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_VHCI;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
		me->multipath_client = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_client, me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d. off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}

	if (MDI_PHCI(node)) {
		me->multipath_component = MULTIPATH_COMPONENT_PHCI;
		me->multipath_phci = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node,
		    &me->multipath_phci, me->self, st, 0);
		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
		    "component type = %d. off=%d",
		    me->multipath_phci,
		    (void *)node, node->devi_mdi_component, off));
	}

property:
	if (!(DINFOPROP & st->command)) {
		goto unlock;
	}

	if (node->devi_drv_prop_ptr) {	/* driver property list */
		me->drv_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st,
		    node, DI_PROP_DRV_LIST);
	}

	if (node->devi_sys_prop_ptr) {	/* system property list */
		me->sys_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st,
		    node, DI_PROP_SYS_LIST);
	}

	if (node->devi_hw_prop_ptr) {	/* hardware property list */
		me->hw_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st,
		    node, DI_PROP_HW_LIST);
	}

	if (node->devi_global_prop_list == NULL) {
		me->glob_prop = (di_off_t)-1;	/* not global property */
	} else {
		/*
		 * Make copy of global property list if this devinfo refers
		 * global properties different from what's on the devnames
		 * array. It can happen if there has been a forced
		 * driver.conf update. See mod_drv(1M).
		 */
		ASSERT(me->drv_major != -1);
		if (node->devi_global_prop_list !=
		    devnamesp[me->drv_major].dn_global_prop_ptr) {
			me->glob_prop = DI_ALIGN(off);
			off = di_getprop(
			    node->devi_global_prop_list->prop_list,
			    &me->glob_prop, st, node, DI_PROP_GLB_LIST);
		}
	}

unlock:
	/*
	 * release current per dev_info node lock
	 */
	mutex_exit(&(node->devi_lock));

priv_data:
	if (!(DINFOPRIVDATA & st->command)) {
		goto pm_info;
	}

	if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
		me->parent_data = DI_ALIGN(off);
		off = di_getppdata(node, &me->parent_data, st);
	}

	if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
		me->driver_data = DI_ALIGN(off);
		off = di_getdpdata(node, &me->driver_data, st);
	}

pm_info: /* NOT implemented */

subtree:
	if (!(DINFOSUBTREE & st->command)) {
		POP_STACK(dsp);
		return (DI_ALIGN(off));
	}

child:
	/*
	 * If there is a child--push child onto stack.
	 * Hold the parent busy while doing so.
	 */
	if (node->devi_child) {
		me->child = DI_ALIGN(off);
		PUSH_STACK(dsp, node->devi_child, &me->child);
		return (me->child);
	}

sibling:
	/*
	 * no child node, unroll the stack till a sibling of
	 * a parent node is found or root node is reached
	 */
	POP_STACK(dsp);
	while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) {
		node = TOP_NODE(dsp);
		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
		POP_STACK(dsp);
	}

	if (!EMPTY_STACK(dsp)) {
		/*
		 * a sibling is found, replace top of stack by its sibling
		 */
		me->sibling = DI_ALIGN(off);
		PUSH_STACK(dsp, node->devi_sibling, &me->sibling);
		return (me->sibling);
	}

	/*
	 * DONE with all nodes
	 */
	return (DI_ALIGN(off));
}

static i_lnode_t *
i_lnode_alloc(int modid)
{
	i_lnode_t	*i_lnode;

	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);

	ASSERT(modid != -1);
	i_lnode->modid = modid;

	return (i_lnode);
}

static void
i_lnode_free(i_lnode_t *i_lnode)
{
	kmem_free(i_lnode, sizeof (i_lnode_t));
}

static void
i_lnode_check_free(i_lnode_t *i_lnode)
{
	/* This lnode and its dip must have been snapshotted */
	ASSERT(i_lnode->self > 0);
	ASSERT(i_lnode->di_node->self > 0);

	/* at least 1 link (in or out) must exist for this lnode */
	ASSERT(i_lnode->link_in || i_lnode->link_out);

	i_lnode_free(i_lnode);
}

static i_link_t *
i_link_alloc(int spec_type)
{
	i_link_t	*i_link;

	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
	i_link->spec_type = spec_type;

	return (i_link);
}

static void
i_link_check_free(i_link_t *i_link)
{
	/* This link must have been snapshotted */
	ASSERT(i_link->self > 0);

	/* Both endpoint lnodes must exist for this link */
	ASSERT(i_link->src_lnode);
	ASSERT(i_link->tgt_lnode);

	kmem_free(i_link, sizeof (i_link_t));
}

/*ARGSUSED*/
static uint_t
i_lnode_hashfunc(void *arg, mod_hash_key_t key)
{
	i_lnode_t	*i_lnode = (i_lnode_t *)key;
	struct di_node	*ptr;
	dev_t		dev;

	dev = i_lnode->devt;
	if (dev != DDI_DEV_T_NONE)
		return (i_lnode->modid + getminor(dev) + getmajor(dev));

	/* check ptr before dereferencing it for the snapshot assertion */
	ptr = i_lnode->di_node;
	if (ptr) {
		uintptr_t k = (uintptr_t)ptr;
		ASSERT(ptr->self > 0);
		k >>= (int)highbit(sizeof (struct di_node));
		return ((uint_t)k);
	}

	return (i_lnode->modid);
}

static int
i_lnode_cmp(void *arg1, void *arg2)
{
	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;

	if (i_lnode1->modid != i_lnode2->modid) {
		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
	}

	if (i_lnode1->di_node != i_lnode2->di_node)
		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);

	if (i_lnode1->devt != i_lnode2->devt)
		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);

	return (0);
}
2086  */
2087 static int
2088 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
2089 {
2090	struct di_state	*st = (struct di_state *)arg;
2091	i_lnode_t	*src_lnode, *tgt_lnode, *i_lnode;
2092	i_link_t	**i_link_next, *i_link;
2093	di_off_t	soff, toff;
2094	mod_hash_val_t	nodep = NULL;
2095	int		res;
2096
2097	/*
2098	 * If the source or target of this device usage record doesn't
2099	 * correspond to a device node, then we don't report it via
2100	 * libdevinfo, so return.
2101	 */
2102	if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
2103		return (LDI_USAGE_CONTINUE);
2104
2105	ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
2106	ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));
2107
2108	/*
2109	 * Skip the ldi_usage if either the src or tgt dip is not in the
2110	 * snapshot. This saves us from pruning bad lnodes/links later.
2111	 */
2112	if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
2113		return (LDI_USAGE_CONTINUE);
2114	if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
2115		return (LDI_USAGE_CONTINUE);
2116
2117	ASSERT(soff > 0);
2118	ASSERT(toff > 0);
2119
2120	/*
2121	 * Allocate an i_lnode and add it to the lnode hash
2122	 * if it is not already present. For this particular
2123	 * link the lnode is a source, but it may
2124	 * participate as a tgt or src in any number of layered
2125	 * operations - so it may already be in the hash.
2126	 */
2127	i_lnode = i_lnode_alloc(ldi_usage->src_modid);
2128	i_lnode->di_node = (struct di_node *)(intptr_t)di_mem_addr(st, soff);
2129	i_lnode->devt = ldi_usage->src_devt;
2130
2131	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2132	if (res == MH_ERR_NOTFOUND) {
2133		/*
2134		 * This is a new i_lnode;
2135		 * add it to the hash and increment the lnode count.
2136		 */
2137		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2138		ASSERT(res == 0);
2139		st->lnode_count++;
2140		src_lnode = i_lnode;
2141	} else {
2142		/* this i_lnode already exists in the lnode_hash */
2143		i_lnode_free(i_lnode);
2144		src_lnode = (i_lnode_t *)nodep;
2145	}
2146
2147	/*
2148	 * Allocate a tgt i_lnode and add it to the lnode hash.
2149	 */
2150	i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
2151	i_lnode->di_node = (struct di_node *)(intptr_t)di_mem_addr(st, toff);
2152	i_lnode->devt = ldi_usage->tgt_devt;
2153
2154	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2155	if (res == MH_ERR_NOTFOUND) {
2156		/*
2157		 * This is a new i_lnode;
2158		 * add it to the hash and increment the lnode count.
2159		 */
2160		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2161		ASSERT(res == 0);
2162		st->lnode_count++;
2163		tgt_lnode = i_lnode;
2164	} else {
2165		/* this i_lnode already exists in the lnode_hash */
2166		i_lnode_free(i_lnode);
2167		tgt_lnode = (i_lnode_t *)nodep;
2168	}
2169
2170	/*
2171	 * Allocate an i_link.
2172	 */
2173	i_link = i_link_alloc(ldi_usage->tgt_spec_type);
2174	i_link->src_lnode = src_lnode;
2175	i_link->tgt_lnode = tgt_lnode;
2176
2177	/*
2178	 * Add this link onto the src i_lnode's outbound i_link list.
2179	 */
2180	i_link_next = &(src_lnode->link_out);
2181	while (*i_link_next != NULL) {
2182		if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
2183		    (i_link->spec_type == (*i_link_next)->spec_type)) {
2184			/* this link already exists */
2185			kmem_free(i_link, sizeof (i_link_t));
2186			return (LDI_USAGE_CONTINUE);
2187		}
2188		i_link_next = &((*i_link_next)->src_link_next);
2189	}
2190	*i_link_next = i_link;
2191
2192	/*
2193	 * Add this link onto the tgt i_lnode's inbound i_link list.
2194	 */
2195	i_link_next = &(tgt_lnode->link_in);
2196	while
(*i_link_next != NULL) { 2197 ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); 2198 i_link_next = &((*i_link_next)->tgt_link_next); 2199 } 2200 *i_link_next = i_link; 2201 2202 /* 2203 * add this i_link to the link hash 2204 */ 2205 res = mod_hash_insert(st->link_hash, i_link, i_link); 2206 ASSERT(res == 0); 2207 st->link_count++; 2208 2209 return (LDI_USAGE_CONTINUE); 2210 } 2211 2212 struct i_layer_data { 2213 struct di_state *st; 2214 int lnode_count; 2215 int link_count; 2216 di_off_t lnode_off; 2217 di_off_t link_off; 2218 }; 2219 2220 /*ARGSUSED*/ 2221 static uint_t 2222 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2223 { 2224 i_link_t *i_link = (i_link_t *)key; 2225 struct i_layer_data *data = arg; 2226 struct di_link *me; 2227 struct di_lnode *melnode; 2228 struct di_node *medinode; 2229 2230 ASSERT(i_link->self == 0); 2231 2232 i_link->self = data->link_off + 2233 (data->link_count * sizeof (struct di_link)); 2234 data->link_count++; 2235 2236 ASSERT(data->link_off > 0 && data->link_count > 0); 2237 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ 2238 ASSERT(data->link_count <= data->st->link_count); 2239 2240 /* fill in fields for the di_link snapshot */ 2241 me = (struct di_link *)(intptr_t)di_mem_addr(data->st, i_link->self); 2242 me->self = i_link->self; 2243 me->spec_type = i_link->spec_type; 2244 2245 /* 2246 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t 2247 * are created during the LDI table walk. Since we are 2248 * walking the link hash, the lnode hash has already been 2249 * walked and the lnodes have been snapshotted. Save lnode 2250 * offsets. 2251 */ 2252 me->src_lnode = i_link->src_lnode->self; 2253 me->tgt_lnode = i_link->tgt_lnode->self; 2254 2255 /* 2256 * Save this link's offset in the src_lnode snapshot's link_out 2257 * field 2258 */ 2259 melnode = (struct di_lnode *) 2260 (intptr_t)di_mem_addr(data->st, me->src_lnode); 2261 me->src_link_next = melnode->link_out; 2262 melnode->link_out = me->self; 2263 2264 /* 2265 * Put this link on the tgt_lnode's link_in field 2266 */ 2267 melnode = (struct di_lnode *) 2268 (intptr_t)di_mem_addr(data->st, me->tgt_lnode); 2269 me->tgt_link_next = melnode->link_in; 2270 melnode->link_in = me->self; 2271 2272 /* 2273 * An i_lnode_t is only created if the corresponding dip exists 2274 * in the snapshot. A pointer to the di_node is saved in the 2275 * i_lnode_t when it is allocated. For this link, get the di_node 2276 * for the source lnode. Then put the link on the di_node's list 2277 * of src links 2278 */ 2279 medinode = i_link->src_lnode->di_node; 2280 me->src_node_next = medinode->src_links; 2281 medinode->src_links = me->self; 2282 2283 /* 2284 * Put this link on the tgt_links list of the target 2285 * dip. 
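	 *
	 * (Same prepend idiom as above. The snapshot chains records by
	 * di_off_t rather than by pointer, so a hypothetical consumer
	 * that mapped the snapshot at address `base' would walk this
	 * list roughly as:
	 *
	 *	for (loff = dinode->tgt_links; loff != 0;
	 *	    loff = lnk->tgt_node_next) {
	 *		lnk = (struct di_link *)(base + loff);
	 *		...
	 *	}
	 *
	 * where `base', `dinode' and `lnk' are illustrative local names,
	 * not driver state.)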
2286  */
2287	medinode = i_link->tgt_lnode->di_node;
2288	me->tgt_node_next = medinode->tgt_links;
2289	medinode->tgt_links = me->self;
2290
2291	return (MH_WALK_CONTINUE);
2292 }
2293
2294 /*ARGSUSED*/
2295 static uint_t
2296 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2297 {
2298	i_lnode_t		*i_lnode = (i_lnode_t *)key;
2299	struct i_layer_data	*data = arg;
2300	struct di_lnode		*me;
2301	struct di_node		*medinode;
2302
2303	ASSERT(i_lnode->self == 0);
2304
2305	i_lnode->self = data->lnode_off +
2306	    (data->lnode_count * sizeof (struct di_lnode));
2307	data->lnode_count++;
2308
2309	ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2310	ASSERT(data->link_count == 0);	/* links not done yet */
2311	ASSERT(data->lnode_count <= data->st->lnode_count);
2312
2313	/* fill in fields for the di_lnode snapshot */
2314	me = (struct di_lnode *)(intptr_t)di_mem_addr(data->st, i_lnode->self);
2315	me->self = i_lnode->self;
2316
2317	if (i_lnode->devt == DDI_DEV_T_NONE) {
2318		me->dev_major = DDI_MAJOR_T_NONE;
2319		me->dev_minor = DDI_MAJOR_T_NONE;
2320	} else {
2321		me->dev_major = getmajor(i_lnode->devt);
2322		me->dev_minor = getminor(i_lnode->devt);
2323	}
2324
2325	/*
2326	 * The dip corresponding to this lnode must exist in
2327	 * the snapshot or we wouldn't have created the i_lnode_t
2328	 * during the LDI walk. Save the offset of the dip.
2329	 */
2330	ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2331	me->node = i_lnode->di_node->self;
2332
2333	/*
2334	 * There must be at least one link in or out of this lnode
2335	 * or we wouldn't have created it. These fields will be set
2336	 * during the link hash walk.
2337	 */
2338	ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2339
2340	/*
2341	 * Set the offset of the devinfo node associated with this
2342	 * lnode. Also update the node_next pointer; this pointer
2343	 * is set if there are multiple lnodes associated with the same
2344	 * devinfo node (as can occur when multiple minor nodes
2345	 * are open for one device, etc.).
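	 *
	 * (For example, hypothetically: if two different minors of one
	 * disk are open, each open dev_t yields its own lnode, and both
	 * lnodes end up chained off the disk's single di_node through
	 * node_next.)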
2346 */ 2347 medinode = i_lnode->di_node; 2348 me->node_next = medinode->lnodes; 2349 medinode->lnodes = me->self; 2350 2351 return (MH_WALK_CONTINUE); 2352 } 2353 2354 static di_off_t 2355 di_getlink_data(di_off_t off, struct di_state *st) 2356 { 2357 struct i_layer_data data = {0}; 2358 size_t size; 2359 2360 dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); 2361 2362 st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, 2363 mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, 2364 i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); 2365 2366 st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, 2367 (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); 2368 2369 /* get driver layering information */ 2370 (void) ldi_usage_walker(st, di_ldi_callback); 2371 2372 /* check if there is any link data to include in the snapshot */ 2373 if (st->lnode_count == 0) { 2374 ASSERT(st->link_count == 0); 2375 goto out; 2376 } 2377 2378 ASSERT(st->link_count != 0); 2379 2380 /* get a pointer to snapshot memory for all the di_lnodes */ 2381 size = sizeof (struct di_lnode) * st->lnode_count; 2382 data.lnode_off = off = di_checkmem(st, off, size); 2383 off += DI_ALIGN(size); 2384 2385 /* get a pointer to snapshot memory for all the di_links */ 2386 size = sizeof (struct di_link) * st->link_count; 2387 data.link_off = off = di_checkmem(st, off, size); 2388 off += DI_ALIGN(size); 2389 2390 data.lnode_count = data.link_count = 0; 2391 data.st = st; 2392 2393 /* 2394 * We have lnodes and links that will go into the 2395 * snapshot, so let's walk the respective hashes 2396 * and snapshot them. The various linkages are 2397 * also set up during the walk. 2398 */ 2399 mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); 2400 ASSERT(data.lnode_count == st->lnode_count); 2401 2402 mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); 2403 ASSERT(data.link_count == st->link_count); 2404 2405 out: 2406 /* free up the i_lnodes and i_links used to create the snapshot */ 2407 mod_hash_destroy_hash(st->lnode_hash); 2408 mod_hash_destroy_hash(st->link_hash); 2409 st->lnode_count = 0; 2410 st->link_count = 0; 2411 2412 return (off); 2413 } 2414 2415 2416 /* 2417 * Copy all minor data nodes attached to a devinfo node into the snapshot. 2418 * It is called from di_copynode with devi_lock held. 
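 *
 * For reference, the minor nodes copied here are what a libdevinfo
 * consumer later iterates over; a minimal userland sketch (assuming
 * `node' was obtained via di_init(3DEVINFO)):
 *
 *	di_minor_t m = DI_MINOR_NIL;
 *
 *	while ((m = di_minor_next(node, m)) != DI_MINOR_NIL)
 *		(void) printf("%s\n", di_minor_name(m));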
2419 */ 2420 static di_off_t 2421 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, 2422 struct di_state *st) 2423 { 2424 di_off_t off; 2425 struct di_minor *me; 2426 2427 dcmn_err2((CE_CONT, "di_getmdata:\n")); 2428 2429 /* 2430 * check memory first 2431 */ 2432 off = di_checkmem(st, *off_p, sizeof (struct di_minor)); 2433 *off_p = off; 2434 2435 do { 2436 me = (struct di_minor *)(intptr_t)di_mem_addr(st, off); 2437 me->self = off; 2438 me->type = mnode->type; 2439 me->node = node; 2440 me->user_private_data = NULL; 2441 2442 off += DI_ALIGN(sizeof (struct di_minor)); 2443 2444 /* 2445 * Split dev_t to major/minor, so it works for 2446 * both ILP32 and LP64 model 2447 */ 2448 me->dev_major = getmajor(mnode->ddm_dev); 2449 me->dev_minor = getminor(mnode->ddm_dev); 2450 me->spec_type = mnode->ddm_spec_type; 2451 2452 if (mnode->ddm_name) { 2453 off = di_checkmem(st, off, 2454 strlen(mnode->ddm_name) + 1); 2455 me->name = off; 2456 (void) strcpy(di_mem_addr(st, off), mnode->ddm_name); 2457 off += DI_ALIGN(strlen(mnode->ddm_name) + 1); 2458 } 2459 2460 if (mnode->ddm_node_type) { 2461 off = di_checkmem(st, off, 2462 strlen(mnode->ddm_node_type) + 1); 2463 me->node_type = off; 2464 (void) strcpy(di_mem_addr(st, off), 2465 mnode->ddm_node_type); 2466 off += DI_ALIGN(strlen(mnode->ddm_node_type) + 1); 2467 } 2468 2469 off = di_checkmem(st, off, sizeof (struct di_minor)); 2470 me->next = off; 2471 mnode = mnode->next; 2472 } while (mnode); 2473 2474 me->next = 0; 2475 2476 return (off); 2477 } 2478 2479 /* 2480 * di_register_dip(), di_find_dip(): The dip must be protected 2481 * from deallocation when using these routines - this can either 2482 * be a reference count, a busy hold or a per-driver lock. 2483 */ 2484 2485 static void 2486 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) 2487 { 2488 struct dev_info *node = DEVI(dip); 2489 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2490 struct di_dkey *dk; 2491 2492 ASSERT(dip); 2493 ASSERT(off > 0); 2494 2495 key->k_type = DI_DKEY; 2496 dk = &(key->k_u.dkey); 2497 2498 dk->dk_dip = dip; 2499 dk->dk_major = node->devi_major; 2500 dk->dk_inst = node->devi_instance; 2501 dk->dk_nodeid = node->devi_nodeid; 2502 2503 if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, 2504 (mod_hash_val_t)(uintptr_t)off) != 0) { 2505 panic( 2506 "duplicate devinfo (%p) registered during device " 2507 "tree walk", (void *)dip); 2508 } 2509 } 2510 2511 2512 static int 2513 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) 2514 { 2515 /* 2516 * uintptr_t must be used because it matches the size of void *; 2517 * mod_hash expects clients to place results into pointer-size 2518 * containers; since di_off_t is always a 32-bit offset, alignment 2519 * would otherwise be broken on 64-bit kernels. 
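	 *
	 * In other words, the lookup below reads the value into a
	 * pointer-size temporary and narrows it afterwards. Reading
	 * straight into a di_off_t, as in the (broken) sketch
	 *
	 *	mod_hash_find(h, key, (mod_hash_val_t *)off_p);
	 *
	 * would store a pointer-size result into a 32-bit object on a
	 * 64-bit kernel.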
2520 */ 2521 uintptr_t offset; 2522 struct di_key key = {0}; 2523 struct di_dkey *dk; 2524 2525 ASSERT(st->reg_dip_hash); 2526 ASSERT(dip); 2527 ASSERT(off_p); 2528 2529 2530 key.k_type = DI_DKEY; 2531 dk = &(key.k_u.dkey); 2532 2533 dk->dk_dip = dip; 2534 dk->dk_major = DEVI(dip)->devi_major; 2535 dk->dk_inst = DEVI(dip)->devi_instance; 2536 dk->dk_nodeid = DEVI(dip)->devi_nodeid; 2537 2538 if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, 2539 (mod_hash_val_t *)&offset) == 0) { 2540 *off_p = (di_off_t)offset; 2541 return (0); 2542 } else { 2543 return (-1); 2544 } 2545 } 2546 2547 /* 2548 * di_register_pip(), di_find_pip(): The pip must be protected from deallocation 2549 * when using these routines. The caller must do this by protecting the 2550 * client(or phci)<->pip linkage while traversing the list and then holding the 2551 * pip when it is found in the list. 2552 */ 2553 2554 static void 2555 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) 2556 { 2557 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2558 char *path_addr; 2559 struct di_pkey *pk; 2560 2561 ASSERT(pip); 2562 ASSERT(off > 0); 2563 2564 key->k_type = DI_PKEY; 2565 pk = &(key->k_u.pkey); 2566 2567 pk->pk_pip = pip; 2568 path_addr = mdi_pi_get_addr(pip); 2569 if (path_addr) 2570 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); 2571 pk->pk_client = mdi_pi_get_client(pip); 2572 pk->pk_phci = mdi_pi_get_phci(pip); 2573 2574 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, 2575 (mod_hash_val_t)(uintptr_t)off) != 0) { 2576 panic( 2577 "duplicate pathinfo (%p) registered during device " 2578 "tree walk", (void *)pip); 2579 } 2580 } 2581 2582 /* 2583 * As with di_register_pip, the caller must hold or lock the pip 2584 */ 2585 static int 2586 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) 2587 { 2588 /* 2589 * uintptr_t must be used because it matches the size of void *; 2590 * mod_hash expects clients to place results into pointer-size 2591 * containers; since di_off_t is always a 32-bit offset, alignment 2592 * would otherwise be broken on 64-bit kernels. 
2593 */ 2594 uintptr_t offset; 2595 struct di_key key = {0}; 2596 struct di_pkey *pk; 2597 2598 ASSERT(st->reg_pip_hash); 2599 ASSERT(off_p); 2600 2601 if (pip == NULL) { 2602 *off_p = 0; 2603 return (0); 2604 } 2605 2606 key.k_type = DI_PKEY; 2607 pk = &(key.k_u.pkey); 2608 2609 pk->pk_pip = pip; 2610 pk->pk_path_addr = mdi_pi_get_addr(pip); 2611 pk->pk_client = mdi_pi_get_client(pip); 2612 pk->pk_phci = mdi_pi_get_phci(pip); 2613 2614 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, 2615 (mod_hash_val_t *)&offset) == 0) { 2616 *off_p = (di_off_t)offset; 2617 return (0); 2618 } else { 2619 return (-1); 2620 } 2621 } 2622 2623 static di_path_state_t 2624 path_state_convert(mdi_pathinfo_state_t st) 2625 { 2626 switch (st) { 2627 case MDI_PATHINFO_STATE_ONLINE: 2628 return (DI_PATH_STATE_ONLINE); 2629 case MDI_PATHINFO_STATE_STANDBY: 2630 return (DI_PATH_STATE_STANDBY); 2631 case MDI_PATHINFO_STATE_OFFLINE: 2632 return (DI_PATH_STATE_OFFLINE); 2633 case MDI_PATHINFO_STATE_FAULT: 2634 return (DI_PATH_STATE_FAULT); 2635 default: 2636 return (DI_PATH_STATE_UNKNOWN); 2637 } 2638 } 2639 2640 2641 static di_off_t 2642 di_path_getprop(mdi_pathinfo_t *pip, di_off_t off, di_off_t *off_p, 2643 struct di_state *st) 2644 { 2645 nvpair_t *prop = NULL; 2646 struct di_path_prop *me; 2647 2648 if (mdi_pi_get_next_prop(pip, NULL) == NULL) { 2649 *off_p = 0; 2650 return (off); 2651 } 2652 2653 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2654 *off_p = off; 2655 2656 while (prop = mdi_pi_get_next_prop(pip, prop)) { 2657 int delta = 0; 2658 2659 me = (struct di_path_prop *)(intptr_t)di_mem_addr(st, off); 2660 me->self = off; 2661 off += sizeof (struct di_path_prop); 2662 2663 /* 2664 * property name 2665 */ 2666 off = di_checkmem(st, off, strlen(nvpair_name(prop)) + 1); 2667 me->prop_name = off; 2668 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); 2669 off += strlen(nvpair_name(prop)) + 1; 2670 2671 switch (nvpair_type(prop)) { 2672 case DATA_TYPE_BYTE: 2673 case DATA_TYPE_INT16: 2674 case DATA_TYPE_UINT16: 2675 case DATA_TYPE_INT32: 2676 case DATA_TYPE_UINT32: 2677 delta = sizeof (int32_t); 2678 me->prop_type = DDI_PROP_TYPE_INT; 2679 off = di_checkmem(st, off, delta); 2680 (void) nvpair_value_int32(prop, 2681 (int32_t *)(intptr_t)di_mem_addr(st, off)); 2682 break; 2683 2684 case DATA_TYPE_INT64: 2685 case DATA_TYPE_UINT64: 2686 delta = sizeof (int64_t); 2687 me->prop_type = DDI_PROP_TYPE_INT64; 2688 off = di_checkmem(st, off, delta); 2689 (void) nvpair_value_int64(prop, 2690 (int64_t *)(intptr_t)di_mem_addr(st, off)); 2691 break; 2692 2693 case DATA_TYPE_STRING: 2694 { 2695 char *str; 2696 (void) nvpair_value_string(prop, &str); 2697 delta = strlen(str) + 1; 2698 me->prop_type = DDI_PROP_TYPE_STRING; 2699 off = di_checkmem(st, off, delta); 2700 (void) strcpy(di_mem_addr(st, off), str); 2701 break; 2702 } 2703 case DATA_TYPE_BYTE_ARRAY: 2704 case DATA_TYPE_INT16_ARRAY: 2705 case DATA_TYPE_UINT16_ARRAY: 2706 case DATA_TYPE_INT32_ARRAY: 2707 case DATA_TYPE_UINT32_ARRAY: 2708 case DATA_TYPE_INT64_ARRAY: 2709 case DATA_TYPE_UINT64_ARRAY: 2710 { 2711 uchar_t *buf; 2712 uint_t nelems; 2713 (void) nvpair_value_byte_array(prop, &buf, &nelems); 2714 delta = nelems; 2715 me->prop_type = DDI_PROP_TYPE_BYTE; 2716 if (nelems != 0) { 2717 off = di_checkmem(st, off, delta); 2718 bcopy(buf, di_mem_addr(st, off), nelems); 2719 } 2720 break; 2721 } 2722 2723 default: /* Unknown or unhandled type; skip it */ 2724 delta = 0; 2725 break; 2726 } 2727 2728 if (delta > 0) { 2729 me->prop_data = off; 2730 
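			/* record the offset of the value bytes copied above */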
		}
2731
2732		me->prop_len = delta;
2733		off += delta;
2734
2735		off = di_checkmem(st, off, sizeof (struct di_path_prop));
2736		me->prop_next = off;
2737	}
2738
2739	me->prop_next = 0;
2740	return (off);
2741 }
2742
2743
2744 static void
2745 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2746     int get_client)
2747 {
2748	if (get_client) {
2749		ASSERT(me->path_client == 0);
2750		me->path_client = noff;
2751		ASSERT(me->path_c_link == 0);
2752		*off_pp = &me->path_c_link;
2753		me->path_snap_state &=
2754		    ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2755	} else {
2756		ASSERT(me->path_phci == 0);
2757		me->path_phci = noff;
2758		ASSERT(me->path_p_link == 0);
2759		*off_pp = &me->path_p_link;
2760		me->path_snap_state &=
2761		    ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2762	}
2763 }
2764
2765 /*
2766  * poff_p: pointer to the linkage field. This links pips along the client|phci
2767  * linkage list.
2768  * noff : offset of the endpoint dip snapshot.
2769  */
2770 static di_off_t
2771 di_getpath_data(dev_info_t *dip, di_off_t *poff_p, di_off_t noff,
2772     struct di_state *st, int get_client)
2773 {
2774	di_off_t	off;
2775	mdi_pathinfo_t	*pip;
2776	struct di_path	*me;
2777	mdi_pathinfo_t	*(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2778
2779	dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2780
2781	/*
2782	 * The naming of the following mdi_xyz() routines is unfortunately
2783	 * non-intuitive. mdi_get_next_phci_path() follows the
2784	 * client_link, i.e. the list of pips belonging to the
2785	 * given client dip.
2786	 */
2787	if (get_client)
2788		next_pip = &mdi_get_next_phci_path;
2789	else
2790		next_pip = &mdi_get_next_client_path;
2791
2792	off = *poff_p;
2793
2794	pip = NULL;
2795	while (pip = (*next_pip)(dip, pip)) {
2796		mdi_pathinfo_state_t state;
2797		di_off_t stored_offset;
2798
2799		dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2800
2801		mdi_pi_lock(pip);
2802
2803		if (di_pip_find(st, pip, &stored_offset) != -1) {
2804			/*
2805			 * We've already seen this pathinfo node, so we must
2806			 * take care not to snap it again. However, one
2807			 * endpoint and linkage will be set here. The other
2808			 * endpoint and linkage were already set when the pip
2809			 * was first snapshotted, i.e. when the other endpoint
2810			 * dip was snapshotted.
2811			 */
2812			me = (struct di_path *)(intptr_t)
2813			    di_mem_addr(st, stored_offset);
2814
2815			*poff_p = stored_offset;
2816
2817			di_path_one_endpoint(me, noff, &poff_p, get_client);
2818
2819			/*
2820			 * The other endpoint and linkage were set when this
2821			 * pip was snapshotted. So we are done with both
2822			 * endpoints and linkages.
2823 */ 2824 ASSERT(!(me->path_snap_state & 2825 (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI))); 2826 ASSERT(!(me->path_snap_state & 2827 (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK))); 2828 2829 mdi_pi_unlock(pip); 2830 continue; 2831 } 2832 2833 /* 2834 * Now that we need to snapshot this pip, check memory 2835 */ 2836 off = di_checkmem(st, off, sizeof (struct di_path)); 2837 me = (struct di_path *)(intptr_t)di_mem_addr(st, off); 2838 me->self = off; 2839 *poff_p = off; 2840 off += sizeof (struct di_path); 2841 2842 me->path_snap_state = 2843 DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK; 2844 me->path_snap_state |= 2845 DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI; 2846 2847 /* 2848 * Zero out fields as di_checkmem() doesn't guarantee 2849 * zero-filled memory 2850 */ 2851 me->path_client = me->path_phci = 0; 2852 me->path_c_link = me->path_p_link = 0; 2853 2854 di_path_one_endpoint(me, noff, &poff_p, get_client); 2855 2856 /* 2857 * Note the existence of this pathinfo 2858 */ 2859 di_register_pip(st, pip, me->self); 2860 2861 state = mdi_pi_get_state(pip); 2862 me->path_state = path_state_convert(state); 2863 2864 me->path_instance = mdi_pi_get_path_instance(pip); 2865 2866 /* 2867 * Get intermediate addressing info. 2868 */ 2869 off = di_checkmem(st, off, strlen(mdi_pi_get_addr(pip)) + 1); 2870 me->path_addr = off; 2871 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip)); 2872 off += strlen(mdi_pi_get_addr(pip)) + 1; 2873 2874 /* 2875 * Get path properties if props are to be included in the 2876 * snapshot 2877 */ 2878 if (DINFOPROP & st->command) { 2879 off = di_path_getprop(pip, off, &me->path_prop, st); 2880 } else { 2881 me->path_prop = 0; 2882 } 2883 2884 mdi_pi_unlock(pip); 2885 } 2886 2887 *poff_p = 0; 2888 2889 return (off); 2890 } 2891 2892 /* 2893 * Copy a list of properties attached to a devinfo node. Called from 2894 * di_copynode with devi_lock held. The major number is passed in case 2895 * we need to call driver's prop_op entry. The value of list indicates 2896 * which list we are copying. Possible values are: 2897 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST 2898 */ 2899 static di_off_t 2900 di_getprop(struct ddi_prop *prop, di_off_t *off_p, struct di_state *st, 2901 struct dev_info *dip, int list) 2902 { 2903 dev_t dev; 2904 int (*prop_op)(); 2905 int off, need_prop_op = 0; 2906 int prop_op_fail = 0; 2907 ddi_prop_t *propp = NULL; 2908 struct di_prop *pp; 2909 struct dev_ops *ops = NULL; 2910 int prop_len; 2911 caddr_t prop_val; 2912 2913 2914 dcmn_err2((CE_CONT, "di_getprop:\n")); 2915 2916 ASSERT(st != NULL); 2917 2918 dcmn_err((CE_CONT, "copy property list at addr %p\n", (void *)prop)); 2919 2920 /* 2921 * Figure out if we need to call driver's prop_op entry point. 2922 * The conditions are: 2923 * -- driver property list 2924 * -- driver must be attached and held 2925 * -- driver's cb_prop_op != ddi_prop_op 2926 * or parent's bus_prop_op != ddi_bus_prop_op 2927 */ 2928 2929 if (list != DI_PROP_DRV_LIST) { 2930 goto getprop; 2931 } 2932 2933 /* 2934 * If driver is not attached or if major is -1, we ignore 2935 * the driver property list. No one should rely on such 2936 * properties. 2937 */ 2938 if (!i_ddi_devi_attached((dev_info_t *)dip)) { 2939 off = *off_p; 2940 *off_p = 0; 2941 return (off); 2942 } 2943 2944 /* 2945 * Now we have a driver which is held. We can examine entry points 2946 * and check the condition listed above. 
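	 *
	 * For reference, cb_prop_op has the standard prop_op(9E)
	 * signature:
	 *
	 *	int xx_prop_op(dev_t dev, dev_info_t *dip,
	 *	    ddi_prop_op_t prop_op, int mod_flags, char *name,
	 *	    caddr_t valuep, int *lengthp);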
2947 */ 2948 ops = dip->devi_ops; 2949 2950 /* 2951 * Some nexus drivers incorrectly set cb_prop_op to nodev, 2952 * nulldev or even NULL. 2953 */ 2954 if (ops && ops->devo_cb_ops && 2955 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) && 2956 (ops->devo_cb_ops->cb_prop_op != nodev) && 2957 (ops->devo_cb_ops->cb_prop_op != nulldev) && 2958 (ops->devo_cb_ops->cb_prop_op != NULL)) { 2959 need_prop_op = 1; 2960 } 2961 2962 getprop: 2963 /* 2964 * check memory availability 2965 */ 2966 off = di_checkmem(st, *off_p, sizeof (struct di_prop)); 2967 *off_p = off; 2968 /* 2969 * Now copy properties 2970 */ 2971 do { 2972 pp = (struct di_prop *)(intptr_t)di_mem_addr(st, off); 2973 pp->self = off; 2974 /* 2975 * Split dev_t to major/minor, so it works for 2976 * both ILP32 and LP64 model 2977 */ 2978 pp->dev_major = getmajor(prop->prop_dev); 2979 pp->dev_minor = getminor(prop->prop_dev); 2980 pp->prop_flags = prop->prop_flags; 2981 pp->prop_list = list; 2982 2983 /* 2984 * property name 2985 */ 2986 off += sizeof (struct di_prop); 2987 if (prop->prop_name) { 2988 off = di_checkmem(st, off, strlen(prop->prop_name) 2989 + 1); 2990 pp->prop_name = off; 2991 (void) strcpy(di_mem_addr(st, off), prop->prop_name); 2992 off += strlen(prop->prop_name) + 1; 2993 } 2994 2995 /* 2996 * Set prop_len here. This may change later 2997 * if cb_prop_op returns a different length. 2998 */ 2999 pp->prop_len = prop->prop_len; 3000 if (!need_prop_op) { 3001 if (prop->prop_val == NULL) { 3002 dcmn_err((CE_WARN, 3003 "devinfo: property fault at %p", 3004 (void *)prop)); 3005 pp->prop_data = -1; 3006 } else if (prop->prop_len != 0) { 3007 off = di_checkmem(st, off, prop->prop_len); 3008 pp->prop_data = off; 3009 bcopy(prop->prop_val, di_mem_addr(st, off), 3010 prop->prop_len); 3011 off += DI_ALIGN(pp->prop_len); 3012 } 3013 } 3014 3015 off = di_checkmem(st, off, sizeof (struct di_prop)); 3016 pp->next = off; 3017 prop = prop->prop_next; 3018 } while (prop); 3019 3020 pp->next = 0; 3021 3022 if (!need_prop_op) { 3023 dcmn_err((CE_CONT, "finished property " 3024 "list at offset 0x%x\n", off)); 3025 return (off); 3026 } 3027 3028 /* 3029 * If there is a need to call driver's prop_op entry, 3030 * we must release driver's devi_lock, because the 3031 * cb_prop_op entry point will grab it. 3032 * 3033 * The snapshot memory has already been allocated above, 3034 * which means the length of an active property should 3035 * remain fixed for this implementation to work. 
3036  */
3037
3038
3039	prop_op = ops->devo_cb_ops->cb_prop_op;
3040	pp = (struct di_prop *)(intptr_t)di_mem_addr(st, *off_p);
3041
3042	mutex_exit(&dip->devi_lock);
3043
3044	do {
3045		int err;
3046		struct di_prop *tmp;
3047
3048		if (pp->next) {
3049			tmp = (struct di_prop *)
3050			    (intptr_t)di_mem_addr(st, pp->next);
3051		} else {
3052			tmp = NULL;
3053		}
3054
3055		/*
3056		 * Call into the driver's prop_op entry point.
3057		 *
3058		 * Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY
3059		 */
3060		dev = makedevice(pp->dev_major, pp->dev_minor);
3061		if (dev == DDI_DEV_T_NONE)
3062			dev = DDI_DEV_T_ANY;
3063
3064		dcmn_err((CE_CONT, "call prop_op"
3065		    "(%lx, %p, PROP_LEN_AND_VAL_ALLOC, "
3066		    "DDI_PROP_DONTPASS, \"%s\", %p, &%d)\n",
3067		    dev,
3068		    (void *)dip,
3069		    (char *)di_mem_addr(st, pp->prop_name),
3070		    (void *)di_mem_addr(st, pp->prop_data),
3071		    pp->prop_len));
3072
3073		if ((err = (*prop_op)(dev, (dev_info_t *)dip,
3074		    PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS,
3075		    (char *)di_mem_addr(st, pp->prop_name),
3076		    &prop_val, &prop_len)) != DDI_PROP_SUCCESS) {
3077			if ((propp = i_ddi_prop_search(dev,
3078			    (char *)di_mem_addr(st, pp->prop_name),
3079			    (uint_t)pp->prop_flags,
3080			    &(DEVI(dip)->devi_drv_prop_ptr))) != NULL) {
3081				pp->prop_len = propp->prop_len;
3082				if (pp->prop_len != 0) {
3083					off = di_checkmem(st, off,
3084					    pp->prop_len);
3085					pp->prop_data = off;
3086					bcopy(propp->prop_val, di_mem_addr(st,
3087					    pp->prop_data), propp->prop_len);
3088					off += DI_ALIGN(pp->prop_len);
3089				}
3090			} else {
3091				prop_op_fail = 1;
3092			}
3093		} else if (prop_len != 0) {
3094			pp->prop_len = prop_len;
3095			off = di_checkmem(st, off, prop_len);
3096			pp->prop_data = off;
3097			bcopy(prop_val, di_mem_addr(st, off), prop_len);
3098			off += DI_ALIGN(prop_len);
3099			kmem_free(prop_val, prop_len);
3100		}
3101
3102		if (prop_op_fail) {
3103			pp->prop_data = -1;
3104			dcmn_err((CE_WARN, "devinfo: prop_op failure "
3105			    "for \"%s\" err %d",
3106			    di_mem_addr(st, pp->prop_name), err));
3107		}
3108
3109		pp = tmp;
3110
3111	} while (pp);
3112
3113	mutex_enter(&dip->devi_lock);
3114	dcmn_err((CE_CONT, "finished property list at offset 0x%x\n", off));
3115	return (off);
3116 }
3117
3118 /*
3119  * Find the private data format attached to a dip.
3120  * match = DI_MATCH_PARENT (1): use the parent dip's driver name.
3121  * match = DI_MATCH_DRIVER (0): use the current dip's driver name.
3122  */
3123 #define	DI_MATCH_DRIVER	0
3124 #define	DI_MATCH_PARENT	1
3125
3126 struct di_priv_format *
3127 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3128 {
3129	int			i, count, len;
3130	char			*drv_name;
3131	major_t			major;
3132	struct di_all		*all;
3133	struct di_priv_format	*form;
3134
3135	dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3136	    node->devi_node_name, match));
3137
3138	if (match == DI_MATCH_PARENT) {
3139		node = DEVI(node->devi_parent);
3140	}
3141
3142	if (node == NULL) {
3143		return (NULL);
3144	}
3145
3146	major = ddi_name_to_major(node->devi_binding_name);
3147	if (major == (major_t)(-1)) {
3148		return (NULL);
3149	}
3150
3151	/*
3152	 * Match the driver name.
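	 *
	 * (A di_priv_format entry may list several space-separated driver
	 * names in drv_name; e.g. a hypothetical entry of "sd ssd" would
	 * match either driver. That is why the loop below walks the
	 * string word by word.)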
3153	 */
3154	drv_name = ddi_major_to_name(major);
3155	if ((drv_name == NULL) || *drv_name == '\0') {
3156		return (NULL);
3157	}
3158
3159	/* Now get the di_priv_format array */
3160	all = (struct di_all *)(intptr_t)di_mem_addr(st, 0);
3161
3162	if (match == DI_MATCH_PARENT) {
3163		count = all->n_ppdata;
3164		form = (struct di_priv_format *)
3165		    (intptr_t)(di_mem_addr(st, 0) + all->ppdata_format);
3166	} else {
3167		count = all->n_dpdata;
3168		form = (struct di_priv_format *)
3169		    (intptr_t)((caddr_t)all + all->dpdata_format);
3170	}
3171
3172	len = strlen(drv_name);
3173	for (i = 0; i < count; i++) {
3174		char *tmp;
3175
3176		tmp = form[i].drv_name;
3177		while (tmp && (*tmp != '\0')) {
3178			if (strncmp(drv_name, tmp, len) == 0) {
3179				return (&form[i]);
3180			}
3181			/*
3182			 * Move to the next driver name, skipping white space.
3183			 */
3184			if (tmp = strchr(tmp, ' ')) {
3185				tmp++;
3186			}
3187		}
3188	}
3189
3190	return (NULL);
3191 }
3192
3193 /*
3194  * The following functions copy data as specified by the format passed in.
3195  * To prevent an invalid format from panicking the system, we call
3196  * on_fault(). A return value of 0 indicates an error. Otherwise, the
3197  * total offset is returned.
3198  */
3199 #define	DI_MAX_PRIVDATA	(PAGESIZE >> 1)	/* max private data size */
3200
3201 static di_off_t
3202 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
3203     void *data, di_off_t *off_p, struct di_state *st)
3204 {
3205	caddr_t	pa;
3206	void	*ptr;
3207	int	i, size, repeat;
3208	di_off_t off, off0, *tmp;
3209	char	*path;
3210
3211	label_t ljb;
3212
3213	dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3214
3215	/*
3216	 * Check memory availability. Private data size is
3217	 * limited to DI_MAX_PRIVDATA.
3218	 */
3219	off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3220
3221	if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3222		goto failure;
3223	}
3224
3225	if (!on_fault(&ljb)) {
3226		/* copy the struct */
3227		bcopy(data, di_mem_addr(st, off), pdp->bytes);
3228		off0 = DI_ALIGN(pdp->bytes);
3229
3230		/* dereferencing pointers */
3231		for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3232
3233			if (pdp->ptr[i].size == 0) {
3234				goto success;	/* no more ptrs */
3235			}
3236
3237			/*
3238			 * first, get the pointer content
3239			 */
3240			if ((pdp->ptr[i].offset < 0) ||
3241			    (pdp->ptr[i].offset >
3242			    pdp->bytes - sizeof (char *)))
3243				goto failure;	/* wrong offset */
3244
3245			pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3246
3247			/* save a tmp ptr to store the di_off_t later */
3248			tmp = (di_off_t *)(intptr_t)pa;
3249
3250			/* get the pointer value; if NULL, continue */
3251			ptr = *((void **)(intptr_t)pa);
3252			if (ptr == NULL) {
3253				continue;
3254			}
3255
3256			/*
3257			 * next, find the repeat count (array dimension)
3258			 */
3259			repeat = pdp->ptr[i].len_offset;
3260
3261			/*
3262			 * A negative value indicates a fixed-size array whose
3263			 * length is the absolute value of len_offset.
3264			 *
3265			 * A zero or positive value indicates a variable-size
3266			 * array; len_offset is then the offset of the int
3267			 * member of the structure that holds the length.
3268			 */
3269			if (repeat > pdp->bytes - sizeof (int)) {
3270				goto failure;	/* wrong offset */
3271			}
3272
3273			if (repeat >= 0) {
3274				repeat = *((int *)
3275				    (intptr_t)((caddr_t)data + repeat));
3276			} else {
3277				repeat = -repeat;
3278			}
3279
3280			/*
3281			 * next, get the size of the object to be copied
3282			 */
3283			size = pdp->ptr[i].size * repeat;
3284
3285			/*
3286			 * Arbitrarily limit the total size of the object to
3287			 * be copied (1 byte to 1/4 page).
3288			 */
3289			if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3290				goto failure;	/* wrong size or too big */
3291			}
3292
3293			/*
3294			 * Now copy the data
3295			 */
3296			*tmp = off0;
3297			bcopy(ptr, di_mem_addr(st, off + off0), size);
3298			off0 += DI_ALIGN(size);
3299		}
3300	} else {
3301		goto failure;
3302	}
3303
3304 success:
3305	/*
3306	 * success if reached here
3307	 */
3308	no_fault();
3309	*off_p = off;
3310
3311	return (off + off0);
3312	/*NOTREACHED*/
3313
3314 failure:
3315	/*
3316	 * fault occurred
3317	 */
3318	no_fault();
3319	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3320	cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
3321	    ddi_pathname((dev_info_t *)node, path), data);
3322	kmem_free(path, MAXPATHLEN);
3323	*off_p = -1;	/* set private data offset to indicate the error */
3324
3325	return (off);
3326 }
3327
3328 /*
3329  * Get parent private data; on error, returns the original offset.
3330  */
3331 static di_off_t
3332 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3333 {
3334	int			off;
3335	struct di_priv_format	*ppdp;
3336
3337	dcmn_err2((CE_CONT, "di_getppdata:\n"));
3338
3339	/* find the parent data format */
3340	if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3341		off = *off_p;
3342		*off_p = 0;	/* set parent data to none */
3343		return (off);
3344	}
3345
3346	return (di_getprvdata(ppdp, node,
3347	    ddi_get_parent_data((dev_info_t *)node), off_p, st));
3348 }
3349
3350 /*
3351  * Get driver private data; on error, returns the original offset.
3352  */
3353 static di_off_t
3354 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3355 {
3356	int			off;
3357	struct di_priv_format	*dpdp;
3358
3359	dcmn_err2((CE_CONT, "di_getdpdata:"));
3360
3361	/* find the driver data format */
3362	if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3363		off = *off_p;
3364		*off_p = 0;	/* set driver data to none */
3365		return (off);
3366	}
3367
3368	return (di_getprvdata(dpdp, node,
3369	    ddi_get_driver_private((dev_info_t *)node), off_p, st));
3370 }
3371
3372 /*
3373  * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3374  * This function encapsulates the state machine:
3375  *
3376  *	-> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3377  *	|		SNAPSHOT		USRLD	 |
3378  *	--------------------------------------------------
3379  *
3380  * Returns 0 on success and -1 on failure
3381  */
3382 static int
3383 di_setstate(struct di_state *st, int new_state)
3384 {
3385	int	ret = 0;
3386
3387	mutex_enter(&di_lock);
3388	switch (new_state) {
3389	case IOC_IDLE:
3390	case IOC_DONE:
3391		break;
3392	case IOC_SNAP:
3393		if (st->di_iocstate != IOC_IDLE)
3394			ret = -1;
3395		break;
3396	case IOC_COPY:
3397		if (st->di_iocstate != IOC_DONE)
3398			ret = -1;
3399		break;
3400	default:
3401		ret = -1;
3402	}
3403
3404	if (ret == 0)
3405		st->di_iocstate = new_state;
3406	else
3407		cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3408		    st->di_iocstate, new_state);
3409	mutex_exit(&di_lock);
3410	return (ret);
3411 }
3412
3413 /*
3414  * We cannot assume the presence of the entire
3415  * snapshot in this routine.
All we are guaranteed 3416 * is the di_all struct + 1 byte (for root_path) 3417 */ 3418 static int 3419 header_plus_one_ok(struct di_all *all) 3420 { 3421 /* 3422 * Refuse to read old versions 3423 */ 3424 if (all->version != DI_SNAPSHOT_VERSION) { 3425 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); 3426 return (0); 3427 } 3428 3429 if (all->cache_magic != DI_CACHE_MAGIC) { 3430 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); 3431 return (0); 3432 } 3433 3434 if (all->snapshot_time == 0) { 3435 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); 3436 return (0); 3437 } 3438 3439 if (all->top_devinfo == 0) { 3440 CACHE_DEBUG((DI_ERR, "NULL top devinfo")); 3441 return (0); 3442 } 3443 3444 if (all->map_size < sizeof (*all) + 1) { 3445 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); 3446 return (0); 3447 } 3448 3449 if (all->root_path[0] != '/' || all->root_path[1] != '\0') { 3450 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", 3451 all->root_path[0], all->root_path[1])); 3452 return (0); 3453 } 3454 3455 /* 3456 * We can't check checksum here as we just have the header 3457 */ 3458 3459 return (1); 3460 } 3461 3462 static int 3463 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) 3464 { 3465 rlim64_t rlimit; 3466 ssize_t resid; 3467 int error = 0; 3468 3469 3470 rlimit = RLIM64_INFINITY; 3471 3472 while (len) { 3473 resid = 0; 3474 error = vn_rdwr(UIO_WRITE, vp, buf, len, off, 3475 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 3476 3477 if (error || resid < 0) { 3478 error = error ? error : EIO; 3479 CACHE_DEBUG((DI_ERR, "write error: %d", error)); 3480 break; 3481 } 3482 3483 /* 3484 * Check if we are making progress 3485 */ 3486 if (resid >= len) { 3487 error = ENOSPC; 3488 break; 3489 } 3490 buf += len - resid; 3491 off += len - resid; 3492 len = resid; 3493 } 3494 3495 return (error); 3496 } 3497 3498 extern int modrootloaded; 3499 extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *); 3500 extern void mdi_vhci_walk_phcis(dev_info_t *, 3501 int (*)(dev_info_t *, void *), void *); 3502 3503 static void 3504 di_cache_write(struct di_cache *cache) 3505 { 3506 struct di_all *all; 3507 struct vnode *vp; 3508 int oflags; 3509 size_t map_size; 3510 size_t chunk; 3511 offset_t off; 3512 int error; 3513 char *buf; 3514 3515 ASSERT(DI_CACHE_LOCKED(*cache)); 3516 ASSERT(!servicing_interrupt()); 3517 3518 if (cache->cache_size == 0) { 3519 ASSERT(cache->cache_data == NULL); 3520 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); 3521 return; 3522 } 3523 3524 ASSERT(cache->cache_size > 0); 3525 ASSERT(cache->cache_data); 3526 3527 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { 3528 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); 3529 return; 3530 } 3531 3532 all = (struct di_all *)cache->cache_data; 3533 3534 if (!header_plus_one_ok(all)) { 3535 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); 3536 return; 3537 } 3538 3539 ASSERT(strcmp(all->root_path, "/") == 0); 3540 3541 /* 3542 * The cache_size is the total allocated memory for the cache. 3543 * The map_size is the actual size of valid data in the cache. 3544 * map_size may be smaller than cache_size but cannot exceed 3545 * cache_size. 3546 */ 3547 if (all->map_size > cache->cache_size) { 3548 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." 
3549 " Skipping write", all->map_size, cache->cache_size)); 3550 return; 3551 } 3552 3553 /* 3554 * First unlink the temp file 3555 */ 3556 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3557 if (error && error != ENOENT) { 3558 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3559 DI_CACHE_TEMP, error)); 3560 } 3561 3562 if (error == EROFS) { 3563 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3564 return; 3565 } 3566 3567 vp = NULL; 3568 oflags = (FCREAT|FWRITE); 3569 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3570 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3571 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3572 DI_CACHE_TEMP, error)); 3573 return; 3574 } 3575 3576 ASSERT(vp); 3577 3578 /* 3579 * Paranoid: Check if the file is on a read-only FS 3580 */ 3581 if (vn_is_readonly(vp)) { 3582 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3583 goto fail; 3584 } 3585 3586 /* 3587 * Note that we only write map_size bytes to disk - this saves 3588 * space as the actual cache size may be larger than size of 3589 * valid data in the cache. 3590 * Another advantage is that it makes verification of size 3591 * easier when the file is read later. 3592 */ 3593 map_size = all->map_size; 3594 off = 0; 3595 buf = cache->cache_data; 3596 3597 while (map_size) { 3598 ASSERT(map_size > 0); 3599 /* 3600 * Write in chunks so that VM system 3601 * is not overwhelmed 3602 */ 3603 if (map_size > di_chunk * PAGESIZE) 3604 chunk = di_chunk * PAGESIZE; 3605 else 3606 chunk = map_size; 3607 3608 error = chunk_write(vp, off, buf, chunk); 3609 if (error) { 3610 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3611 off, error)); 3612 goto fail; 3613 } 3614 3615 off += chunk; 3616 buf += chunk; 3617 map_size -= chunk; 3618 3619 /* Give pageout a chance to run */ 3620 delay(1); 3621 } 3622 3623 /* 3624 * Now sync the file and close it 3625 */ 3626 if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) { 3627 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3628 } 3629 3630 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) { 3631 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3632 VN_RELE(vp); 3633 return; 3634 } 3635 3636 VN_RELE(vp); 3637 3638 /* 3639 * Now do the rename 3640 */ 3641 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3642 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3643 return; 3644 } 3645 3646 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3647 3648 return; 3649 3650 fail: 3651 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL); 3652 VN_RELE(vp); 3653 } 3654 3655 3656 /* 3657 * Since we could be called early in boot, 3658 * use kobj_read_file() 3659 */ 3660 static void 3661 di_cache_read(struct di_cache *cache) 3662 { 3663 struct _buf *file; 3664 struct di_all *all; 3665 int n; 3666 size_t map_size, sz, chunk; 3667 offset_t off; 3668 caddr_t buf; 3669 uint32_t saved_crc, crc; 3670 3671 ASSERT(modrootloaded); 3672 ASSERT(DI_CACHE_LOCKED(*cache)); 3673 ASSERT(cache->cache_data == NULL); 3674 ASSERT(cache->cache_size == 0); 3675 ASSERT(!servicing_interrupt()); 3676 3677 file = kobj_open_file(DI_CACHE_FILE); 3678 if (file == (struct _buf *)-1) { 3679 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3680 DI_CACHE_FILE, ENOENT)); 3681 return; 3682 } 3683 3684 /* 3685 * Read in the header+root_path first. 
The root_path must be "/" 3686 */ 3687 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); 3688 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); 3689 3690 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { 3691 kmem_free(all, sizeof (*all) + 1); 3692 kobj_close_file(file); 3693 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); 3694 return; 3695 } 3696 3697 map_size = all->map_size; 3698 3699 kmem_free(all, sizeof (*all) + 1); 3700 3701 ASSERT(map_size >= sizeof (*all) + 1); 3702 3703 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); 3704 sz = map_size; 3705 off = 0; 3706 while (sz) { 3707 /* Don't overload VM with large reads */ 3708 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; 3709 n = kobj_read_file(file, buf, chunk, off); 3710 if (n != chunk) { 3711 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", 3712 DI_CACHE_FILE, off)); 3713 goto fail; 3714 } 3715 off += chunk; 3716 buf += chunk; 3717 sz -= chunk; 3718 } 3719 3720 ASSERT(off == map_size); 3721 3722 /* 3723 * Read past expected EOF to verify size. 3724 */ 3725 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { 3726 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); 3727 goto fail; 3728 } 3729 3730 all = (struct di_all *)di_cache.cache_data; 3731 if (!header_plus_one_ok(all)) { 3732 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); 3733 goto fail; 3734 } 3735 3736 /* 3737 * Compute CRC with checksum field in the cache data set to 0 3738 */ 3739 saved_crc = all->cache_checksum; 3740 all->cache_checksum = 0; 3741 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); 3742 all->cache_checksum = saved_crc; 3743 3744 if (crc != all->cache_checksum) { 3745 CACHE_DEBUG((DI_ERR, 3746 "%s: checksum error: expected=0x%x actual=0x%x", 3747 DI_CACHE_FILE, all->cache_checksum, crc)); 3748 goto fail; 3749 } 3750 3751 if (all->map_size != map_size) { 3752 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); 3753 goto fail; 3754 } 3755 3756 kobj_close_file(file); 3757 3758 di_cache.cache_size = map_size; 3759 3760 return; 3761 3762 fail: 3763 kmem_free(di_cache.cache_data, map_size); 3764 kobj_close_file(file); 3765 di_cache.cache_data = NULL; 3766 di_cache.cache_size = 0; 3767 } 3768 3769 3770 /* 3771 * Checks if arguments are valid for using the cache. 
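 *
 * (Worked example of the checks below: a DINFOCACHE request must carry
 * no other snapshot flags and must be rooted at "/", so a hypothetical
 * DINFOCACHE|DINFOPROP request fails here with EINVAL.)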
3772 */ 3773 static int 3774 cache_args_valid(struct di_state *st, int *error) 3775 { 3776 ASSERT(error); 3777 ASSERT(st->mem_size > 0); 3778 ASSERT(st->memlist != NULL); 3779 3780 if (!modrootloaded || !i_ddi_io_initialized()) { 3781 CACHE_DEBUG((DI_ERR, 3782 "cache lookup failure: I/O subsystem not inited")); 3783 *error = ENOTACTIVE; 3784 return (0); 3785 } 3786 3787 /* 3788 * No other flags allowed with DINFOCACHE 3789 */ 3790 if (st->command != (DINFOCACHE & DIIOC_MASK)) { 3791 CACHE_DEBUG((DI_ERR, 3792 "cache lookup failure: bad flags: 0x%x", 3793 st->command)); 3794 *error = EINVAL; 3795 return (0); 3796 } 3797 3798 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3799 CACHE_DEBUG((DI_ERR, 3800 "cache lookup failure: bad root: %s", 3801 DI_ALL_PTR(st)->root_path)); 3802 *error = EINVAL; 3803 return (0); 3804 } 3805 3806 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); 3807 3808 *error = 0; 3809 3810 return (1); 3811 } 3812 3813 static int 3814 snapshot_is_cacheable(struct di_state *st) 3815 { 3816 ASSERT(st->mem_size > 0); 3817 ASSERT(st->memlist != NULL); 3818 3819 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) != 3820 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { 3821 CACHE_DEBUG((DI_INFO, 3822 "not cacheable: incompatible flags: 0x%x", 3823 st->command)); 3824 return (0); 3825 } 3826 3827 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3828 CACHE_DEBUG((DI_INFO, 3829 "not cacheable: incompatible root path: %s", 3830 DI_ALL_PTR(st)->root_path)); 3831 return (0); 3832 } 3833 3834 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); 3835 3836 return (1); 3837 } 3838 3839 static int 3840 di_cache_lookup(struct di_state *st) 3841 { 3842 size_t rval; 3843 int cache_valid; 3844 3845 ASSERT(cache_args_valid(st, &cache_valid)); 3846 ASSERT(modrootloaded); 3847 3848 DI_CACHE_LOCK(di_cache); 3849 3850 /* 3851 * The following assignment determines the validity 3852 * of the cache as far as this snapshot is concerned. 3853 */ 3854 cache_valid = di_cache.cache_valid; 3855 3856 if (cache_valid && di_cache.cache_data == NULL) { 3857 di_cache_read(&di_cache); 3858 /* check for read or file error */ 3859 if (di_cache.cache_data == NULL) 3860 cache_valid = 0; 3861 } 3862 3863 if (cache_valid) { 3864 /* 3865 * Ok, the cache was valid as of this particular 3866 * snapshot. Copy the cached snapshot. This is safe 3867 * to do as the cache cannot be freed (we hold the 3868 * cache lock). Free the memory allocated in di_state 3869 * up until this point - we will simply copy everything 3870 * in the cache. 3871 */ 3872 3873 ASSERT(di_cache.cache_data != NULL); 3874 ASSERT(di_cache.cache_size > 0); 3875 3876 di_freemem(st); 3877 3878 rval = 0; 3879 if (di_cache2mem(&di_cache, st) > 0) { 3880 3881 ASSERT(DI_ALL_PTR(st)); 3882 3883 /* 3884 * map_size is size of valid data in the 3885 * cached snapshot and may be less than 3886 * size of the cache. 3887 */ 3888 rval = DI_ALL_PTR(st)->map_size; 3889 3890 ASSERT(rval >= sizeof (struct di_all)); 3891 ASSERT(rval <= di_cache.cache_size); 3892 } 3893 } else { 3894 /* 3895 * The cache isn't valid, we need to take a snapshot. 3896 * Set the command flags appropriately 3897 */ 3898 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); 3899 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); 3900 rval = di_cache_update(st); 3901 st->command = (DINFOCACHE & DIIOC_MASK); 3902 } 3903 3904 DI_CACHE_UNLOCK(di_cache); 3905 3906 /* 3907 * For cached snapshots, the devinfo driver always returns 3908 * a snapshot rooted at "/". 
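	 *
	 * (Flow summary: on a cache hit the cached bytes were copied in
	 * via di_cache2mem() above; on a miss, di_cache_update() re-took
	 * the snapshot with DI_CACHE_SNAPSHOT_FLAGS. Either way, rval is
	 * the map_size of the snapshot, or 0 on failure.)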
3909 */ 3910 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); 3911 3912 return ((int)rval); 3913 } 3914 3915 /* 3916 * This is a forced update of the cache - the previous state of the cache 3917 * may be: 3918 * - unpopulated 3919 * - populated and invalid 3920 * - populated and valid 3921 */ 3922 static int 3923 di_cache_update(struct di_state *st) 3924 { 3925 int rval; 3926 uint32_t crc; 3927 struct di_all *all; 3928 3929 ASSERT(DI_CACHE_LOCKED(di_cache)); 3930 ASSERT(snapshot_is_cacheable(st)); 3931 3932 /* 3933 * Free the in-core cache and the on-disk file (if they exist) 3934 */ 3935 i_ddi_di_cache_free(&di_cache); 3936 3937 /* 3938 * Set valid flag before taking the snapshot, 3939 * so that any invalidations that arrive 3940 * during or after the snapshot are not 3941 * removed by us. 3942 */ 3943 atomic_or_32(&di_cache.cache_valid, 1); 3944 3945 rval = di_snapshot_and_clean(st); 3946 3947 if (rval == 0) { 3948 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); 3949 return (0); 3950 } 3951 3952 DI_ALL_PTR(st)->map_size = rval; 3953 3954 if (di_mem2cache(st, &di_cache) == 0) { 3955 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); 3956 return (0); 3957 } 3958 3959 ASSERT(di_cache.cache_data); 3960 ASSERT(di_cache.cache_size > 0); 3961 3962 /* 3963 * Now that we have cached the snapshot, compute its checksum. 3964 * The checksum is only computed over the valid data in the 3965 * cache, not the entire cache. 3966 * Also, set all the fields (except checksum) before computing 3967 * checksum. 3968 */ 3969 all = (struct di_all *)di_cache.cache_data; 3970 all->cache_magic = DI_CACHE_MAGIC; 3971 all->map_size = rval; 3972 3973 ASSERT(all->cache_checksum == 0); 3974 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); 3975 all->cache_checksum = crc; 3976 3977 di_cache_write(&di_cache); 3978 3979 return (rval); 3980 } 3981 3982 static void 3983 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 3984 { 3985 va_list ap; 3986 3987 if (di_cache_debug <= DI_QUIET) 3988 return; 3989 3990 if (di_cache_debug < msglevel) 3991 return; 3992 3993 switch (msglevel) { 3994 case DI_ERR: 3995 msglevel = CE_WARN; 3996 break; 3997 case DI_INFO: 3998 case DI_TRACE: 3999 default: 4000 msglevel = CE_NOTE; 4001 break; 4002 } 4003 4004 va_start(ap, fmt); 4005 vcmn_err(msglevel, fmt, ap); 4006 va_end(ap); 4007 } 4008
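/*
 * Usage note (illustrative only, not part of the driver): the snapshots
 * produced above are normally consumed through libdevinfo(3LIB). A
 * minimal userland sketch:
 *
 *	di_node_t root;
 *
 *	if ((root = di_init("/", DINFOCPYALL)) == DI_NODE_NIL)
 *		return (-1);
 *	(walk with di_child_node()/di_sibling_node(), read properties
 *	    with di_prop_next(), then release the snapshot)
 *	di_fini(root);
 */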