/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * driver for accessing kernel devinfo tree.
 */
#include <sys/types.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/autoconf.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunldi_impl.h>
#include <sys/sunndi.h>
#include <sys/esunddi.h>
#include <sys/sunmdi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/mdi_impldefs.h>
#include <sys/devinfo_impl.h>
#include <sys/thread.h>
#include <sys/modhash.h>
#include <sys/bitmap.h>
#include <util/qsort.h>
#include <sys/disp.h>
#include <sys/kobj.h>
#include <sys/crc32.h>


#ifdef DEBUG
static int di_debug;
#define	dcmn_err(args) if (di_debug >= 1) cmn_err args
#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args
#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args
#else
#define	dcmn_err(args) /* nothing */
#define	dcmn_err2(args) /* nothing */
#define	dcmn_err3(args) /* nothing */
#endif

/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;

#define	DI_FULL_PARENT		0
#define	DI_READONLY_PARENT	1
#define	DI_NODE_SPECIES		2
#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)

#define	IOC_IDLE	0	/* snapshot ioctl states */
#define	IOC_SNAP	1	/* snapshot in progress */
#define	IOC_DONE	2	/* snapshot done, but not copied out */
#define	IOC_COPY	3	/* copyout in progress */

/*
 * Keep max alignment so we can move snapshot to different platforms
 */
#define	DI_ALIGN(addr)	((addr + 7l) & ~7l)

/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.
 */
struct di_mem {
	struct di_mem	*next;		/* link to next chunk */
	char		*buf;		/* contiguous kernel memory */
	size_t		buf_size;	/* size of buf in bytes */
	devmap_cookie_t	cook;		/* cookie from ddi_umem_alloc */
};
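/*
 * A quick worked sketch of DI_ALIGN()'s arithmetic (values illustrative):
 * it rounds an offset up to the next 8-byte boundary, so every object in
 * the snapshot starts at maximum natural alignment on any platform.
 *
 *	DI_ALIGN(0x00) == 0x00
 *	DI_ALIGN(0x01) == 0x08
 *	DI_ALIGN(0x17) == 0x18
 *	DI_ALIGN(0x18) == 0x18
 *
 * Offsets handed out by di_checkmem() below are always passed through
 * DI_ALIGN() first, which is why di_off_t values within the snapshot can
 * be safely used as the location of any structure.
 */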
/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 */
struct di_stack {
	void		*offset[MAX_TREE_DEPTH];
	struct dev_info	*dip[MAX_TREE_DEPTH];
	int		circ[MAX_TREE_DEPTH];
	int		depth;	/* depth of current node to be copied */
};

#define	TOP_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 1])
#define	TOP_NODE(stack)		\
	((stack)->dip[(stack)->depth - 1])
#define	PARENT_OFFSET(stack)	\
	((di_off_t *)(stack)->offset[(stack)->depth - 2])
#define	EMPTY_STACK(stack)	((stack)->depth == 0)
#define	POP_STACK(stack)	{ \
	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
		(stack)->circ[(stack)->depth - 1]); \
	((stack)->depth--); \
}
#define	PUSH_STACK(stack, node, offp)	{ \
	ASSERT(node != NULL); \
	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
	(stack)->dip[(stack)->depth] = (node); \
	(stack)->offset[(stack)->depth] = (void *)(offp); \
	((stack)->depth)++; \
}

#define	DI_ALL_PTR(s)	((struct di_all *)di_mem_addr((s), 0))

/*
 * With devfs, the device tree has no global locks. The device tree is
 * dynamic and dips may come and go if they are not locked locally. Under
 * these conditions, pointers are no longer reliable as unique IDs.
 * Specifically, these pointers cannot be used as keys for hash tables
 * as the same devinfo structure may be freed in one part of the tree only
 * to be allocated as the structure for a different device in another
 * part of the tree. This can happen if DR and the snapshot are
 * happening concurrently.
 * The following data structures act as keys for devinfo nodes and
 * pathinfo nodes.
 */

enum di_ktype {
	DI_DKEY = 1,
	DI_PKEY = 2
};

struct di_dkey {
	dev_info_t	*dk_dip;
	major_t		dk_major;
	int		dk_inst;
	pnode_t		dk_nodeid;
};

struct di_pkey {
	mdi_pathinfo_t	*pk_pip;
	char		*pk_path_addr;
	dev_info_t	*pk_client;
	dev_info_t	*pk_phci;
};

struct di_key {
	enum di_ktype	k_type;
	union {
		struct di_dkey dkey;
		struct di_pkey pkey;
	} k_u;
};


struct i_lnode;

typedef struct i_link {
	/*
	 * If a di_link struct representing this i_link struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_link struct in the snapshot
	 */
	di_off_t	self;

	int		spec_type;	/* block or char access type */
	struct i_lnode	*src_lnode;	/* src i_lnode */
	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */
	struct i_link	*src_link_next;	/* next src i_link w/ same i_lnode */
	struct i_link	*tgt_link_next;	/* next tgt i_link w/ same i_lnode */
} i_link_t;

typedef struct i_lnode {
	/*
	 * If a di_lnode struct representing this i_lnode struct makes it
	 * into the snapshot, then self will point to the offset of
	 * the di_lnode struct in the snapshot
	 */
	di_off_t	self;

	/*
	 * used for hashing and comparing i_lnodes
	 */
	int		modid;

	/*
	 * public information describing a link endpoint
	 */
	struct di_node	*di_node;	/* di_node in snapshot */
	dev_t		devt;		/* devt */

	/*
	 * i_link ptr to links coming into this i_lnode node
	 * (this i_lnode is the target of these i_links)
	 */
	i_link_t	*link_in;

	/*
	 * i_link ptr to links going out of this i_lnode node
	 * (this i_lnode is the source of these i_links)
	 */
	i_link_t	*link_out;
} i_lnode_t;
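/*
 * A sketch of the in-core layering graph these structures form while a
 * snapshot is being built (see di_ldi_callback() later for the real
 * construction): an i_lnode is a {di_node, dev_t} endpoint, and an
 * i_link is a directed {src, tgt, spec_type} edge between endpoints.
 *
 *	src i_lnode                       tgt i_lnode
 *	   link_out --> i_link <-- link_in
 *	                src_lnode/tgt_lnode point back at the endpoints
 *
 * Links that share a source are chained through src_link_next, and
 * links that share a target through tgt_link_next.
 */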
/*
 * Soft state associated with each instance of driver open.
 */
static struct di_state {
	di_off_t	mem_size;	/* total # bytes in memlist */
	struct di_mem	*memlist;	/* head of memlist */
	uint_t		command;	/* command from ioctl */
	int		di_iocstate;	/* snapshot ioctl state */
	mod_hash_t	*reg_dip_hash;
	mod_hash_t	*reg_pip_hash;
	int		lnode_count;
	int		link_count;

	mod_hash_t	*lnode_hash;
	mod_hash_t	*link_hash;
} **di_states;

static kmutex_t di_lock;	/* serialize instance assignment */

typedef enum {
	DI_QUIET = 0,	/* DI_QUIET must always be 0 */
	DI_ERR,
	DI_INFO,
	DI_TRACE,
	DI_TRACE1,
	DI_TRACE2
} di_cache_debug_t;

static uint_t di_chunk = 32;	/* I/O chunk size in pages */

#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock))
#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock))
#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock))

/*
 * Check that whole device tree is being configured as a pre-condition for
 * cleaning up /etc/devices files.
 */
#define	DEVICES_FILES_CLEANABLE(st)	\
	(((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
	strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)

#define	CACHE_DEBUG(args)	\
	{ if (di_cache_debug != DI_QUIET) di_cache_print args; }

static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct di_stack *, struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getprop(struct ddi_prop *, di_off_t *,
    struct di_state *, struct dev_info *, int);
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static caddr_t di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
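/*
 * A sketch of the snapshot state machine driven through di_setstate()
 * (states defined above; transitions as exercised by di_ioctl() below):
 *
 *	IOC_IDLE --snapshot ioctl--> IOC_SNAP --success--> IOC_DONE
 *	IOC_DONE --DINFOUSRLD-----> IOC_COPY --copyout done--> IOC_IDLE
 *
 * A failed snapshot frees the memlist and returns to IOC_IDLE; a failed
 * copyout falls back to IOC_DONE so the snapshot remains fetchable.
 * Judging from its callers, di_setstate() returns -1 (mapped to EBUSY)
 * when the requested transition is illegal.
 */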
static struct cb_ops di_cb_ops = {
	di_open,		/* open */
	di_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	di_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops di_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	di_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	di_attach,		/* attach */
	di_detach,		/* detach */
	nodev,			/* reset */
	&di_cb_ops,		/* driver operations */
	NULL			/* bus operations */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"DEVINFO Driver %I%",
	&di_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int	error;

	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);
	if (error != 0) {
		mutex_destroy(&di_lock);
		return (error);
	}

	return (0);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	error;

	error = mod_remove(&modlinkage);
	if (error != 0) {
		return (error);
	}

	mutex_destroy(&di_lock);
	return (0);
}

static dev_info_t *di_dip;

/*ARGSUSED*/
static int
di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int	error = DDI_FAILURE;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)di_dip;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/*
		 * All dev_t's map to the same, single instance.
		 */
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		break;
	}

	return (error);
}

static int
di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_ATTACH:
		di_states = kmem_zalloc(
		    di_max_opens * sizeof (struct di_state *), KM_SLEEP);

		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
			kmem_free(di_states,
			    di_max_opens * sizeof (struct di_state *));
			ddi_remove_minor_node(dip, NULL);
			error = DDI_FAILURE;
		} else {
			di_dip = dip;
			ddi_report_dev(dip);

			error = DDI_SUCCESS;
		}
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}

static int
di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	error = DDI_FAILURE;

	switch (cmd) {
	case DDI_DETACH:
		ddi_remove_minor_node(dip, NULL);
		di_dip = NULL;
		kmem_free(di_states, di_max_opens * sizeof (struct di_state *));

		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
		break;
	}

	return (error);
}
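/*
 * A sketch of the minor-number mapping implemented by di_open() below
 * (assuming the default di_max_opens of 32):
 *
 *	opened node	minor_parent	assigned slots m	dev minor
 *	devinfo		0 (full)	0, 2, 4, ... 30		m + 2
 *	devinfo,ro	1 (read-only)	1, 3, 5, ... 31		m + 2
 *
 * Each concurrent open claims the first free even (full) or odd
 * (read-only) slot in di_states[]; the slot index plus DI_NODE_SPECIES
 * becomes the new minor returned in *devp.
 */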
/*
 * Allow multiple opens by tweaking the dev_t such that it looks like each
 * open is getting a different minor device.  Each minor gets a separate
 * entry in the di_states[] table.  Based on the original minor number, we
 * discriminate opens of the full and read-only nodes.  If all of the
 * instances of the selected minor node are currently open, we return EAGAIN.
 */
/*ARGSUSED*/
static int
di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	int	m;
	minor_t	minor_parent = getminor(*devp);

	if (minor_parent != DI_FULL_PARENT &&
	    minor_parent != DI_READONLY_PARENT)
		return (ENXIO);

	mutex_enter(&di_lock);

	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
		if (di_states[m] != NULL)
			continue;

		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
		break;	/* It's ours. */
	}

	if (m >= di_max_opens) {
		/*
		 * maximum open instance for device reached
		 */
		mutex_exit(&di_lock);
		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
		return (EAGAIN);
	}
	mutex_exit(&di_lock);

	ASSERT(m < di_max_opens);
	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));

	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));

	return (0);
}

/*ARGSUSED*/
static int
di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
{
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	if (m < 0) {
		cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
		    m + DI_NODE_SPECIES);
		return (ENXIO);
	}

	ASSERT(m < di_max_opens);
	st = di_states[m];
	ASSERT(st != NULL);

	di_freemem(st);
	kmem_free(st, sizeof (struct di_state));

	/*
	 * empty slot in state table
	 */
	mutex_enter(&di_lock);
	di_states[m] = NULL;
	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
	    (void *)curthread, m + DI_NODE_SPECIES));
	mutex_exit(&di_lock);

	return (0);
}


/*ARGSUSED*/
static int
di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int		rv, error;
	di_off_t	off;
	struct di_all	*all;
	struct di_state	*st;
	int		m = (int)getminor(dev) - DI_NODE_SPECIES;

	major_t		i;
	char		*drv_name;
	size_t		map_size, size;
	struct di_mem	*dcp;
	int		ndi_flags;

	if (m < 0 || m >= di_max_opens) {
		return (ENXIO);
	}

	st = di_states[m];
	ASSERT(st != NULL);

	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));

	switch (cmd) {
	case DINFOIDENT:
		/*
		 * This is called from di_init to verify that the driver
		 * opened is indeed devinfo. The purpose is to guard against
		 * sending ioctl to an unknown driver in case of an
		 * unresolved major number conflict during bfu.
		 */
		*rvalp = DI_MAGIC;
		return (0);

	case DINFOLODRV:
		/*
		 * Hold an installed driver and return the result
		 */
		if (DI_UNPRIVILEGED_NODE(m)) {
			/*
			 * Only the fully enabled instances may issue
			 * DINFOLODRV.
			 */
			return (EACCES);
		}

		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
			kmem_free(drv_name, MAXNAMELEN);
			return (EFAULT);
		}

		/*
		 * Some third-party drivers' _init() routines walk the
		 * device tree, so we load the driver module before
		 * configuring the driver.
		 */
		i = ddi_name_to_major(drv_name);
		if (ddi_hold_driver(i) == NULL) {
			kmem_free(drv_name, MAXNAMELEN);
			return (ENXIO);
		}

		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;

		/*
		 * i_ddi_load_drvconf() below will trigger a reprobe
		 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
		 * needed here.
		 */
		modunload_disable();
		(void) i_ddi_load_drvconf(i);
		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
		kmem_free(drv_name, MAXNAMELEN);
		ddi_rele_driver(i);
		rv = i_ddi_devs_attached(i);
		modunload_enable();

		i_ddi_di_cache_invalidate(KM_SLEEP);

		return ((rv == DDI_SUCCESS)? 0 : ENXIO);

	case DINFOUSRLD:
		/*
		 * The case for copying snapshot to userland
		 */
		if (di_setstate(st, IOC_COPY) == -1)
			return (EBUSY);

		map_size = ((struct di_all *)di_mem_addr(st, 0))->map_size;
		if (map_size == 0) {
			(void) di_setstate(st, IOC_DONE);
			return (EFAULT);
		}

		/*
		 * copyout the snapshot
		 */
		map_size = (map_size + PAGEOFFSET) & PAGEMASK;

		/*
		 * Return the map size, so caller may do a sanity
		 * check against the return value of snapshot ioctl()
		 */
		*rvalp = (int)map_size;

		/*
		 * Copy one chunk at a time
		 */
		off = 0;
		dcp = st->memlist;
		while (map_size) {
			size = dcp->buf_size;
			if (map_size <= size) {
				size = map_size;
			}

			if (ddi_copyout(di_mem_addr(st, off),
			    (void *)(arg + off), size, mode) != 0) {
				(void) di_setstate(st, IOC_DONE);
				return (EFAULT);
			}

			map_size -= size;
			off += size;
			dcp = dcp->next;
		}

		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (0);

	default:
		if ((cmd & ~DIIOC_MASK) != DIIOC) {
			/*
			 * Invalid ioctl command
			 */
			return (ENOTTY);
		}
		/*
		 * take a snapshot
		 */
		st->command = cmd & DIIOC_MASK;
		/*FALLTHROUGH*/
	}

	/*
	 * Obtain enough memory to hold header + rootpath.  We prevent kernel
	 * memory exhaustion by freeing any previously allocated snapshot and
	 * refusing the operation; otherwise we would be allowing ioctl(),
	 * ioctl(), ioctl(), ..., panic.
	 */
	if (di_setstate(st, IOC_SNAP) == -1)
		return (EBUSY);

	size = sizeof (struct di_all) +
	    sizeof (((struct dinfo_io *)(NULL))->root_path);
	if (size < PAGESIZE)
		size = PAGESIZE;
	di_allocmem(st, size);

	all = (struct di_all *)di_mem_addr(st, 0);
	all->devcnt = devcnt;
	all->command = st->command;
	all->version = DI_SNAPSHOT_VERSION;

	/*
	 * Note the endianness in case we need to transport snapshot
	 * over the network.
	 */
#if defined(_LITTLE_ENDIAN)
	all->endianness = DI_LITTLE_ENDIAN;
#else
	all->endianness = DI_BIG_ENDIAN;
#endif
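	/*
	 * Sketch of the snapshot layout being assembled here (offsets as
	 * computed by the surrounding code):
	 *
	 *	0			struct di_all header, including
	 *				root_path copied in just below
	 *	DI_ALIGN(header+path)	optional di_priv_format arrays
	 *				(DINFOPRIVDATA only)
	 *	all->top_devinfo	first struct di_node of the tree
	 */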
	/* Copyin ioctl args, store in the snapshot. */
	if (copyinstr((void *)arg, all->root_path,
	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EFAULT);
	}

	if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EINVAL);
	}

	error = 0;
	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (error);
	}

	off = DI_ALIGN(sizeof (struct di_all) + size);

	/*
	 * Only the fully enabled version may force load drivers or read
	 * the parent private data from a driver.
	 */
	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
	    DI_UNPRIVILEGED_NODE(m)) {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
		return (EACCES);
	}

	/* Do we need private data? */
	if (st->command & DINFOPRIVDATA) {
		arg += sizeof (((struct dinfo_io *)(NULL))->root_path);

#ifdef _MULTI_DATAMODEL
		switch (ddi_model_convert_from(mode & FMODELS)) {
		case DDI_MODEL_ILP32: {
			/*
			 * Cannot copy private data from 64-bit kernel
			 * to 32-bit app
			 */
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EINVAL);
		}
		case DDI_MODEL_NONE:
			if ((off = di_copyformat(off, st, arg, mode)) == 0) {
				di_freemem(st);
				(void) di_setstate(st, IOC_IDLE);
				return (EFAULT);
			}
			break;
		}
#else /* !_MULTI_DATAMODEL */
		if ((off = di_copyformat(off, st, arg, mode)) == 0) {
			di_freemem(st);
			(void) di_setstate(st, IOC_IDLE);
			return (EFAULT);
		}
#endif /* _MULTI_DATAMODEL */
	}

	all->top_devinfo = DI_ALIGN(off);

	/*
	 * For cache lookups we reallocate memory from scratch,
	 * so the value of "all" is no longer valid.
	 */
	all = NULL;

	if (st->command & DINFOCACHE) {
		*rvalp = di_cache_lookup(st);
	} else if (snapshot_is_cacheable(st)) {
		DI_CACHE_LOCK(di_cache);
		*rvalp = di_cache_update(st);
		DI_CACHE_UNLOCK(di_cache);
	} else
		*rvalp = di_snapshot_and_clean(st);

	if (*rvalp) {
		DI_ALL_PTR(st)->map_size = *rvalp;
		(void) di_setstate(st, IOC_DONE);
	} else {
		di_freemem(st);
		(void) di_setstate(st, IOC_IDLE);
	}

	return (0);
}
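/*
 * For orientation, a sketch of the userland sequence that drives
 * di_ioctl() above (roughly the flow libdevinfo's di_init() uses; the
 * device path, rounding helper, and flag choice are illustrative
 * assumptions):
 *
 *	fd = open("/devices/pseudo/devinfo@0:devinfo", O_RDONLY);
 *	if (ioctl(fd, DINFOIDENT, NULL) != DI_MAGIC)
 *		bail out -- some other driver answered
 *	(void) strcpy(dinfo_io.root_path, "/");
 *	size = ioctl(fd, DINFOSUBTREE | DINFOPROP, &dinfo_io);
 *	buf = malloc(roundup(size, PAGESIZE));
 *	(void) ioctl(fd, DINFOUSRLD, buf);	-- chunked copyout
 *
 * The snapshot ioctl reports map_size through *rvalp; DINFOUSRLD
 * reports it rounded up to a page, letting the caller sanity-check one
 * against the other.
 */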
/*
 * Get a chunk of memory >= size, for the snapshot
 */
static void
di_allocmem(struct di_state *st, size_t size)
{
	struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem),
	    KM_SLEEP);
	/*
	 * Round up size to nearest power of 2. If it is less
	 * than st->mem_size, set it to st->mem_size (i.e.,
	 * the mem_size is doubled every time) to reduce the
	 * number of memory allocations.
	 */
	size_t tmp = 1;
	while (tmp < size) {
		tmp <<= 1;
	}
	size = (tmp > st->mem_size) ? tmp : st->mem_size;

	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
	mem->buf_size = size;

	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));

	if (st->mem_size == 0) {	/* first chunk */
		st->memlist = mem;
	} else {
		/*
		 * locate end of linked list and add a chunk at the end
		 */
		struct di_mem *dcp = st->memlist;
		while (dcp->next != NULL) {
			dcp = dcp->next;
		}

		dcp->next = mem;
	}

	st->mem_size += size;
}

/*
 * Copy up to bufsiz bytes of the memlist to buf
 */
static void
di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
{
	struct di_mem	*dcp;
	size_t		copysz;

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		return;
	}

	copysz = 0;
	for (dcp = st->memlist; dcp; dcp = dcp->next) {

		ASSERT(bufsiz > 0);

		if (bufsiz <= dcp->buf_size)
			copysz = bufsiz;
		else
			copysz = dcp->buf_size;

		bcopy(dcp->buf, buf, copysz);

		buf += copysz;
		bufsiz -= copysz;

		if (bufsiz == 0)
			break;
	}
}

/*
 * Free all memory for the snapshot
 */
static void
di_freemem(struct di_state *st)
{
	struct di_mem	*dcp, *tmp;

	dcmn_err2((CE_CONT, "di_freemem\n"));

	if (st->mem_size) {
		dcp = st->memlist;
		while (dcp) {	/* traverse the linked list */
			tmp = dcp;
			dcp = dcp->next;
			ddi_umem_free(tmp->cook);
			kmem_free(tmp, sizeof (struct di_mem));
		}
		st->mem_size = 0;
		st->memlist = NULL;
	}

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
}
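/*
 * A worked sketch of di_allocmem()'s growth policy above (sizes
 * illustrative, assuming successive calls on one snapshot): each chunk
 * is at least the power-of-two round-up of the request and at least as
 * large as the total allocated so far, so mem_size roughly doubles per
 * chunk.
 *
 *	request		chunk allocated		mem_size after
 *	0x1800		0x2000			0x2000
 *	0x0400		0x2000			0x4000
 *	0x0100		0x4000			0x8000
 *
 * This keeps the number of chunks (and hence the walk in di_mem_addr())
 * logarithmic in the snapshot size.
 */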
/*
 * Copies cached data to the di_state structure.
 * Returns:
 *	- size of data copied, on SUCCESS
 *	- 0 on failure
 */
static int
di_cache2mem(struct di_cache *cache, struct di_state *st)
{
	caddr_t	pa;

	ASSERT(st->mem_size == 0);
	ASSERT(st->memlist == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (cache->cache_size == 0) {
		ASSERT(cache->cache_data == NULL);
		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
		return (0);
	}

	ASSERT(cache->cache_data);

	di_allocmem(st, cache->cache_size);

	pa = di_mem_addr(st, 0);

	ASSERT(pa);

	/*
	 * Verify that di_allocmem() allocates contiguous memory,
	 * so that it is safe to do straight bcopy()
	 */
	ASSERT(st->memlist != NULL);
	ASSERT(st->memlist->next == NULL);
	bcopy(cache->cache_data, pa, cache->cache_size);

	return (cache->cache_size);
}

/*
 * Copies a snapshot from di_state to the cache
 * Returns:
 *	- 0 on failure
 *	- size of copied data on success
 */
static int
di_mem2cache(struct di_state *st, struct di_cache *cache)
{
	size_t	map_size;

	ASSERT(cache->cache_size == 0);
	ASSERT(cache->cache_data == NULL);
	ASSERT(!servicing_interrupt());
	ASSERT(DI_CACHE_LOCKED(*cache));

	if (st->mem_size == 0) {
		ASSERT(st->memlist == NULL);
		CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
		return (0);
	}

	ASSERT(st->memlist);

	/*
	 * The size of the memory list may be much larger than the
	 * size of valid data (map_size). Cache only the valid data
	 */
	map_size = DI_ALL_PTR(st)->map_size;
	if (map_size == 0 || map_size < sizeof (struct di_all) ||
	    map_size > st->mem_size) {
		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
		return (0);
	}

	cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
	cache->cache_size = map_size;
	di_copymem(st, cache->cache_data, cache->cache_size);

	return (map_size);
}

/*
 * Make sure there is at least "size" bytes of memory left before
 * going on. Otherwise, start on a new chunk.
 */
static di_off_t
di_checkmem(struct di_state *st, di_off_t off, size_t size)
{
	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
	    off, (int)size));

	/*
	 * di_checkmem() shouldn't be called with a size of zero.
	 * But in case it is, we want to make sure we return a valid
	 * offset within the memlist and not an offset that points us
	 * at the end of the memlist.
	 */
	if (size == 0) {
		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
		size = 1;
	}

	off = DI_ALIGN(off);
	if ((st->mem_size - off) < size) {
		off = st->mem_size;
		di_allocmem(st, size);
	}

	return (off);
}
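/*
 * A sketch of the allocation protocol used throughout the copy routines
 * below: reserve space first, then convert the offset to an address,
 * because di_checkmem() may extend the memlist:
 *
 *	off = di_checkmem(st, off, len);	-- off is now DI_ALIGNed
 *						-- with len bytes usable
 *	bcopy(src, di_mem_addr(st, off), len);
 *	off += len;				-- next free offset
 *
 * Note that di_checkmem() may return an offset different from the one
 * passed in, so callers must use the returned value when copying.
 */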
/*
 * Copy the private data format from ioctl arg.
 * On success, the ending offset is returned. On error 0 is returned.
 */
static di_off_t
di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
{
	di_off_t		size;
	struct di_priv_data	*priv;
	struct di_all		*all = (struct di_all *)di_mem_addr(st, 0);

	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
	    off, (void *)arg, mode));

	/*
	 * Copyin data and check version.
	 * We only handle private data version 0.
	 */
	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
		kmem_free(priv, sizeof (struct di_priv_data));
		return (0);
	}

	/*
	 * Save di_priv_data copied from userland in snapshot.
	 */
	all->pd_version = priv->version;
	all->n_ppdata = priv->n_parent;
	all->n_dpdata = priv->n_driver;

	/*
	 * copyin private data format, modify offset accordingly
	 */
	if (all->n_ppdata) {	/* parent private data format */
		/*
		 * check memory
		 */
		size = all->n_ppdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->ppdata_format = off;
		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	if (all->n_dpdata) {	/* driver private data format */
		/*
		 * check memory
		 */
		size = all->n_dpdata * sizeof (struct di_priv_format);
		off = di_checkmem(st, off, size);
		all->dpdata_format = off;
		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
		    mode) != 0) {
			kmem_free(priv, sizeof (struct di_priv_data));
			return (0);
		}

		off += size;
	}

	kmem_free(priv, sizeof (struct di_priv_data));
	return (off);
}

/*
 * Return the real address based on the offset (off) within snapshot
 */
static caddr_t
di_mem_addr(struct di_state *st, di_off_t off)
{
	struct di_mem *dcp = st->memlist;

	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
	    (void *)dcp, off));

	ASSERT(off < st->mem_size);

	while (off >= dcp->buf_size) {
		off -= dcp->buf_size;
		dcp = dcp->next;
	}

	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
	    off, (void *)(dcp->buf + off)));

	return (dcp->buf + off);
}
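/*
 * A sketch of how a snapshot offset resolves to a kernel address in
 * di_mem_addr() above (chunk sizes illustrative):
 *
 *	memlist:  [chunk0: 0x2000] -> [chunk1: 0x2000] -> [chunk2: 0x4000]
 *	off 0x1800 -> chunk0->buf + 0x1800
 *	off 0x3000 -> chunk1->buf + 0x1000
 *	off 0x5000 -> chunk2->buf + 0x1000
 *
 * Because userland later receives the chunks copied out back to back
 * (DINFOUSRLD), an offset means the same thing in both address spaces.
 */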
/*
 * Ideally we would use the whole key to derive the hash
 * value. However, the probability that two keys will
 * have the same dip (or pip) is very low, so
 * hashing by dip (or pip) pointer should suffice.
 */
static uint_t
di_hash_byptr(void *arg, mod_hash_key_t key)
{
	struct di_key	*dik = key;
	size_t		rshift;
	void		*ptr;

	ASSERT(arg == NULL);

	switch (dik->k_type) {
	case DI_DKEY:
		ptr = dik->k_u.dkey.dk_dip;
		rshift = highbit(sizeof (struct dev_info));
		break;
	case DI_PKEY:
		ptr = dik->k_u.pkey.pk_pip;
		rshift = highbit(sizeof (struct mdi_pathinfo));
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
	return (mod_hash_byptr((void *)rshift, ptr));
}

static void
di_key_dtor(mod_hash_key_t key)
{
	char		*path_addr;
	struct di_key	*dik = key;

	switch (dik->k_type) {
	case DI_DKEY:
		break;
	case DI_PKEY:
		path_addr = dik->k_u.pkey.pk_path_addr;
		if (path_addr)
			kmem_free(path_addr, strlen(path_addr) + 1);
		break;
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}

	kmem_free(dik, sizeof (struct di_key));
}

static int
di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
{
	if (dk1->dk_dip != dk2->dk_dip)
		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);

	if (dk1->dk_major != -1 && dk2->dk_major != -1) {
		if (dk1->dk_major != dk2->dk_major)
			return (dk1->dk_major > dk2->dk_major ? 1 : -1);

		if (dk1->dk_inst != dk2->dk_inst)
			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
	}

	if (dk1->dk_nodeid != dk2->dk_nodeid)
		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);

	return (0);
}

static int
di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
{
	char	*p1, *p2;
	int	rv;

	if (pk1->pk_pip != pk2->pk_pip)
		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);

	p1 = pk1->pk_path_addr;
	p2 = pk2->pk_path_addr;

	p1 = p1 ? p1 : "";
	p2 = p2 ? p2 : "";

	rv = strcmp(p1, p2);
	if (rv)
		return (rv > 0 ? 1 : -1);

	if (pk1->pk_client != pk2->pk_client)
		return (pk1->pk_client > pk2->pk_client ? 1 : -1);

	if (pk1->pk_phci != pk2->pk_phci)
		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);

	return (0);
}

static int
di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	struct di_key	*dik1, *dik2;

	dik1 = key1;
	dik2 = key2;

	if (dik1->k_type != dik2->k_type) {
		panic("devinfo: mismatched keys");
		/*NOTREACHED*/
	}

	switch (dik1->k_type) {
	case DI_DKEY:
		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
	case DI_PKEY:
		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
	default:
		panic("devinfo: unknown key type");
		/*NOTREACHED*/
	}
}
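/*
 * A sketch of why di_hash_byptr() shifts the pointer before hashing
 * (address illustrative): dev_info structures are heap-allocated, so
 * the low log2(sizeof (struct dev_info)) bits of their addresses carry
 * little entropy, and mod_hash_byptr() is told to shift them away:
 *
 *	dip = 0x7001d400, rshift = highbit(sizeof (struct dev_info))
 *	hash input ~ (uintptr_t)dip >> rshift
 *
 * Equality is still decided by di_key_cmp() over the full key, so two
 * keys that collide on the pointer (e.g. a dip recycled by DR) are
 * disambiguated by the major/instance/nodeid or path-info fields.
 */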
/*
 * This is the main function that takes a snapshot
 */
static di_off_t
di_snapshot(struct di_state *st)
{
	di_off_t	off;
	struct di_all	*all;
	dev_info_t	*rootnode;
	char		buf[80];
	int		plen;
	char		*path;
	vnode_t		*vp;

	all = (struct di_all *)di_mem_addr(st, 0);
	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));

	/*
	 * Verify path before entrusting it to e_ddi_hold_devi_by_path because
	 * some platforms have OBP bugs where executing the NDI_PROMNAME code
	 * path against an invalid path results in panic.  The lookupnameat
	 * is done relative to rootdir without a leading '/' on "devices/"
	 * to force the lookup to occur in the global zone.
	 */
	plen = strlen("devices/") + strlen(all->root_path) + 1;
	path = kmem_alloc(plen, KM_SLEEP);
	(void) snprintf(path, plen, "devices/%s", all->root_path);
	if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		kmem_free(path, plen);
		return (0);
	}
	kmem_free(path, plen);
	VN_RELE(vp);

	/*
	 * Hold the devinfo node referred by the path.
	 */
	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
	if (rootnode == NULL) {
		dcmn_err((CE_CONT, "Devinfo node %s not found\n",
		    all->root_path));
		return (0);
	}

	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered dips (statep=%p)", (void *)st);

	st->reg_dip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);


	(void) snprintf(buf, sizeof (buf),
	    "devinfo registered pips (statep=%p)", (void *)st);

	st->reg_pip_hash = mod_hash_create_extended(buf, 64,
	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
	    NULL, di_key_cmp, KM_SLEEP);

	/*
	 * copy the device tree
	 */
	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);

	ddi_release_devi(rootnode);

	/*
	 * copy the devnames array
	 */
	all->devnames = off;
	off = di_copydevnm(&all->devnames, st);


	/* initialize the hash tables */
	st->lnode_count = 0;
	st->link_count = 0;

	if (DINFOLYR & st->command) {
		off = di_getlink_data(off, st);
	}

	/*
	 * Free up hash tables
	 */
	mod_hash_destroy_hash(st->reg_dip_hash);
	mod_hash_destroy_hash(st->reg_pip_hash);

	/*
	 * Record the timestamp now that we are done with snapshot.
	 *
	 * We compute the checksum later and then only if we cache
	 * the snapshot, since checksumming adds some overhead.
	 * The checksum is checked later when we read the cache file
	 * from disk.
	 *
	 * Set checksum field to 0 as CRC is calculated with that
	 * field set to 0.
	 */
	all->snapshot_time = ddi_get_time();
	all->cache_checksum = 0;

	return (off);
}

/*
 * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
 */
static di_off_t
di_snapshot_and_clean(struct di_state *st)
{
	di_off_t	off;

	modunload_disable();
	off = di_snapshot(st);
	if (off != 0 && (st->command & DINFOCLEANUP)) {
		ASSERT(DEVICES_FILES_CLEANABLE(st));
		/*
		 * Cleanup /etc/devices files:
		 * In order to accurately account for the system configuration
		 * in /etc/devices files, the appropriate drivers must be
		 * fully configured before the cleanup starts.
		 * So enable modunload only after the cleanup.
		 */
		i_ddi_clean_devices_files();
	}
	modunload_enable();

	return (off);
}

/*
 * Assumes all devinfo nodes in device tree have been snapshotted
 */
static void
snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *poff_p)
{
	struct dev_info	*node;
	struct di_node	*me;
	di_off_t	off;

	ASSERT(mutex_owned(&dnp->dn_lock));

	node = DEVI(dnp->dn_head);
	for (; node; node = node->devi_next) {
		if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
			continue;

		ASSERT(off > 0);
		me = (struct di_node *)di_mem_addr(st, off);
		ASSERT(me->next == 0 || me->next == -1);
		/*
		 * Only nodes which were BOUND when they were
		 * snapshotted will be added to per-driver list.
		 */
		if (me->next != -1)
			continue;

		*poff_p = off;
		poff_p = &me->next;
	}

	*poff_p = 0;
}
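/*
 * A sketch of the two-pass list stitching done above (offsets
 * illustrative): di_copynode() stores next == -1 in every BOUND node as
 * a "to be resolved" sentinel, and snap_driver_list() later threads
 * those nodes into each driver's chain, terminating it with 0:
 *
 *	after di_copytree:	nodeA.next = -1    nodeB.next = -1
 *	after snap_driver_list:	dnp[drv].head = offA
 *				nodeA.next = offB, nodeB.next = 0
 *
 * Unbound nodes keep next == 0 and never appear on a per-driver list.
 */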
/*
 * Copy the devnames array, so we have a list of drivers in the snapshot.
 * Also makes it possible to locate the per-driver devinfo nodes.
 */
static di_off_t
di_copydevnm(di_off_t *off_p, struct di_state *st)
{
	int		i;
	di_off_t	off;
	size_t		size;
	struct di_devnm	*dnp;

	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));

	/*
	 * make sure there is some allocated memory
	 */
	size = devcnt * sizeof (struct di_devnm);
	off = di_checkmem(st, *off_p, size);
	*off_p = off;

	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
	    devcnt, off));

	dnp = (struct di_devnm *)di_mem_addr(st, off);
	off += size;

	for (i = 0; i < devcnt; i++) {
		if (devnamesp[i].dn_name == NULL) {
			continue;
		}

		/*
		 * dn_name is not freed during driver unload or removal.
		 *
		 * There is a race condition when make_devname() changes
		 * dn_name during our strcpy. This should be rare since
		 * only add_drv does this. At any rate, we never had a
		 * problem with ddi_name_to_major(), which should have
		 * the same problem.
		 */
		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
		    devnamesp[i].dn_name, devnamesp[i].dn_instance,
		    off));

		off = di_checkmem(st, off, strlen(devnamesp[i].dn_name) + 1);
		dnp[i].name = off;
		(void) strcpy((char *)di_mem_addr(st, off),
		    devnamesp[i].dn_name);
		off += DI_ALIGN(strlen(devnamesp[i].dn_name) + 1);

		mutex_enter(&devnamesp[i].dn_lock);

		/*
		 * Snapshot per-driver node list
		 */
		snap_driver_list(st, &devnamesp[i], &dnp[i].head);

		/*
		 * This is not used by libdevinfo, leave it for now
		 */
		dnp[i].flags = devnamesp[i].dn_flags;
		dnp[i].instance = devnamesp[i].dn_instance;

		/*
		 * get global properties
		 */
		if ((DINFOPROP & st->command) &&
		    devnamesp[i].dn_global_prop_ptr) {
			dnp[i].global_prop = off;
			off = di_getprop(
			    devnamesp[i].dn_global_prop_ptr->prop_list,
			    &dnp[i].global_prop, st, NULL, DI_PROP_GLB_LIST);
		}

		/*
		 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
		 */
		if (CB_DRV_INSTALLED(devopsp[i])) {
			if (devopsp[i]->devo_cb_ops) {
				dnp[i].ops |= DI_CB_OPS;
				if (devopsp[i]->devo_cb_ops->cb_str)
					dnp[i].ops |= DI_STREAM_OPS;
			}
			if (NEXUS_DRV(devopsp[i])) {
				dnp[i].ops |= DI_BUS_OPS;
			}
		}

		mutex_exit(&devnamesp[i].dn_lock);
	}

	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));

	return (off);
}
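/*
 * The tree copy below avoids recursion with an explicit stack (see
 * struct di_stack and PUSH_STACK/POP_STACK above).  A sketch of the
 * traversal, as driven by di_copytree() and di_copynode():
 *
 *	PUSH_STACK(root)
 *	while (!EMPTY_STACK)
 *		di_copynode(top):
 *			copy the node at *TOP_OFFSET
 *			if it has a child: PUSH the child, continue
 *			else POP until a node with an unvisited sibling
 *			is found, then PUSH the sibling
 *
 * Each PUSH also takes the node's ndi_devi_enter() lock, so the subtree
 * being copied cannot change underneath the walk.
 */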
/*
 * Copy the kernel devinfo tree. The tree and the devnames array form
 * the entire snapshot (see also di_copydevnm).
 */
static di_off_t
di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
{
	di_off_t	off;
	struct di_stack	*dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);

	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
	    (void *)root, *off_p));

	/* force attach drivers */
	if ((i_ddi_node_state((dev_info_t *)root) == DS_READY) &&
	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
		(void) ndi_devi_config((dev_info_t *)root,
		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
		    NDI_DRV_CONF_REPROBE);
	}

	/*
	 * Push top_devinfo onto a stack
	 *
	 * The stack is necessary to avoid recursion, which can overrun
	 * the kernel stack.
	 */
	PUSH_STACK(dsp, root, off_p);

	/*
	 * As long as there is a node on the stack, copy the node.
	 * di_copynode() is responsible for pushing and popping
	 * child and sibling nodes on the stack.
	 */
	while (!EMPTY_STACK(dsp)) {
		off = di_copynode(dsp, st);
	}

	/*
	 * Free the stack structure
	 */
	kmem_free(dsp, sizeof (struct di_stack));

	return (off);
}

/*
 * This is the core function, which copies all data associated with a single
 * node into the snapshot. The amount of information is determined by the
 * ioctl command.
 */
static di_off_t
di_copynode(struct di_stack *dsp, struct di_state *st)
{
	di_off_t	off;
	struct di_node	*me;
	struct dev_info	*node;

	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n",
	    dsp->depth));

	node = TOP_NODE(dsp);

	ASSERT(node != NULL);

	/*
	 * check memory usage, and fix offsets accordingly.
	 */
	off = di_checkmem(st, *(TOP_OFFSET(dsp)), sizeof (struct di_node));
	*(TOP_OFFSET(dsp)) = off;
	me = DI_NODE(di_mem_addr(st, off));

	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
	    node->devi_node_name, node->devi_instance, off));

	/*
	 * Node parameters:
	 * self		-- offset of current node within snapshot
	 * nodeid	-- pointer to PROM node (tri-valued)
	 * state	-- hot plugging device state
	 * node_state	-- devinfo node state (CF1, CF2, etc.)
	 */
	me->self = off;
	me->instance = node->devi_instance;
	me->nodeid = node->devi_nodeid;
	me->node_class = node->devi_node_class;
	me->attributes = node->devi_node_attributes;
	me->state = node->devi_state;
	me->node_state = node->devi_node_state;
	me->user_private_data = NULL;

	/*
	 * Get parent's offset in snapshot from the stack
	 * and store it in the current node
	 */
	if (dsp->depth > 1) {
		me->parent = *(PARENT_OFFSET(dsp));
	}

	/*
	 * Save the offset of this di_node in a hash table.
	 * This is used later to resolve references to this
	 * dip from other parts of the tree (per-driver list,
	 * multipathing linkages, layered usage linkages).
	 * The key used for the hash table is derived from
	 * information in the dip.
	 */
	di_register_dip(st, (dev_info_t *)node, me->self);

	/*
	 * increment offset
	 */
	off += sizeof (struct di_node);

#ifdef	DEVID_COMPATIBILITY
	/* check for devid as property marker */
	if (node->devi_devid) {
		ddi_devid_t	devid;
		char		*devidstr;
		int		devid_size;

		/*
		 * The devid is now represented as a property.
		 * For micro release compatibility with di_devid interface
		 * in libdevinfo we must return it as a binary structure in
		 * the snapshot. When di_devid is removed from libdevinfo
		 * in a future release (and devi_devid is deleted) then
		 * code related to DEVID_COMPATIBILITY can be removed.
		 */
		ASSERT(node->devi_devid == DEVID_COMPATIBILITY);
		/* XXX should be DDI_DEV_T_NONE! */
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, (dev_info_t *)node,
		    DDI_PROP_DONTPASS, DEVID_PROP_NAME, &devidstr) ==
		    DDI_PROP_SUCCESS) {
			if (ddi_devid_str_decode(devidstr, &devid, NULL) ==
			    DDI_SUCCESS) {
				devid_size = ddi_devid_sizeof(devid);
				off = di_checkmem(st, off, devid_size);
				me->devid = off;
				bcopy(devid,
				    di_mem_addr(st, off), devid_size);
				off += devid_size;
				ddi_devid_free(devid);
			}
			ddi_prop_free(devidstr);
		}
	}
#endif	/* DEVID_COMPATIBILITY */

	if (node->devi_node_name) {
		off = di_checkmem(st, off, strlen(node->devi_node_name) + 1);
		me->node_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_node_name);
		off += strlen(node->devi_node_name) + 1;
	}

	if (node->devi_compat_names && (node->devi_compat_length > 1)) {
		off = di_checkmem(st, off, node->devi_compat_length);
		me->compat_names = off;
		me->compat_length = node->devi_compat_length;
		bcopy(node->devi_compat_names, di_mem_addr(st, off),
		    node->devi_compat_length);
		off += node->devi_compat_length;
	}

	if (node->devi_addr) {
		off = di_checkmem(st, off, strlen(node->devi_addr) + 1);
		me->address = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_addr);
		off += strlen(node->devi_addr) + 1;
	}

	if (node->devi_binding_name) {
		off = di_checkmem(st, off, strlen(node->devi_binding_name) + 1);
		me->bind_name = off;
		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
		off += strlen(node->devi_binding_name) + 1;
	}

	me->drv_major = node->devi_major;

	/*
	 * If the dip is BOUND, set the next pointer of the
	 * per-driver list to -1, indicating that it is yet to be resolved.
	 * This will be resolved later in snap_driver_list().
	 */
	if (me->drv_major != -1) {
		me->next = -1;
	} else {
		me->next = 0;
	}

	/*
	 * An optimization to skip mutex_enter when not needed.
	 */
	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) {
		goto priv_data;
	}

	/*
	 * Grab current per dev_info node lock to
	 * get minor data and properties.
	 */
	mutex_enter(&(node->devi_lock));

	if (!(DINFOMINOR & st->command)) {
		goto path;
	}

	if (node->devi_minor) {		/* minor data */
		me->minor_data = DI_ALIGN(off);
		off = di_getmdata(node->devi_minor, &me->minor_data,
		    me->self, st);
	}

path:
	if (!(DINFOPATH & st->command)) {
		goto property;
	}

	if (MDI_CLIENT(node)) {
		me->multipath_client = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
		    me->self, st, 1);
		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_client,
		    (void *)node, node->devi_mdi_component, off));
	}

	if (MDI_PHCI(node)) {
		me->multipath_phci = DI_ALIGN(off);
		off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
		    me->self, st, 0);
		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
		    "component type = %d.  off=%d",
		    me->multipath_phci,
		    (void *)node, node->devi_mdi_component, off));
	}
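	/*
	 * The DINFOPROP work that follows pulls from the node's several
	 * property lists; a sketch of the mapping (sources as used just
	 * below):
	 *
	 *	devi_drv_prop_ptr	-> me->drv_prop  (DI_PROP_DRV_LIST)
	 *	devi_sys_prop_ptr	-> me->sys_prop  (DI_PROP_SYS_LIST)
	 *	devi_hw_prop_ptr	-> me->hw_prop   (DI_PROP_HW_LIST)
	 *	devi_global_prop_list	-> me->glob_prop (DI_PROP_GLB_LIST),
	 *				   copied only when it differs from
	 *				   the driver's devnames entry
	 */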
property:
	if (!(DINFOPROP & st->command)) {
		goto unlock;
	}

	if (node->devi_drv_prop_ptr) {	/* driver property list */
		me->drv_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st,
		    node, DI_PROP_DRV_LIST);
	}

	if (node->devi_sys_prop_ptr) {	/* system property list */
		me->sys_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st,
		    node, DI_PROP_SYS_LIST);
	}

	if (node->devi_hw_prop_ptr) {	/* hardware property list */
		me->hw_prop = DI_ALIGN(off);
		off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st,
		    node, DI_PROP_HW_LIST);
	}

	if (node->devi_global_prop_list == NULL) {
		me->glob_prop = (di_off_t)-1;	/* not global property */
	} else {
		/*
		 * Make a copy of the global property list if this devinfo
		 * refers to global properties different from what's on the
		 * devnames array. It can happen if there has been a forced
		 * driver.conf update. See mod_drv(1M).
		 */
		ASSERT(me->drv_major != -1);
		if (node->devi_global_prop_list !=
		    devnamesp[me->drv_major].dn_global_prop_ptr) {
			me->glob_prop = DI_ALIGN(off);
			off = di_getprop(node->devi_global_prop_list->prop_list,
			    &me->glob_prop, st, node, DI_PROP_GLB_LIST);
		}
	}

unlock:
	/*
	 * release current per dev_info node lock
	 */
	mutex_exit(&(node->devi_lock));

priv_data:
	if (!(DINFOPRIVDATA & st->command)) {
		goto pm_info;
	}

	if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
		me->parent_data = DI_ALIGN(off);
		off = di_getppdata(node, &me->parent_data, st);
	}

	if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
		me->driver_data = DI_ALIGN(off);
		off = di_getdpdata(node, &me->driver_data, st);
	}

pm_info: /* NOT implemented */

subtree:
	if (!(DINFOSUBTREE & st->command)) {
		POP_STACK(dsp);
		return (DI_ALIGN(off));
	}
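	/*
	 * From here the walk threads offsets through the stack: the
	 * address of me->child (or me->sibling) is pushed along with the
	 * next node, so the next di_copynode() call both allocates that
	 * node and patches this node's pointer to it.  A sketch:
	 *
	 *	PUSH_STACK(dsp, node->devi_child, &me->child)
	 *	  ... next iteration ...
	 *	*(TOP_OFFSET(dsp)) = off	-- writes into me->child
	 *
	 * The value returned below is only a hint of where the next node
	 * will start; di_copytree() keeps the value of the final call.
	 */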
child:
	/*
	 * If there is a child--push child onto stack.
	 * Hold the parent busy while doing so.
	 */
	if (node->devi_child) {
		me->child = DI_ALIGN(off);
		PUSH_STACK(dsp, node->devi_child, &me->child);
		return (me->child);
	}

sibling:
	/*
	 * no child node, unroll the stack till a sibling of
	 * a parent node is found or root node is reached
	 */
	POP_STACK(dsp);
	while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) {
		node = TOP_NODE(dsp);
		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
		POP_STACK(dsp);
	}

	if (!EMPTY_STACK(dsp)) {
		/*
		 * a sibling is found, replace top of stack by its sibling
		 */
		me->sibling = DI_ALIGN(off);
		PUSH_STACK(dsp, node->devi_sibling, &me->sibling);
		return (me->sibling);
	}

	/*
	 * DONE with all nodes
	 */
	return (DI_ALIGN(off));
}

static i_lnode_t *
i_lnode_alloc(int modid)
{
	i_lnode_t	*i_lnode;

	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);

	ASSERT(modid != -1);
	i_lnode->modid = modid;

	return (i_lnode);
}

static void
i_lnode_free(i_lnode_t *i_lnode)
{
	kmem_free(i_lnode, sizeof (i_lnode_t));
}

static void
i_lnode_check_free(i_lnode_t *i_lnode)
{
	/* This lnode and its dip must have been snapshotted */
	ASSERT(i_lnode->self > 0);
	ASSERT(i_lnode->di_node->self > 0);

	/* at least 1 link (in or out) must exist for this lnode */
	ASSERT(i_lnode->link_in || i_lnode->link_out);

	i_lnode_free(i_lnode);
}

static i_link_t *
i_link_alloc(int spec_type)
{
	i_link_t	*i_link;

	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
	i_link->spec_type = spec_type;

	return (i_link);
}

static void
i_link_check_free(i_link_t *i_link)
{
	/* This link must have been snapshotted */
	ASSERT(i_link->self > 0);

	/* Both endpoint lnodes must exist for this link */
	ASSERT(i_link->src_lnode);
	ASSERT(i_link->tgt_lnode);

	kmem_free(i_link, sizeof (i_link_t));
}

/*ARGSUSED*/
static uint_t
i_lnode_hashfunc(void *arg, mod_hash_key_t key)
{
	i_lnode_t	*i_lnode = (i_lnode_t *)key;
	struct di_node	*ptr;
	dev_t		dev;

	dev = i_lnode->devt;
	if (dev != DDI_DEV_T_NONE)
		return (i_lnode->modid + getminor(dev) + getmajor(dev));

	ptr = i_lnode->di_node;
	if (ptr) {
		uintptr_t k = (uintptr_t)ptr;

		ASSERT(ptr->self > 0);
		k >>= (int)highbit(sizeof (struct di_node));
		return ((uint_t)k);
	}

	return (i_lnode->modid);
}

static int
i_lnode_cmp(void *arg1, void *arg2)
{
	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1;
	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2;

	if (i_lnode1->modid != i_lnode2->modid) {
		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
	}

	if (i_lnode1->di_node != i_lnode2->di_node)
		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);

	if (i_lnode1->devt != i_lnode2->devt)
		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);

	return (0);
}

/*
 * An lnode represents a {dip, dev_t} tuple. A link represents a
 * {src_lnode, tgt_lnode, spec_type} tuple.
 * The following callback assumes that LDI framework ref-counts the
 * src_dip and tgt_dip while invoking this callback.
 */
static int
di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
{
	struct di_state	*st = (struct di_state *)arg;
	i_lnode_t	*src_lnode, *tgt_lnode, *i_lnode;
	i_link_t	**i_link_next, *i_link;
	di_off_t	soff, toff;
	mod_hash_val_t	nodep = NULL;
	int		res;

	/*
	 * if the source or target of this device usage information doesn't
	 * correspond to a device node then we don't report it via
	 * libdevinfo so return.
	 */
	if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
		return (LDI_USAGE_CONTINUE);

	ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
	ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));

	/*
	 * Skip the ldi_usage if either src or tgt dip is not in the
	 * snapshot. This saves us from pruning bad lnodes/links later.
	 */
	if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
		return (LDI_USAGE_CONTINUE);
	if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
		return (LDI_USAGE_CONTINUE);

	ASSERT(soff > 0);
	ASSERT(toff > 0);

	/*
	 * allocate an i_lnode and add it to the lnode hash
	 * if it is not already present. For this particular
	 * link the lnode is a source, but it may
	 * participate as tgt or src in any number of layered
	 * operations - so it may already be in the hash.
	 */
	i_lnode = i_lnode_alloc(ldi_usage->src_modid);
	i_lnode->di_node = (struct di_node *)di_mem_addr(st, soff);
	i_lnode->devt = ldi_usage->src_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		src_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		src_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate a tgt i_lnode and add it to the lnode hash
	 */
	i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
	i_lnode->di_node = (struct di_node *)di_mem_addr(st, toff);
	i_lnode->devt = ldi_usage->tgt_devt;

	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
	if (res == MH_ERR_NOTFOUND) {
		/*
		 * new i_lnode
		 * add it to the hash and increment the lnode count
		 */
		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
		ASSERT(res == 0);
		st->lnode_count++;
		tgt_lnode = i_lnode;
	} else {
		/* this i_lnode already exists in the lnode_hash */
		i_lnode_free(i_lnode);
		tgt_lnode = (i_lnode_t *)nodep;
	}

	/*
	 * allocate a i_link
	 */
	i_link = i_link_alloc(ldi_usage->tgt_spec_type);
	i_link->src_lnode = src_lnode;
	i_link->tgt_lnode = tgt_lnode;

	/*
	 * add this link onto the src i_lnodes outbound i_link list
	 */
	i_link_next = &(src_lnode->link_out);
	while (*i_link_next != NULL) {
		if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
		    (i_link->spec_type == (*i_link_next)->spec_type)) {
			/* this link already exists */
			kmem_free(i_link, sizeof (i_link_t));
			return (LDI_USAGE_CONTINUE);
		}
		i_link_next = &((*i_link_next)->src_link_next);
	}
	*i_link_next = i_link;
2083 ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); 2084 i_link_next = &((*i_link_next)->tgt_link_next); 2085 } 2086 *i_link_next = i_link; 2087 2088 /* 2089 * add this i_link to the link hash 2090 */ 2091 res = mod_hash_insert(st->link_hash, i_link, i_link); 2092 ASSERT(res == 0); 2093 st->link_count++; 2094 2095 return (LDI_USAGE_CONTINUE); 2096 } 2097 2098 struct i_layer_data { 2099 struct di_state *st; 2100 int lnode_count; 2101 int link_count; 2102 di_off_t lnode_off; 2103 di_off_t link_off; 2104 }; 2105 2106 /*ARGSUSED*/ 2107 static uint_t 2108 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 2109 { 2110 i_link_t *i_link = (i_link_t *)key; 2111 struct i_layer_data *data = arg; 2112 struct di_link *me; 2113 struct di_lnode *melnode; 2114 struct di_node *medinode; 2115 2116 ASSERT(i_link->self == 0); 2117 2118 i_link->self = data->link_off + 2119 (data->link_count * sizeof (struct di_link)); 2120 data->link_count++; 2121 2122 ASSERT(data->link_off > 0 && data->link_count > 0); 2123 ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ 2124 ASSERT(data->link_count <= data->st->link_count); 2125 2126 /* fill in fields for the di_link snapshot */ 2127 me = (struct di_link *)di_mem_addr(data->st, i_link->self); 2128 me->self = i_link->self; 2129 me->spec_type = i_link->spec_type; 2130 2131 /* 2132 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t 2133 * are created during the LDI table walk. Since we are 2134 * walking the link hash, the lnode hash has already been 2135 * walked and the lnodes have been snapshotted. Save lnode 2136 * offsets. 2137 */ 2138 me->src_lnode = i_link->src_lnode->self; 2139 me->tgt_lnode = i_link->tgt_lnode->self; 2140 2141 /* 2142 * Save this link's offset in the src_lnode snapshot's link_out 2143 * field 2144 */ 2145 melnode = (struct di_lnode *)di_mem_addr(data->st, me->src_lnode); 2146 me->src_link_next = melnode->link_out; 2147 melnode->link_out = me->self; 2148 2149 /* 2150 * Put this link on the tgt_lnode's link_in field 2151 */ 2152 melnode = (struct di_lnode *)di_mem_addr(data->st, me->tgt_lnode); 2153 me->tgt_link_next = melnode->link_in; 2154 melnode->link_in = me->self; 2155 2156 /* 2157 * An i_lnode_t is only created if the corresponding dip exists 2158 * in the snapshot. A pointer to the di_node is saved in the 2159 * i_lnode_t when it is allocated. For this link, get the di_node 2160 * for the source lnode. Then put the link on the di_node's list 2161 * of src links 2162 */ 2163 medinode = i_link->src_lnode->di_node; 2164 me->src_node_next = medinode->src_links; 2165 medinode->src_links = me->self; 2166 2167 /* 2168 * Put this link on the tgt_links list of the target 2169 * dip. 
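	 * All of these linkages are recorded as di_off_t offsets rather
	 * than pointers, so the finished snapshot stays position-
	 * independent when it is later copied out and mapped by
	 * libdevinfo.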
2170 */
2171	medinode = i_link->tgt_lnode->di_node;
2172	me->tgt_node_next = medinode->tgt_links;
2173	medinode->tgt_links = me->self;
2174
2175	return (MH_WALK_CONTINUE);
2176 }
2177
2178 /*ARGSUSED*/
2179 static uint_t
2180 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2181 {
2182	i_lnode_t		*i_lnode = (i_lnode_t *)key;
2183	struct i_layer_data	*data = arg;
2184	struct di_lnode		*me;
2185	struct di_node		*medinode;
2186
2187	ASSERT(i_lnode->self == 0);
2188
2189	i_lnode->self = data->lnode_off +
2190	    (data->lnode_count * sizeof (struct di_lnode));
2191	data->lnode_count++;
2192
2193	ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2194	ASSERT(data->link_count == 0);	/* links not done yet */
2195	ASSERT(data->lnode_count <= data->st->lnode_count);
2196
2197	/* fill in fields for the di_lnode snapshot */
2198	me = (struct di_lnode *)di_mem_addr(data->st, i_lnode->self);
2199	me->self = i_lnode->self;
2200
2201	if (i_lnode->devt == DDI_DEV_T_NONE) {
2202		me->dev_major = (major_t)-1;
2203		me->dev_minor = (minor_t)-1;
2204	} else {
2205		me->dev_major = getmajor(i_lnode->devt);
2206		me->dev_minor = getminor(i_lnode->devt);
2207	}
2208
2209	/*
2210	 * The dip corresponding to this lnode must exist in
2211	 * the snapshot or we wouldn't have created the i_lnode_t
2212	 * during LDI walk. Save the offset of the dip.
2213	 */
2214	ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2215	me->node = i_lnode->di_node->self;
2216
2217	/*
2218	 * There must be at least one link in or out of this lnode
2219	 * or we wouldn't have created it. These fields will be set
2220	 * during the link hash walk.
2221	 */
2222	ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2223
2224	/*
2225	 * Set the offset of the devinfo node associated with this
2226	 * lnode. Also update the node_next pointer; this pointer is
2227	 * set if there are multiple lnodes associated with the same
2228	 * devinfo node (this can occur when, for example, multiple
2229	 * minor nodes are open for one device).
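	 * The insertion below is at the head of the list, so the lnodes
	 * attached to a devinfo node end up in the reverse of walk
	 * order; no particular ordering should be assumed.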
2230 */
2231	medinode = i_lnode->di_node;
2232	me->node_next = medinode->lnodes;
2233	medinode->lnodes = me->self;
2234
2235	return (MH_WALK_CONTINUE);
2236 }
2237
2238 static di_off_t
2239 di_getlink_data(di_off_t off, struct di_state *st)
2240 {
2241	struct i_layer_data	data = {0};
2242	size_t			size;
2243
2244	dcmn_err2((CE_CONT, "di_getlink_data: off = %x\n", off));
2245
2246	st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
2247	    mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
2248	    i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);
2249
2250	st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
2251	    (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));
2252
2253	/* get driver layering information */
2254	(void) ldi_usage_walker(st, di_ldi_callback);
2255
2256	/* check if there is any link data to include in the snapshot */
2257	if (st->lnode_count == 0) {
2258		ASSERT(st->link_count == 0);
2259		goto out;
2260	}
2261
2262	ASSERT(st->link_count != 0);
2263
2264	/* get a pointer to snapshot memory for all the di_lnodes */
2265	size = sizeof (struct di_lnode) * st->lnode_count;
2266	data.lnode_off = off = di_checkmem(st, off, size);
2267	off += DI_ALIGN(size);
2268
2269	/* get a pointer to snapshot memory for all the di_links */
2270	size = sizeof (struct di_link) * st->link_count;
2271	data.link_off = off = di_checkmem(st, off, size);
2272	off += DI_ALIGN(size);
2273
2274	data.lnode_count = data.link_count = 0;
2275	data.st = st;
2276
2277	/*
2278	 * We have lnodes and links that will go into the
2279	 * snapshot, so let's walk the respective hashes
2280	 * and snapshot them. The various linkages are
2281	 * also set up during the walk.
2282	 */
2283	mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
2284	ASSERT(data.lnode_count == st->lnode_count);
2285
2286	mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
2287	ASSERT(data.link_count == st->link_count);
2288
2289 out:
2290	/* free up the i_lnodes and i_links used to create the snapshot */
2291	mod_hash_destroy_hash(st->lnode_hash);
2292	mod_hash_destroy_hash(st->link_hash);
2293	st->lnode_count = 0;
2294	st->link_count = 0;
2295
2296	return (off);
2297 }
2298
2299
2300 /*
2301  * Copy all minor data nodes attached to a devinfo node into the snapshot.
2302  * It is called from di_copynode with devi_lock held.
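 *
 * The minor nodes are emitted as an offset-linked list whose last
 * element has a zero next offset. A minimal consumer-side sketch
 * (illustrative only; base and first_off are assumed to be the mapped
 * snapshot address and the node's first minor-data offset):
 *
 *	di_off_t o = first_off;
 *	while (o != 0) {
 *		struct di_minor *mp = (struct di_minor *)(base + o);
 *		(examine one minor node here)
 *		o = mp->next;
 *	}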
2303 */
2304 static di_off_t
2305 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
2306     struct di_state *st)
2307 {
2308	di_off_t	off;
2309	struct di_minor	*me;
2310
2311	dcmn_err2((CE_CONT, "di_getmdata:\n"));
2312
2313	/*
2314	 * check memory first
2315	 */
2316	off = di_checkmem(st, *off_p, sizeof (struct di_minor));
2317	*off_p = off;
2318
2319	do {
2320		me = (struct di_minor *)di_mem_addr(st, off);
2321		me->self = off;
2322		me->type = mnode->type;
2323		me->node = node;
2324		me->user_private_data = NULL;
2325
2326		off += DI_ALIGN(sizeof (struct di_minor));
2327
2328		/*
2329		 * Split dev_t to major/minor, so it works for
2330		 * both ILP32 and LP64 models
2331		 */
2332		me->dev_major = getmajor(mnode->ddm_dev);
2333		me->dev_minor = getminor(mnode->ddm_dev);
2334		me->spec_type = mnode->ddm_spec_type;
2335
2336		if (mnode->ddm_name) {
2337			off = di_checkmem(st, off,
2338			    strlen(mnode->ddm_name) + 1);
2339			me->name = off;
2340			(void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
2341			off += DI_ALIGN(strlen(mnode->ddm_name) + 1);
2342		}
2343
2344		if (mnode->ddm_node_type) {
2345			off = di_checkmem(st, off,
2346			    strlen(mnode->ddm_node_type) + 1);
2347			me->node_type = off;
2348			(void) strcpy(di_mem_addr(st, off),
2349			    mnode->ddm_node_type);
2350			off += DI_ALIGN(strlen(mnode->ddm_node_type) + 1);
2351		}
2352
2353		off = di_checkmem(st, off, sizeof (struct di_minor));
2354		me->next = off;
2355		mnode = mnode->next;
2356	} while (mnode);
2357
2358	me->next = 0;
2359
2360	return (off);
2361 }
2362
2363 /*
2364  * di_register_dip(), di_dip_find(): The dip must be protected
2365  * from deallocation when using these routines - this can either
2366  * be a reference count, a busy hold or a per-driver lock.
2367  */
2368
2369 static void
2370 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2371 {
2372	struct dev_info	*node = DEVI(dip);
2373	struct di_key	*key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2374	struct di_dkey	*dk;
2375
2376	ASSERT(dip);
2377	ASSERT(off > 0);
2378
2379	key->k_type = DI_DKEY;
2380	dk = &(key->k_u.dkey);
2381
2382	dk->dk_dip = dip;
2383	dk->dk_major = node->devi_major;
2384	dk->dk_inst = node->devi_instance;
2385	dk->dk_nodeid = node->devi_nodeid;
2386
2387	if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2388	    (mod_hash_val_t)(uintptr_t)off) != 0) {
2389		panic(
2390		    "duplicate devinfo (%p) registered during device "
2391		    "tree walk", (void *)dip);
2392	}
2393 }
2394
2395
2396 static int
2397 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2398 {
2399	/*
2400	 * uintptr_t must be used because it matches the size of void *;
2401	 * mod_hash stores results in pointer-sized containers. Since
2402	 * di_off_t is always a 32-bit offset, passing a di_off_t *
2403	 * directly would corrupt adjacent memory on 64-bit kernels.
2404	 */
2405	uintptr_t	offset;
2406	struct di_key	key = {0};
2407	struct di_dkey	*dk;
2408
2409	ASSERT(st->reg_dip_hash);
2410	ASSERT(dip);
2411	ASSERT(off_p);
2412
2413
2414	key.k_type = DI_DKEY;
2415	dk = &(key.k_u.dkey);
2416
2417	dk->dk_dip = dip;
2418	dk->dk_major = DEVI(dip)->devi_major;
2419	dk->dk_inst = DEVI(dip)->devi_instance;
2420	dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2421
2422	if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2423	    (mod_hash_val_t *)&offset) == 0) {
2424		*off_p = (di_off_t)offset;
2425		return (0);
2426	} else {
2427		return (-1);
2428	}
2429 }
2430
2431 /*
2432  * di_register_pip(), di_pip_find(): The pip must be protected from deallocation
2433  * when using these routines.
The caller must do this by protecting the 2434 * client(or phci)<->pip linkage while traversing the list and then holding the 2435 * pip when it is found in the list. 2436 */ 2437 2438 static void 2439 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) 2440 { 2441 struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); 2442 char *path_addr; 2443 struct di_pkey *pk; 2444 2445 ASSERT(pip); 2446 ASSERT(off > 0); 2447 2448 key->k_type = DI_PKEY; 2449 pk = &(key->k_u.pkey); 2450 2451 pk->pk_pip = pip; 2452 path_addr = mdi_pi_get_addr(pip); 2453 if (path_addr) 2454 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); 2455 pk->pk_client = mdi_pi_get_client(pip); 2456 pk->pk_phci = mdi_pi_get_phci(pip); 2457 2458 if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, 2459 (mod_hash_val_t)(uintptr_t)off) != 0) { 2460 panic( 2461 "duplicate pathinfo (%p) registered during device " 2462 "tree walk", (void *)pip); 2463 } 2464 } 2465 2466 /* 2467 * As with di_register_pip, the caller must hold or lock the pip 2468 */ 2469 static int 2470 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) 2471 { 2472 /* 2473 * uintptr_t must be used because it matches the size of void *; 2474 * mod_hash expects clients to place results into pointer-size 2475 * containers; since di_off_t is always a 32-bit offset, alignment 2476 * would otherwise be broken on 64-bit kernels. 2477 */ 2478 uintptr_t offset; 2479 struct di_key key = {0}; 2480 struct di_pkey *pk; 2481 2482 ASSERT(st->reg_pip_hash); 2483 ASSERT(off_p); 2484 2485 if (pip == NULL) { 2486 *off_p = 0; 2487 return (0); 2488 } 2489 2490 key.k_type = DI_PKEY; 2491 pk = &(key.k_u.pkey); 2492 2493 pk->pk_pip = pip; 2494 pk->pk_path_addr = mdi_pi_get_addr(pip); 2495 pk->pk_client = mdi_pi_get_client(pip); 2496 pk->pk_phci = mdi_pi_get_phci(pip); 2497 2498 if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, 2499 (mod_hash_val_t *)&offset) == 0) { 2500 *off_p = (di_off_t)offset; 2501 return (0); 2502 } else { 2503 return (-1); 2504 } 2505 } 2506 2507 static di_path_state_t 2508 path_state_convert(mdi_pathinfo_state_t st) 2509 { 2510 switch (st) { 2511 case MDI_PATHINFO_STATE_ONLINE: 2512 return (DI_PATH_STATE_ONLINE); 2513 case MDI_PATHINFO_STATE_STANDBY: 2514 return (DI_PATH_STATE_STANDBY); 2515 case MDI_PATHINFO_STATE_OFFLINE: 2516 return (DI_PATH_STATE_OFFLINE); 2517 case MDI_PATHINFO_STATE_FAULT: 2518 return (DI_PATH_STATE_FAULT); 2519 default: 2520 return (DI_PATH_STATE_UNKNOWN); 2521 } 2522 } 2523 2524 2525 static di_off_t 2526 di_path_getprop(mdi_pathinfo_t *pip, di_off_t off, di_off_t *off_p, 2527 struct di_state *st) 2528 { 2529 nvpair_t *prop = NULL; 2530 struct di_path_prop *me; 2531 2532 if (mdi_pi_get_next_prop(pip, NULL) == NULL) { 2533 *off_p = 0; 2534 return (off); 2535 } 2536 2537 off = di_checkmem(st, off, sizeof (struct di_path_prop)); 2538 *off_p = off; 2539 2540 while (prop = mdi_pi_get_next_prop(pip, prop)) { 2541 int delta = 0; 2542 2543 me = (struct di_path_prop *)di_mem_addr(st, off); 2544 me->self = off; 2545 off += sizeof (struct di_path_prop); 2546 2547 /* 2548 * property name 2549 */ 2550 off = di_checkmem(st, off, strlen(nvpair_name(prop)) + 1); 2551 me->prop_name = off; 2552 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); 2553 off += strlen(nvpair_name(prop)) + 1; 2554 2555 switch (nvpair_type(prop)) { 2556 case DATA_TYPE_BYTE: 2557 case DATA_TYPE_INT16: 2558 case DATA_TYPE_UINT16: 2559 case DATA_TYPE_INT32: 2560 case DATA_TYPE_UINT32: 2561 delta = sizeof (int32_t); 2562 
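			/*
			 * Integer types of 32 bits or less all occupy a
			 * single int32 slot in the snapshot.
			 */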
me->prop_type = DDI_PROP_TYPE_INT;
2563			off = di_checkmem(st, off, delta);
2564			(void) nvpair_value_int32(prop,
2565			    (int32_t *)di_mem_addr(st, off));
2566			break;
2567
2568		case DATA_TYPE_INT64:
2569		case DATA_TYPE_UINT64:
2570			delta = sizeof (int64_t);
2571			me->prop_type = DDI_PROP_TYPE_INT64;
2572			off = di_checkmem(st, off, delta);
2573			(void) nvpair_value_int64(prop,
2574			    (int64_t *)di_mem_addr(st, off));
2575			break;
2576
2577		case DATA_TYPE_STRING:
2578		{
2579			char *str;
2580			(void) nvpair_value_string(prop, &str);
2581			delta = strlen(str) + 1;
2582			me->prop_type = DDI_PROP_TYPE_STRING;
2583			off = di_checkmem(st, off, delta);
2584			(void) strcpy(di_mem_addr(st, off), str);
2585			break;
2586		}
2587		case DATA_TYPE_BYTE_ARRAY:
2588		case DATA_TYPE_INT16_ARRAY:
2589		case DATA_TYPE_UINT16_ARRAY:
2590		case DATA_TYPE_INT32_ARRAY:
2591		case DATA_TYPE_UINT32_ARRAY:
2592		case DATA_TYPE_INT64_ARRAY:
2593		case DATA_TYPE_UINT64_ARRAY:
2594		{
2595			uchar_t *buf;
2596			uint_t nelems;
2597			(void) nvpair_value_byte_array(prop, &buf, &nelems);
2598			delta = nelems;
2599			me->prop_type = DDI_PROP_TYPE_BYTE;
2600			if (nelems != 0) {
2601				off = di_checkmem(st, off, delta);
2602				bcopy(buf, di_mem_addr(st, off), nelems);
2603			}
2604			break;
2605		}
2606
2607		default:	/* Unknown or unhandled type; skip it */
2608			delta = 0;
2609			break;
2610		}
2611
2612		if (delta > 0) {
2613			me->prop_data = off;
2614		}
2615
2616		me->prop_len = delta;
2617		off += delta;
2618
2619		off = di_checkmem(st, off, sizeof (struct di_path_prop));
2620		me->prop_next = off;
2621	}
2622
2623	me->prop_next = 0;
2624	return (off);
2625 }
2626
2627
2628 static void
2629 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2630     int get_client)
2631 {
2632	if (get_client) {
2633		ASSERT(me->path_client == 0);
2634		me->path_client = noff;
2635		ASSERT(me->path_c_link == 0);
2636		*off_pp = &me->path_c_link;
2637		me->path_snap_state &=
2638		    ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2639	} else {
2640		ASSERT(me->path_phci == 0);
2641		me->path_phci = noff;
2642		ASSERT(me->path_p_link == 0);
2643		*off_pp = &me->path_p_link;
2644		me->path_snap_state &=
2645		    ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2646	}
2647 }
2648
2649 /*
2650  * poff_p: pointer to the linkage field. This links pips along the client|phci
2651  * linkage list.
2652  * noff : Offset for the endpoint dip snapshot.
2653  */
2654 static di_off_t
2655 di_getpath_data(dev_info_t *dip, di_off_t *poff_p, di_off_t noff,
2656     struct di_state *st, int get_client)
2657 {
2658	di_off_t off;
2659	mdi_pathinfo_t *pip;
2660	struct di_path *me;
2661	mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2662
2663	dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2664
2665	/*
2666	 * The naming of the following mdi_xyz() is unfortunately
2667	 * non-intuitive. mdi_get_next_phci_path() follows the
2668	 * client_link, i.e., the list of pips belonging to the
2669	 * given client dip.
2670	 */
2671	if (get_client)
2672		next_pip = &mdi_get_next_phci_path;
2673	else
2674		next_pip = &mdi_get_next_client_path;
2675
2676	off = *poff_p;
2677
2678	pip = NULL;
2679	while (pip = (*next_pip)(dip, pip)) {
2680		mdi_pathinfo_state_t state;
2681		di_off_t stored_offset;
2682
2683		dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2684
2685		mdi_pi_lock(pip);
2686
2687		if (di_pip_find(st, pip, &stored_offset) != -1) {
2688			/*
2689			 * We've already seen this pathinfo node so we need to
2690			 * take care not to snap it again; however, one endpoint
2691			 * and linkage will be set here. The other endpoint
2692			 * and linkage have already been set when the pip was
2693			 * first snapshotted, i.e., when the other endpoint dip
2694			 * was snapshotted.
2695			 */
2696			me = (struct di_path *)di_mem_addr(st, stored_offset);
2697
2698			*poff_p = stored_offset;
2699
2700			di_path_one_endpoint(me, noff, &poff_p, get_client);
2701
2702			/*
2703			 * The other endpoint and linkage were set when this
2704			 * pip was snapshotted. So we are done with both
2705			 * endpoints and linkages.
2706			 */
2707			ASSERT(!(me->path_snap_state &
2708			    (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2709			ASSERT(!(me->path_snap_state &
2710			    (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2711
2712			mdi_pi_unlock(pip);
2713			continue;
2714		}
2715
2716		/*
2717		 * Now that we need to snapshot this pip, check memory
2718		 */
2719		off = di_checkmem(st, off, sizeof (struct di_path));
2720		me = (struct di_path *)di_mem_addr(st, off);
2721		me->self = off;
2722		*poff_p = off;
2723		off += sizeof (struct di_path);
2724
2725		me->path_snap_state =
2726		    DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2727		me->path_snap_state |=
2728		    DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2729
2730		/*
2731		 * Zero out fields as di_checkmem() doesn't guarantee
2732		 * zero-filled memory
2733		 */
2734		me->path_client = me->path_phci = 0;
2735		me->path_c_link = me->path_p_link = 0;
2736
2737		di_path_one_endpoint(me, noff, &poff_p, get_client);
2738
2739		/*
2740		 * Note the existence of this pathinfo
2741		 */
2742		di_register_pip(st, pip, me->self);
2743
2744		state = mdi_pi_get_state(pip);
2745		me->path_state = path_state_convert(state);
2746
2747		/*
2748		 * Get intermediate addressing info.
2749		 */
2750		off = di_checkmem(st, off, strlen(mdi_pi_get_addr(pip)) + 1);
2751		me->path_addr = off;
2752		(void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2753		off += strlen(mdi_pi_get_addr(pip)) + 1;
2754
2755		/*
2756		 * Get path properties if props are to be included in the
2757		 * snapshot
2758		 */
2759		if (DINFOPROP & st->command) {
2760			off = di_path_getprop(pip, off, &me->path_prop, st);
2761		} else {
2762			me->path_prop = 0;
2763		}
2764
2765		mdi_pi_unlock(pip);
2766	}
2767
2768	*poff_p = 0;
2769
2770	return (off);
2771 }
2772
2773 /*
2774  * Copy a list of properties attached to a devinfo node. Called from
2775  * di_copynode with devi_lock held. The major number is passed in case
2776  * we need to call the driver's prop_op entry. The value of list indicates
2777  * which list we are copying. Possible values are:
2778  * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
2779  */
2780 static di_off_t
2781 di_getprop(struct ddi_prop *prop, di_off_t *off_p, struct di_state *st,
2782     struct dev_info *dip, int list)
2783 {
2784	dev_t dev;
2785	int (*prop_op)();
2786	int off, need_prop_op = 0;
2787	int prop_op_fail = 0;
2788	ddi_prop_t *propp = NULL;
2789	struct di_prop *pp;
2790	struct dev_ops *ops = NULL;
2791	int prop_len;
2792	caddr_t prop_val;
2793
2794
2795	dcmn_err2((CE_CONT, "di_getprop:\n"));
2796
2797	ASSERT(st != NULL);
2798
2799	dcmn_err((CE_CONT, "copy property list at addr %p\n", (void *)prop));
2800
2801	/*
2802	 * Figure out if we need to call the driver's prop_op entry point.
2803	 * The conditions are:
2804	 * -- driver property list
2805	 * -- driver must be attached and held
2806	 * -- driver's cb_prop_op != ddi_prop_op
2807	 *    or parent's bus_prop_op != ddi_bus_prop_op
2808	 */
2809
2810	if (list != DI_PROP_DRV_LIST) {
2811		goto getprop;
2812	}
2813
2814	/*
2815	 * If the driver is not attached or if the major is -1, we ignore
2816	 * the driver property list.
No one should rely on such 2817 * properties. 2818 */ 2819 if (i_ddi_node_state((dev_info_t *)dip) < DS_ATTACHED) { 2820 off = *off_p; 2821 *off_p = 0; 2822 return (off); 2823 } 2824 2825 /* 2826 * Now we have a driver which is held. We can examine entry points 2827 * and check the condition listed above. 2828 */ 2829 ops = dip->devi_ops; 2830 2831 /* 2832 * Some nexus drivers incorrectly set cb_prop_op to nodev, 2833 * nulldev or even NULL. 2834 */ 2835 if (ops && ops->devo_cb_ops && 2836 (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) && 2837 (ops->devo_cb_ops->cb_prop_op != nodev) && 2838 (ops->devo_cb_ops->cb_prop_op != nulldev) && 2839 (ops->devo_cb_ops->cb_prop_op != NULL)) { 2840 need_prop_op = 1; 2841 } 2842 2843 getprop: 2844 /* 2845 * check memory availability 2846 */ 2847 off = di_checkmem(st, *off_p, sizeof (struct di_prop)); 2848 *off_p = off; 2849 /* 2850 * Now copy properties 2851 */ 2852 do { 2853 pp = (struct di_prop *)di_mem_addr(st, off); 2854 pp->self = off; 2855 /* 2856 * Split dev_t to major/minor, so it works for 2857 * both ILP32 and LP64 model 2858 */ 2859 pp->dev_major = getmajor(prop->prop_dev); 2860 pp->dev_minor = getminor(prop->prop_dev); 2861 pp->prop_flags = prop->prop_flags; 2862 pp->prop_list = list; 2863 2864 /* 2865 * property name 2866 */ 2867 off += sizeof (struct di_prop); 2868 if (prop->prop_name) { 2869 off = di_checkmem(st, off, strlen(prop->prop_name) 2870 + 1); 2871 pp->prop_name = off; 2872 (void) strcpy(di_mem_addr(st, off), prop->prop_name); 2873 off += strlen(prop->prop_name) + 1; 2874 } 2875 2876 /* 2877 * Set prop_len here. This may change later 2878 * if cb_prop_op returns a different length. 2879 */ 2880 pp->prop_len = prop->prop_len; 2881 if (!need_prop_op) { 2882 if (prop->prop_val == NULL) { 2883 dcmn_err((CE_WARN, 2884 "devinfo: property fault at %p", 2885 (void *)prop)); 2886 pp->prop_data = -1; 2887 } else if (prop->prop_len != 0) { 2888 off = di_checkmem(st, off, prop->prop_len); 2889 pp->prop_data = off; 2890 bcopy(prop->prop_val, di_mem_addr(st, off), 2891 prop->prop_len); 2892 off += DI_ALIGN(pp->prop_len); 2893 } 2894 } 2895 2896 off = di_checkmem(st, off, sizeof (struct di_prop)); 2897 pp->next = off; 2898 prop = prop->prop_next; 2899 } while (prop); 2900 2901 pp->next = 0; 2902 2903 if (!need_prop_op) { 2904 dcmn_err((CE_CONT, "finished property " 2905 "list at offset 0x%x\n", off)); 2906 return (off); 2907 } 2908 2909 /* 2910 * If there is a need to call driver's prop_op entry, 2911 * we must release driver's devi_lock, because the 2912 * cb_prop_op entry point will grab it. 2913 * 2914 * The snapshot memory has already been allocated above, 2915 * which means the length of an active property should 2916 * remain fixed for this implementation to work. 
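	 *
	 * Note that PROP_LEN_AND_VAL_ALLOC is used below, so the driver
	 * allocates a fresh buffer for each value; that buffer is copied
	 * into the snapshot and then freed.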
2917	 */
2918
2919
2920	prop_op = ops->devo_cb_ops->cb_prop_op;
2921	pp = (struct di_prop *)di_mem_addr(st, *off_p);
2922
2923	mutex_exit(&dip->devi_lock);
2924
2925	do {
2926		int err;
2927		struct di_prop *tmp;
2928
2929		if (pp->next) {
2930			tmp = (struct di_prop *)
2931			    di_mem_addr(st, pp->next);
2932		} else {
2933			tmp = NULL;
2934		}
2935
2936		/*
2937		 * call into driver's prop_op entry point
2938		 *
2939		 * Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY
2940		 */
2941		dev = makedevice(pp->dev_major, pp->dev_minor);
2942		if (dev == DDI_DEV_T_NONE)
2943			dev = DDI_DEV_T_ANY;
2944
2945		dcmn_err((CE_CONT, "call prop_op"
2946		    "(%lx, %p, PROP_LEN_AND_VAL_ALLOC, "
2947		    "DDI_PROP_DONTPASS, \"%s\", %p, &%d)\n",
2948		    dev,
2949		    (void *)dip,
2950		    (char *)di_mem_addr(st, pp->prop_name),
2951		    (void *)di_mem_addr(st, pp->prop_data),
2952		    pp->prop_len));
2953
2954		if ((err = (*prop_op)(dev, (dev_info_t *)dip,
2955		    PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS,
2956		    (char *)di_mem_addr(st, pp->prop_name),
2957		    &prop_val, &prop_len)) != DDI_PROP_SUCCESS) {
2958			if ((propp = i_ddi_prop_search(dev,
2959			    (char *)di_mem_addr(st, pp->prop_name),
2960			    (uint_t)pp->prop_flags,
2961			    &(DEVI(dip)->devi_drv_prop_ptr))) != NULL) {
2962				pp->prop_len = propp->prop_len;
2963				if (pp->prop_len != 0) {
2964					off = di_checkmem(st, off,
2965					    pp->prop_len);
2966					pp->prop_data = off;
2967					bcopy(propp->prop_val, di_mem_addr(st,
2968					    pp->prop_data), propp->prop_len);
2969					off += DI_ALIGN(pp->prop_len);
2970				}
2971			} else {
2972				prop_op_fail = 1;
2973			}
2974		} else if (prop_len != 0) {
2975			pp->prop_len = prop_len;
2976			off = di_checkmem(st, off, prop_len);
2977			pp->prop_data = off;
2978			bcopy(prop_val, di_mem_addr(st, off), prop_len);
2979			off += DI_ALIGN(prop_len);
2980			kmem_free(prop_val, prop_len);
2981		}
2982
2983		if (prop_op_fail) {
2984			pp->prop_data = -1;
2985			dcmn_err((CE_WARN, "devinfo: prop_op failure "
2986			    "for \"%s\" err %d",
2987			    di_mem_addr(st, pp->prop_name), err));
			prop_op_fail = 0;	/* one failure must not taint the later props */
2988		}
2989
2990		pp = tmp;
2991
2992	} while (pp);
2993
2994	mutex_enter(&dip->devi_lock);
2995	dcmn_err((CE_CONT, "finished property list at offset 0x%x\n", off));
2996	return (off);
2997 }
2998
2999 /*
3000  * Find the private data format attached to a dip.
3001  * match = DI_MATCH_PARENT: match the parent dip's driver name (parent data)
3002  * match = DI_MATCH_DRIVER: match the current dip's driver name (driver data)
3003  */
3004 #define DI_MATCH_DRIVER 0
3005 #define DI_MATCH_PARENT 1
3006
3007 struct di_priv_format *
3008 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3009 {
3010	int			i, count, len;
3011	char			*drv_name;
3012	major_t			major;
3013	struct di_all		*all;
3014	struct di_priv_format	*form;
3015
3016	dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3017	    node->devi_node_name, match));
3018
3019	if (match == DI_MATCH_PARENT) {
3020		node = DEVI(node->devi_parent);
3021	}
3022
3023	if (node == NULL) {
3024		return (NULL);
3025	}
3026
3027	major = ddi_name_to_major(node->devi_binding_name);
3028	if (major == (major_t)(-1)) {
3029		return (NULL);
3030	}
3031
3032	/*
3033	 * Match the driver name.
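	 * A di_priv_format entry may carry several space-separated
	 * driver names in its drv_name field; the loop below compares
	 * against each name in turn.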
3034	 */
3035	drv_name = ddi_major_to_name(major);
3036	if ((drv_name == NULL) || *drv_name == '\0') {
3037		return (NULL);
3038	}
3039
3040	/* Now get the di_priv_format array */
3041	all = (struct di_all *)di_mem_addr(st, 0);
3042
3043	if (match == DI_MATCH_PARENT) {
3044		count = all->n_ppdata;
3045		form = (struct di_priv_format *)
3046		    (di_mem_addr(st, 0) + all->ppdata_format);
3047	} else {
3048		count = all->n_dpdata;
3049		form = (struct di_priv_format *)
3050		    ((caddr_t)all + all->dpdata_format);
3051	}
3052
3053	len = strlen(drv_name);
3054	for (i = 0; i < count; i++) {
3055		char *tmp;
3056
3057		tmp = form[i].drv_name;
3058		while (tmp && (*tmp != '\0')) {
3059			if (strncmp(drv_name, tmp, len) == 0) {
3060				return (&form[i]);
3061			}
3062			/*
3063			 * Move to the next driver name, skipping white space
3064			 */
3065			if (tmp = strchr(tmp, ' ')) {
3066				tmp++;
3067			}
3068		}
3069	}
3070
3071	return (NULL);
3072 }
3073
3074 /*
3075  * The following functions copy data as specified by the format passed in.
3076  * To prevent an invalid format from panicking the system, we call on_fault().
3077  * On failure, *off_p is set to -1 and the incoming offset is returned
3078  * unchanged; on success, the total offset past the copied data is returned.
3079  */
3080 #define DI_MAX_PRIVDATA (PAGESIZE >> 1)	/* max private data size */
3081
3082 static di_off_t
3083 di_getprvdata(struct di_priv_format *pdp, void *data, di_off_t *off_p,
3084     struct di_state *st)
3085 {
3086	caddr_t		pa;
3087	void		*ptr;
3088	int		i, size, repeat;
3089	di_off_t	off, off0, *tmp;
3090
3091	label_t		ljb;
3092
3093	dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3094
3095	/*
3096	 * check memory availability. Private data size is
3097	 * limited to DI_MAX_PRIVDATA.
3098	 */
3099	off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3100
3101	if ((pdp->bytes <= 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3102		goto failure;
3103	}
3104
3105	if (!on_fault(&ljb)) {
3106		/* copy the struct */
3107		bcopy(data, di_mem_addr(st, off), pdp->bytes);
3108		off0 = DI_ALIGN(pdp->bytes);
3109
3110		/* dereferencing pointers */
3111		for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3112
3113			if (pdp->ptr[i].size == 0) {
3114				goto success;	/* no more ptrs */
3115			}
3116
3117			/*
3118			 * first, get the pointer content
3119			 */
3120			if ((pdp->ptr[i].offset < 0) ||
3121			    (pdp->ptr[i].offset >
3122			    pdp->bytes - sizeof (char *)))
3123				goto failure;	/* wrong offset */
3124
3125			pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3126			tmp = (di_off_t *)pa;	/* to store off_t later */
3127
3128			ptr = *((void **)pa);	/* get pointer value */
3129			if (ptr == NULL) {	/* if NULL pointer, go on */
3130				continue;
3131			}
3132
3133			/*
3134			 * next, find the repeat count (array dimension)
3135			 */
3136			repeat = pdp->ptr[i].len_offset;
3137
3138			/*
3139			 * A negative value indicates a fixed-size array;
3140			 * the dimension is the absolute value.
3141			 *
3142			 * A zero or positive value indicates a variable-size
3143			 * array; it is the offset of an int member of the
3144			 * structure that holds the dimension.
3145			 */
3146			if (repeat > pdp->bytes - sizeof (int)) {
3147				goto failure;	/* wrong offset */
3148			}
3149
3150			if (repeat >= 0) {
3151				repeat = *((int *)((caddr_t)data + repeat));
3152			} else {
3153				repeat = -repeat;
3154			}
3155
3156			/*
3157			 * next, get the size of the object to be copied
3158			 */
3159			size = pdp->ptr[i].size * repeat;
3160
3161			/*
3162			 * Arbitrarily limit the total size of object to be
3163			 * copied (1 byte up to DI_MAX_PRIVDATA, i.e. half a
			 * page, less what has already been used).
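			 * The cap keeps a bad format specification from
			 * dragging unbounded amounts of kernel memory into
			 * the snapshot.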
3164			 */
3165			if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3166				goto failure;	/* wrong size or too big */
3167			}
3168
3169			/*
3170			 * Now copy the data
3171			 */
3172			*tmp = off0;
3173			bcopy(ptr, di_mem_addr(st, off + off0), size);
3174			off0 += DI_ALIGN(size);
3175		}
3176	} else {
3177		goto failure;
3178	}
3179
3180 success:
3181	/*
3182	 * success if reached here
3183	 */
3184	no_fault();
3185	*off_p = off;
3186
3187	return (off + off0);
3188	/*NOTREACHED*/
3189
3190 failure:
3191	/*
3192	 * fault occurred
3193	 */
3194	no_fault();
3195	cmn_err(CE_WARN, "devinfo: fault in private data at %p", data);
3196	*off_p = -1;	/* set private data to indicate error */
3197
3198	return (off);
3199 }
3200
3201 /*
3202  * get parent private data; on error, returns original offset
3203  */
3204 static di_off_t
3205 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3206 {
3207	int off;
3208	struct di_priv_format *ppdp;
3209
3210	dcmn_err2((CE_CONT, "di_getppdata:\n"));
3211
3212	/* find the parent data format */
3213	if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3214		off = *off_p;
3215		*off_p = 0;	/* set parent data to none */
3216		return (off);
3217	}
3218
3219	return (di_getprvdata(ppdp, ddi_get_parent_data((dev_info_t *)node),
3220	    off_p, st));
3221 }
3222
3223 /*
3224  * get driver private data; on error, returns original offset
3225  */
3226 static di_off_t
3227 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3228 {
3229	int off;
3230	struct di_priv_format *dpdp;
3231
3232	dcmn_err2((CE_CONT, "di_getdpdata:"));
3233
3234	/* find the driver data format */
3235	if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3236		off = *off_p;
3237		*off_p = 0;	/* set driver data to none */
3238		return (off);
3239	}
3240
3241	return (di_getprvdata(dpdp, ddi_get_driver_private((dev_info_t *)node),
3242	    off_p, st));
3243 }
3244
3245 /*
3246  * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3247  * This function encapsulates the state machine:
3248  *
3249  *	-> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3250  *	|		SNAPSHOT		USRLD	 |
3251  *	--------------------------------------------------
3252  *
3253  * Returns 0 on success and -1 on failure
3254  */
3255 static int
3256 di_setstate(struct di_state *st, int new_state)
3257 {
3258	int ret = 0;
3259
3260	mutex_enter(&di_lock);
3261	switch (new_state) {
3262	case IOC_IDLE:
3263	case IOC_DONE:
3264		break;
3265	case IOC_SNAP:
3266		if (st->di_iocstate != IOC_IDLE)
3267			ret = -1;
3268		break;
3269	case IOC_COPY:
3270		if (st->di_iocstate != IOC_DONE)
3271			ret = -1;
3272		break;
3273	default:
3274		ret = -1;
3275	}
3276
3277	if (ret == 0)
3278		st->di_iocstate = new_state;
3279	else
3280		cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3281		    st->di_iocstate, new_state);
3282	mutex_exit(&di_lock);
3283	return (ret);
3284 }
3285
3286 /*
3287  * We cannot assume the presence of the entire
3288  * snapshot in this routine.
All we are guaranteed 3289 * is the di_all struct + 1 byte (for root_path) 3290 */ 3291 static int 3292 header_plus_one_ok(struct di_all *all) 3293 { 3294 /* 3295 * Refuse to read old versions 3296 */ 3297 if (all->version != DI_SNAPSHOT_VERSION) { 3298 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); 3299 return (0); 3300 } 3301 3302 if (all->cache_magic != DI_CACHE_MAGIC) { 3303 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); 3304 return (0); 3305 } 3306 3307 if (all->snapshot_time <= 0) { 3308 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); 3309 return (0); 3310 } 3311 3312 if (all->top_devinfo == 0) { 3313 CACHE_DEBUG((DI_ERR, "NULL top devinfo")); 3314 return (0); 3315 } 3316 3317 if (all->map_size < sizeof (*all) + 1) { 3318 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); 3319 return (0); 3320 } 3321 3322 if (all->root_path[0] != '/' || all->root_path[1] != '\0') { 3323 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", 3324 all->root_path[0], all->root_path[1])); 3325 return (0); 3326 } 3327 3328 /* 3329 * We can't check checksum here as we just have the header 3330 */ 3331 3332 return (1); 3333 } 3334 3335 static int 3336 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) 3337 { 3338 rlim64_t rlimit; 3339 ssize_t resid; 3340 int error = 0; 3341 3342 3343 rlimit = RLIM64_INFINITY; 3344 3345 while (len) { 3346 resid = 0; 3347 error = vn_rdwr(UIO_WRITE, vp, buf, len, off, 3348 UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); 3349 3350 if (error || resid < 0) { 3351 error = error ? error : EIO; 3352 CACHE_DEBUG((DI_ERR, "write error: %d", error)); 3353 break; 3354 } 3355 3356 /* 3357 * Check if we are making progress 3358 */ 3359 if (resid >= len) { 3360 error = ENOSPC; 3361 break; 3362 } 3363 buf += len - resid; 3364 off += len - resid; 3365 len = resid; 3366 } 3367 3368 return (error); 3369 } 3370 3371 extern int modrootloaded; 3372 3373 static void 3374 di_cache_write(struct di_cache *cache) 3375 { 3376 struct di_all *all; 3377 struct vnode *vp; 3378 int oflags; 3379 size_t map_size; 3380 size_t chunk; 3381 offset_t off; 3382 int error; 3383 char *buf; 3384 3385 ASSERT(DI_CACHE_LOCKED(*cache)); 3386 ASSERT(!servicing_interrupt()); 3387 3388 if (cache->cache_size == 0) { 3389 ASSERT(cache->cache_data == NULL); 3390 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); 3391 return; 3392 } 3393 3394 ASSERT(cache->cache_size > 0); 3395 ASSERT(cache->cache_data); 3396 3397 if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { 3398 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); 3399 return; 3400 } 3401 3402 all = (struct di_all *)cache->cache_data; 3403 3404 if (!header_plus_one_ok(all)) { 3405 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); 3406 return; 3407 } 3408 3409 ASSERT(strcmp(all->root_path, "/") == 0); 3410 3411 /* 3412 * The cache_size is the total allocated memory for the cache. 3413 * The map_size is the actual size of valid data in the cache. 3414 * map_size may be smaller than cache_size but cannot exceed 3415 * cache_size. 3416 */ 3417 if (all->map_size > cache->cache_size) { 3418 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." 
3419 " Skipping write", all->map_size, cache->cache_size)); 3420 return; 3421 } 3422 3423 /* 3424 * First unlink the temp file 3425 */ 3426 error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); 3427 if (error && error != ENOENT) { 3428 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", 3429 DI_CACHE_TEMP, error)); 3430 } 3431 3432 if (error == EROFS) { 3433 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write")); 3434 return; 3435 } 3436 3437 vp = NULL; 3438 oflags = (FCREAT|FWRITE); 3439 if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, 3440 DI_CACHE_PERMS, &vp, CRCREAT, 0)) { 3441 CACHE_DEBUG((DI_ERR, "%s: create failed: %d", 3442 DI_CACHE_TEMP, error)); 3443 return; 3444 } 3445 3446 ASSERT(vp); 3447 3448 /* 3449 * Paranoid: Check if the file is on a read-only FS 3450 */ 3451 if (vn_is_readonly(vp)) { 3452 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); 3453 goto fail; 3454 } 3455 3456 /* 3457 * Note that we only write map_size bytes to disk - this saves 3458 * space as the actual cache size may be larger than size of 3459 * valid data in the cache. 3460 * Another advantage is that it makes verification of size 3461 * easier when the file is read later. 3462 */ 3463 map_size = all->map_size; 3464 off = 0; 3465 buf = cache->cache_data; 3466 3467 while (map_size) { 3468 ASSERT(map_size > 0); 3469 /* 3470 * Write in chunks so that VM system 3471 * is not overwhelmed 3472 */ 3473 if (map_size > di_chunk * PAGESIZE) 3474 chunk = di_chunk * PAGESIZE; 3475 else 3476 chunk = map_size; 3477 3478 error = chunk_write(vp, off, buf, chunk); 3479 if (error) { 3480 CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", 3481 off, error)); 3482 goto fail; 3483 } 3484 3485 off += chunk; 3486 buf += chunk; 3487 map_size -= chunk; 3488 3489 /* Give pageout a chance to run */ 3490 delay(1); 3491 } 3492 3493 /* 3494 * Now sync the file and close it 3495 */ 3496 if (error = VOP_FSYNC(vp, FSYNC, kcred)) { 3497 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); 3498 } 3499 3500 if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred)) { 3501 CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); 3502 VN_RELE(vp); 3503 return; 3504 } 3505 3506 VN_RELE(vp); 3507 3508 /* 3509 * Now do the rename 3510 */ 3511 if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { 3512 CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); 3513 return; 3514 } 3515 3516 CACHE_DEBUG((DI_INFO, "Cache write successful.")); 3517 3518 return; 3519 3520 fail: 3521 (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred); 3522 VN_RELE(vp); 3523 } 3524 3525 3526 /* 3527 * Since we could be called early in boot, 3528 * use kobj_read_file() 3529 */ 3530 static void 3531 di_cache_read(struct di_cache *cache) 3532 { 3533 struct _buf *file; 3534 struct di_all *all; 3535 int n; 3536 size_t map_size, sz, chunk; 3537 offset_t off; 3538 caddr_t buf; 3539 uint32_t saved_crc, crc; 3540 3541 ASSERT(modrootloaded); 3542 ASSERT(DI_CACHE_LOCKED(*cache)); 3543 ASSERT(cache->cache_data == NULL); 3544 ASSERT(cache->cache_size == 0); 3545 ASSERT(!servicing_interrupt()); 3546 3547 file = kobj_open_file(DI_CACHE_FILE); 3548 if (file == (struct _buf *)-1) { 3549 CACHE_DEBUG((DI_ERR, "%s: open failed: %d", 3550 DI_CACHE_FILE, ENOENT)); 3551 return; 3552 } 3553 3554 /* 3555 * Read in the header+root_path first. 
The root_path must be "/" 3556 */ 3557 all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); 3558 n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); 3559 3560 if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { 3561 kmem_free(all, sizeof (*all) + 1); 3562 kobj_close_file(file); 3563 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); 3564 return; 3565 } 3566 3567 map_size = all->map_size; 3568 3569 kmem_free(all, sizeof (*all) + 1); 3570 3571 ASSERT(map_size >= sizeof (*all) + 1); 3572 3573 buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); 3574 sz = map_size; 3575 off = 0; 3576 while (sz) { 3577 /* Don't overload VM with large reads */ 3578 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; 3579 n = kobj_read_file(file, buf, chunk, off); 3580 if (n != chunk) { 3581 CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", 3582 DI_CACHE_FILE, off)); 3583 goto fail; 3584 } 3585 off += chunk; 3586 buf += chunk; 3587 sz -= chunk; 3588 } 3589 3590 ASSERT(off == map_size); 3591 3592 /* 3593 * Read past expected EOF to verify size. 3594 */ 3595 if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { 3596 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); 3597 goto fail; 3598 } 3599 3600 all = (struct di_all *)di_cache.cache_data; 3601 if (!header_plus_one_ok(all)) { 3602 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); 3603 goto fail; 3604 } 3605 3606 /* 3607 * Compute CRC with checksum field in the cache data set to 0 3608 */ 3609 saved_crc = all->cache_checksum; 3610 all->cache_checksum = 0; 3611 CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); 3612 all->cache_checksum = saved_crc; 3613 3614 if (crc != all->cache_checksum) { 3615 CACHE_DEBUG((DI_ERR, 3616 "%s: checksum error: expected=0x%x actual=0x%x", 3617 DI_CACHE_FILE, all->cache_checksum, crc)); 3618 goto fail; 3619 } 3620 3621 if (all->map_size != map_size) { 3622 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); 3623 goto fail; 3624 } 3625 3626 kobj_close_file(file); 3627 3628 di_cache.cache_size = map_size; 3629 3630 return; 3631 3632 fail: 3633 kmem_free(di_cache.cache_data, map_size); 3634 kobj_close_file(file); 3635 di_cache.cache_data = NULL; 3636 di_cache.cache_size = 0; 3637 } 3638 3639 3640 /* 3641 * Checks if arguments are valid for using the cache. 
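 * To use the cache, the I/O subsystem must be initialized, the ioctl
 * must specify exactly DINFOCACHE, and the snapshot must be rooted at
 * "/".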
3642 */ 3643 static int 3644 cache_args_valid(struct di_state *st, int *error) 3645 { 3646 ASSERT(error); 3647 ASSERT(st->mem_size > 0); 3648 ASSERT(st->memlist != NULL); 3649 3650 if (!modrootloaded || !i_ddi_io_initialized()) { 3651 CACHE_DEBUG((DI_ERR, 3652 "cache lookup failure: I/O subsystem not inited")); 3653 *error = ENOTACTIVE; 3654 return (0); 3655 } 3656 3657 /* 3658 * No other flags allowed with DINFOCACHE 3659 */ 3660 if (st->command != (DINFOCACHE & DIIOC_MASK)) { 3661 CACHE_DEBUG((DI_ERR, 3662 "cache lookup failure: bad flags: 0x%x", 3663 st->command)); 3664 *error = EINVAL; 3665 return (0); 3666 } 3667 3668 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3669 CACHE_DEBUG((DI_ERR, 3670 "cache lookup failure: bad root: %s", 3671 DI_ALL_PTR(st)->root_path)); 3672 *error = EINVAL; 3673 return (0); 3674 } 3675 3676 CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); 3677 3678 *error = 0; 3679 3680 return (1); 3681 } 3682 3683 static int 3684 snapshot_is_cacheable(struct di_state *st) 3685 { 3686 ASSERT(st->mem_size > 0); 3687 ASSERT(st->memlist != NULL); 3688 3689 if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) != 3690 (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { 3691 CACHE_DEBUG((DI_INFO, 3692 "not cacheable: incompatible flags: 0x%x", 3693 st->command)); 3694 return (0); 3695 } 3696 3697 if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { 3698 CACHE_DEBUG((DI_INFO, 3699 "not cacheable: incompatible root path: %s", 3700 DI_ALL_PTR(st)->root_path)); 3701 return (0); 3702 } 3703 3704 CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); 3705 3706 return (1); 3707 } 3708 3709 static int 3710 di_cache_lookup(struct di_state *st) 3711 { 3712 size_t rval; 3713 int cache_valid; 3714 3715 ASSERT(cache_args_valid(st, &cache_valid)); 3716 ASSERT(modrootloaded); 3717 3718 DI_CACHE_LOCK(di_cache); 3719 3720 /* 3721 * The following assignment determines the validity 3722 * of the cache as far as this snapshot is concerned. 3723 */ 3724 cache_valid = di_cache.cache_valid; 3725 3726 if (cache_valid && di_cache.cache_data == NULL) { 3727 di_cache_read(&di_cache); 3728 /* check for read or file error */ 3729 if (di_cache.cache_data == NULL) 3730 cache_valid = 0; 3731 } 3732 3733 if (cache_valid) { 3734 /* 3735 * Ok, the cache was valid as of this particular 3736 * snapshot. Copy the cached snapshot. This is safe 3737 * to do as the cache cannot be freed (we hold the 3738 * cache lock). Free the memory allocated in di_state 3739 * up until this point - we will simply copy everything 3740 * in the cache. 3741 */ 3742 3743 ASSERT(di_cache.cache_data != NULL); 3744 ASSERT(di_cache.cache_size > 0); 3745 3746 di_freemem(st); 3747 3748 rval = 0; 3749 if (di_cache2mem(&di_cache, st) > 0) { 3750 3751 ASSERT(DI_ALL_PTR(st)); 3752 3753 /* 3754 * map_size is size of valid data in the 3755 * cached snapshot and may be less than 3756 * size of the cache. 3757 */ 3758 rval = DI_ALL_PTR(st)->map_size; 3759 3760 ASSERT(rval >= sizeof (struct di_all)); 3761 ASSERT(rval <= di_cache.cache_size); 3762 } 3763 } else { 3764 /* 3765 * The cache isn't valid, we need to take a snapshot. 3766 * Set the command flags appropriately 3767 */ 3768 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); 3769 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); 3770 rval = di_cache_update(st); 3771 st->command = (DINFOCACHE & DIIOC_MASK); 3772 } 3773 3774 DI_CACHE_UNLOCK(di_cache); 3775 3776 /* 3777 * For cached snapshots, the devinfo driver always returns 3778 * a snapshot rooted at "/". 
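	 * (That is the only form the cache ever stores; see
	 * cache_args_valid() and snapshot_is_cacheable().)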
3779 */ 3780 ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); 3781 3782 return (rval); 3783 } 3784 3785 /* 3786 * This is a forced update of the cache - the previous state of the cache 3787 * may be: 3788 * - unpopulated 3789 * - populated and invalid 3790 * - populated and valid 3791 */ 3792 static int 3793 di_cache_update(struct di_state *st) 3794 { 3795 int rval; 3796 uint32_t crc; 3797 struct di_all *all; 3798 3799 ASSERT(DI_CACHE_LOCKED(di_cache)); 3800 ASSERT(snapshot_is_cacheable(st)); 3801 3802 /* 3803 * Free the in-core cache and the on-disk file (if they exist) 3804 */ 3805 i_ddi_di_cache_free(&di_cache); 3806 3807 /* 3808 * Set valid flag before taking the snapshot, 3809 * so that any invalidations that arrive 3810 * during or after the snapshot are not 3811 * removed by us. 3812 */ 3813 atomic_or_32(&di_cache.cache_valid, 1); 3814 3815 rval = di_snapshot_and_clean(st); 3816 3817 if (rval == 0) { 3818 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); 3819 return (0); 3820 } 3821 3822 DI_ALL_PTR(st)->map_size = rval; 3823 3824 if (di_mem2cache(st, &di_cache) == 0) { 3825 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); 3826 return (0); 3827 } 3828 3829 ASSERT(di_cache.cache_data); 3830 ASSERT(di_cache.cache_size > 0); 3831 3832 /* 3833 * Now that we have cached the snapshot, compute its checksum. 3834 * The checksum is only computed over the valid data in the 3835 * cache, not the entire cache. 3836 * Also, set all the fields (except checksum) before computing 3837 * checksum. 3838 */ 3839 all = (struct di_all *)di_cache.cache_data; 3840 all->cache_magic = DI_CACHE_MAGIC; 3841 all->map_size = rval; 3842 3843 ASSERT(all->cache_checksum == 0); 3844 CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); 3845 all->cache_checksum = crc; 3846 3847 di_cache_write(&di_cache); 3848 3849 return (rval); 3850 } 3851 3852 static void 3853 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 3854 { 3855 va_list ap; 3856 3857 if (di_cache_debug <= DI_QUIET) 3858 return; 3859 3860 if (di_cache_debug < msglevel) 3861 return; 3862 3863 switch (msglevel) { 3864 case DI_ERR: 3865 msglevel = CE_WARN; 3866 break; 3867 case DI_INFO: 3868 case DI_TRACE: 3869 default: 3870 msglevel = CE_NOTE; 3871 break; 3872 } 3873 3874 va_start(ap, fmt); 3875 vcmn_err(msglevel, fmt, ap); 3876 va_end(ap); 3877 } 3878
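
/*
 * For reference, the checksum convention shared by di_cache_update()
 * and di_cache_read(): the CRC is computed over the map_size valid
 * bytes with the cache_checksum field itself zeroed. An illustrative
 * verifier, mirroring the logic above (a sketch only; not called by
 * the driver):
 *
 *	static int
 *	di_cache_crc_ok(caddr_t data, size_t map_size)
 *	{
 *		struct di_all *all = (struct di_all *)data;
 *		uint32_t saved = all->cache_checksum;
 *		uint32_t crc;
 *
 *		all->cache_checksum = 0;
 *		CRC32(crc, data, map_size, -1U, crc32_table);
 *		all->cache_checksum = saved;
 *		return (crc == saved);
 *	}
 */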