/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Pool import support functions.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device.  If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded.  Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev.  We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed.  Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */

#include <devid.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>

#include <sys/vdev_impl.h>

#include "libzfs.h"
#include "libzfs_impl.h"

/*
 * Intermediate structures used to gather configuration information.
 */
typedef struct config_entry {
	uint64_t		ce_txg;
	nvlist_t		*ce_config;
	struct config_entry	*ce_next;
} config_entry_t;

typedef struct vdev_entry {
	uint64_t		ve_guid;
	config_entry_t		*ve_configs;
	struct vdev_entry	*ve_next;
} vdev_entry_t;

typedef struct pool_entry {
	uint64_t		pe_guid;
	vdev_entry_t		*pe_vdevs;
	struct pool_entry	*pe_next;
} pool_entry_t;

typedef struct name_entry {
	char			*ne_name;
	uint64_t		ne_guid;
	struct name_entry	*ne_next;
} name_entry_t;

typedef struct pool_list {
	pool_entry_t		*pools;
	name_entry_t		*names;
} pool_list_t;

static char *
get_devid(const char *path)
{
	int fd;
	ddi_devid_t devid;
	char *minor, *ret;

	if ((fd = open(path, O_RDONLY)) < 0)
		return (NULL);

	minor = NULL;
	ret = NULL;
	if (devid_get(fd, &devid) == 0) {
		if (devid_get_minor_name(fd, &minor) == 0)
			ret = devid_str_encode(devid, minor);
		if (minor != NULL)
			devid_str_free(minor);
		devid_free(devid);
	}
	(void) close(fd);

	return (ret);
}
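
/*
 * The devid string encodes a device identity that is independent of the
 * /dev path; the exact encoding is platform specific (a typical value
 * resembles "id1,sd@<vendor_and_serial>/a").  Storing it in the label
 * config gives the kernel a second way to locate the device if its path
 * has changed since the pool was last imported.
 */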

/*
 * Go through and fix up any path and/or devid information for the given vdev
 * configuration.
 */
static int
fix_paths(nvlist_t *nv, name_entry_t *names)
{
	nvlist_t **child;
	uint_t c, children;
	uint64_t guid;
	name_entry_t *ne, *best;
	char *path, *devid;
	int matched;

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++)
			if (fix_paths(child[c], names) != 0)
				return (-1);
		return (0);
	}

	/*
	 * This is a leaf (file or disk) vdev.  In either case, go through
	 * the name list and see if we find a matching guid.  If so, replace
	 * the path and see if we can calculate a new devid.
	 *
	 * There may be multiple names associated with a particular guid, in
	 * which case we have overlapping slices or multiple paths to the same
	 * disk.  If this is the case, then we want to pick the path that is
	 * the most similar to the original, where "most similar" means the
	 * longest matching suffix (the number of matching characters counted
	 * from the end of the path).  This will preserve slice numbers even
	 * if the disks have been reorganized, and will also catch preferred
	 * disk names if multiple paths exist.
	 *
	 * For example, if the original path was /dev/dsk/c1t2d0s3 and the
	 * device is now visible as both /dev/dsk/c5t2d0s3 and
	 * /dev/dsk/c5t2d0s0, the s3 name wins (six matching trailing
	 * characters versus none), preserving the original slice.
	 */
	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		path = NULL;

	matched = 0;
	best = NULL;
	for (ne = names; ne != NULL; ne = ne->ne_next) {
		if (ne->ne_guid == guid) {
			const char *src, *dst;
			int count;

			if (path == NULL) {
				best = ne;
				break;
			}

			src = ne->ne_name + strlen(ne->ne_name) - 1;
			dst = path + strlen(path) - 1;
			for (count = 0; src >= ne->ne_name && dst >= path;
			    src--, dst--, count++)
				if (*src != *dst)
					break;

			/*
			 * At this point, 'count' is the number of characters
			 * matched from the end.
			 */
			if (count > matched || best == NULL) {
				best = ne;
				matched = count;
			}
		}
	}

	if (best == NULL)
		return (0);

	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
		return (-1);

	if ((devid = get_devid(best->ne_name)) == NULL) {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
	} else {
		if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) {
			devid_str_free(devid);
			return (-1);
		}
		devid_str_free(devid);
	}

	return (0);
}

/*
 * Add the given configuration to the list of known devices.
 */
static int
add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
    nvlist_t *config)
{
	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	name_entry_t *ne;

	/*
	 * If this is a hot spare not currently in use or level 2 cache
	 * device, add it to the list of names to translate, but don't do
	 * anything else.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &state) == 0 &&
	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
			return (-1);

		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
			free(ne);
			return (-1);
		}
		ne->ne_guid = vdev_guid;
		ne->ne_next = pl->names;
		pl->names = ne;
		return (0);
	}

	/*
	 * If we have a valid config but cannot read any of these fields, then
	 * it means we have a half-initialized label.  In vdev_label_init()
	 * we write a label with txg == 0 so that we can identify the device
	 * in case the user refers to the same disk later on.  If we fail to
	 * create the pool, we'll be left with a label in this state
	 * which should not be considered part of a valid pool.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pool_guid) != 0 ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) != 0 ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
	    &top_guid) != 0 ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &txg) != 0 || txg == 0) {
		nvlist_free(config);
		return (0);
	}

	/*
	 * First, see if we know about this pool.  If not, then add it to the
	 * list of known pools.
	 */
	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		if (pe->pe_guid == pool_guid)
			break;
	}

	if (pe == NULL) {
		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
			nvlist_free(config);
			return (-1);
		}
		pe->pe_guid = pool_guid;
		pe->pe_next = pl->pools;
		pl->pools = pe;
	}

	/*
	 * Second, see if we know about this toplevel vdev.  Add it if it's
	 * missing.
	 */
	for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
		if (ve->ve_guid == top_guid)
			break;
	}

	if (ve == NULL) {
		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
			nvlist_free(config);
			return (-1);
		}
		ve->ve_guid = top_guid;
		ve->ve_next = pe->pe_vdevs;
		pe->pe_vdevs = ve;
	}

	/*
	 * Third, see if we have a config with a matching transaction group.
	 * If so, then we do nothing.  Otherwise, add it to the list of known
	 * configs.
	 */
	for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
		if (ce->ce_txg == txg)
			break;
	}

	if (ce == NULL) {
		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
			nvlist_free(config);
			return (-1);
		}
		ce->ce_txg = txg;
		ce->ce_config = config;
		ce->ce_next = ve->ve_configs;
		ve->ve_configs = ce;
	} else {
		nvlist_free(config);
	}

	/*
	 * At this point we've successfully added our config to the list of
	 * known configs.  The last thing to do is add the vdev guid -> path
	 * mappings so that we can fix up the configuration as necessary before
	 * doing the import.
	 */
	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
		return (-1);

	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
		free(ne);
		return (-1);
	}

	ne->ne_guid = vdev_guid;
	ne->ne_next = pl->names;
	pl->names = ne;

	return (0);
}
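
/*
 * After every readable label has been passed to add_config(), the gathered
 * state forms the hierarchy described at the top of this file, roughly:
 *
 *	pool_list_t
 *	    pools -> pool_entry_t (one per pool guid)
 *	                 pe_vdevs -> vdev_entry_t (one per toplevel vdev guid)
 *	                                 ve_configs -> config_entry_t
 *	                                               (one per label txg seen)
 *	    names -> name_entry_t (vdev guid -> device path mappings)
 *
 * get_configs() below walks this structure to pick the best (highest txg)
 * config for each toplevel vdev and assemble the final pool configs.
 */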

/*
 * Determine whether the named pool is currently active and, if so, whether
 * its guid matches the given guid.  The answer is returned in 'isactive';
 * the return value indicates only whether the check itself succeeded.
 */
static int
pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
    boolean_t *isactive)
{
	zpool_handle_t *zhp;
	uint64_t theguid;

	if (zpool_open_silent(hdl, name, &zhp) != 0)
		return (-1);

	if (zhp == NULL) {
		*isactive = B_FALSE;
		return (0);
	}

	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
	    &theguid) == 0);

	zpool_close(zhp);

	*isactive = (theguid == guid);
	return (0);
}

/*
 * Ask the kernel to perform a dry-run ("try") import of the given config and
 * return the refreshed configuration it derives, or NULL on failure.
 */
static nvlist_t *
refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
{
	nvlist_t *nvl;
	zfs_cmd_t zc = { 0 };
	int err;

	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
		return (NULL);

	if (zcmd_alloc_dst_nvlist(hdl, &zc,
	    zc.zc_nvlist_conf_size * 2) != 0) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
	    &zc)) != 0 && errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (NULL);
		}
	}

	if (err) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	zcmd_free_nvlists(&zc);
	return (nvl);
}

/*
 * Determine if the vdev id is a hole in the namespace.
 */
boolean_t
vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
{
	for (int c = 0; c < holes; c++) {
		/* Top-level is a hole */
		if (hole_array[c] == id)
			return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Convert our list of pools into the definitive set of configurations.  We
 * start by picking the best config for each toplevel vdev.  Once that's done,
 * we assemble the toplevel vdevs into a full config for the pool.  We make a
 * pass to fix up any incorrect paths, and then add it to the main list to
 * return to the user.
 */
static nvlist_t *
get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
{
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t i, nspares, nl2cache;
	boolean_t config_seen;
	uint64_t best_txg;
	char *name, *hostname;
	uint64_t version, guid;
	uint_t children = 0;
	nvlist_t **child = NULL;
	uint_t holes;
	uint64_t *hole_array, max_id;
	uint_t c;
	boolean_t isactive;
	uint64_t hostid;
	nvlist_t *nvl;
	boolean_t found_one = B_FALSE;
	boolean_t valid_top_config = B_FALSE;

	if (nvlist_alloc(&ret, 0, 0) != 0)
		goto nomem;

	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		uint64_t id, max_txg = 0;

		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		config_seen = B_FALSE;

		/*
		 * Iterate over all toplevel vdevs.  Grab the pool
		 * configuration from the first one we find, and then go
		 * through the rest and add them as necessary to the 'vdevs'
		 * member of the config.
		 */
		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {

			/*
			 * Determine the best configuration for this vdev by
			 * selecting the config with the latest transaction
			 * group.
			 */
			best_txg = 0;
			for (ce = ve->ve_configs; ce != NULL;
			    ce = ce->ce_next) {

				if (ce->ce_txg > best_txg) {
					tmp = ce->ce_config;
					best_txg = ce->ce_txg;
				}
			}

			/*
			 * We rely on the fact that the max txg for the
			 * pool will contain the most up-to-date information
			 * about the valid top-levels in the vdev namespace.
			 */
			if (best_txg > max_txg) {
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_VDEV_CHILDREN,
				    DATA_TYPE_UINT64);
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_HOLE_ARRAY,
				    DATA_TYPE_UINT64_ARRAY);

				max_txg = best_txg;
				hole_array = NULL;
				holes = 0;
				max_id = 0;
				valid_top_config = B_FALSE;

				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
					verify(nvlist_add_uint64(config,
					    ZPOOL_CONFIG_VDEV_CHILDREN,
					    max_id) == 0);
					valid_top_config = B_TRUE;
				}

				if (nvlist_lookup_uint64_array(tmp,
				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
				    &holes) == 0) {
					verify(nvlist_add_uint64_array(config,
					    ZPOOL_CONFIG_HOLE_ARRAY,
					    hole_array, holes) == 0);
				}
			}

			if (!config_seen) {
				/*
				 * Copy the relevant pieces of data to the pool
				 * configuration:
				 *
				 *	version
				 *	pool guid
				 *	name
				 *	pool state
				 *	hostid (if available)
				 *	hostname (if available)
				 */
				uint64_t state;

				verify(nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VERSION, &version) == 0);
				if (nvlist_add_uint64(config,
				    ZPOOL_CONFIG_VERSION, version) != 0)
					goto nomem;
				verify(nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
				if (nvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_GUID, guid) != 0)
					goto nomem;
				verify(nvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
				if (nvlist_add_string(config,
				    ZPOOL_CONFIG_POOL_NAME, name) != 0)
					goto nomem;
				verify(nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
				if (nvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_STATE, state) != 0)
					goto nomem;
				hostid = 0;
				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
					if (nvlist_add_uint64(config,
					    ZPOOL_CONFIG_HOSTID, hostid) != 0)
						goto nomem;
					verify(nvlist_lookup_string(tmp,
					    ZPOOL_CONFIG_HOSTNAME,
					    &hostname) == 0);
					if (nvlist_add_string(config,
					    ZPOOL_CONFIG_HOSTNAME,
					    hostname) != 0)
						goto nomem;
				}

				config_seen = B_TRUE;
			}

			/*
			 * Add this top-level vdev to the child array.
			 */
			verify(nvlist_lookup_nvlist(tmp,
			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
			    &id) == 0);

			if (id >= children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (id + 1) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = id + 1;
			}
			if (nvlist_dup(nvtop, &child[id], 0) != 0)
				goto nomem;

		}

		/*
		 * If we have information about all the top-levels then
		 * clean up the nvlist which we've constructed.  This
		 * means removing any extraneous devices that are
		 * beyond the valid range or adding devices to the end
		 * of our array which appear to be missing.
		 */
		if (valid_top_config) {
			if (max_id < children) {
				for (c = max_id; c < children; c++)
					nvlist_free(child[c]);
				children = max_id;
			} else if (max_id > children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (max_id) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = max_id;
			}
		}

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		/*
		 * The vdev namespace may contain holes as a result of
		 * device removal.  We must add them back into the vdev
		 * tree before we process any missing devices.
		 */
		if (holes > 0) {
			ASSERT(valid_top_config);

			for (c = 0; c < children; c++) {
				nvlist_t *holey;

				if (child[c] != NULL ||
				    !vdev_is_hole(hole_array, holes, c))
					continue;

				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;

				/*
				 * Holes in the namespace are treated as
				 * "hole" top-level vdevs and have a
				 * special flag set on them.
				 */
				if (nvlist_add_string(holey,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_HOLE) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0)
					goto nomem;
				child[c] = holey;
			}
		}

		/*
		 * Look for any missing top-level vdevs.  If this is the case,
		 * create a faked up 'missing' vdev as a placeholder.  We
		 * cannot simply compress the child array, because the kernel
		 * performs certain checks to make sure the vdev IDs match
		 * their location in the configuration.
		 */
		for (c = 0; c < children; c++) {
			if (child[c] == NULL) {
				nvlist_t *missing;
				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;
				if (nvlist_add_string(missing,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_MISSING) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(missing);
					goto nomem;
				}
				child[c] = missing;
			}
		}

		/*
		 * Put all of this pool's top-level vdevs into a root vdev.
		 */
		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    child, children) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		for (c = 0; c < children; c++)
			nvlist_free(child[c]);
		free(child);
		children = 0;
		child = NULL;

		/*
		 * Go through and fix up any paths and/or devids based on our
		 * known list of vdev GUID -> path mappings.
		 */
		if (fix_paths(nvroot, pl->names) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * Add the root vdev to this pool's configuration.
		 */
		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    nvroot) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}
		nvlist_free(nvroot);
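
		/*
		 * At this point the derived config for this pool looks
		 * roughly like (illustrative sketch; the actual nvlist
		 * contents come from the best label of each toplevel vdev):
		 *
		 *	version, pool guid, name, state, [hostid, hostname]
		 *	vdev_children, [hole_array]
		 *	vdev_tree:
		 *		type = "root", id = 0, guid = <pool guid>
		 *		children[] = one entry per toplevel vdev id,
		 *		    with "hole" or "missing" placeholders
		 *		    where no label was found
		 */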

		/*
		 * zdb uses this path to report on active pools that were
		 * imported or created using -R.
		 */
		if (active_ok)
			goto add_pool;

		/*
		 * Determine if this pool is currently active, in which case we
		 * can't actually import it.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		if (pool_active(hdl, name, guid, &isactive) != 0)
			goto error;

		if (isactive) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		if ((nvl = refresh_config(hdl, config)) == NULL) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		nvlist_free(config);
		config = nvl;

		/*
		 * Go through and update the paths for spares, now that we have
		 * them.
		 */
		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &spares, &nspares) == 0) {
			for (i = 0; i < nspares; i++) {
				if (fix_paths(spares[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Update the paths for l2cache devices.
		 */
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2cache, &nl2cache) == 0) {
			for (i = 0; i < nl2cache; i++) {
				if (fix_paths(l2cache[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Restore the original information read from the actual label.
		 */
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
		    DATA_TYPE_UINT64);
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
		    DATA_TYPE_STRING);
		if (hostid != 0) {
			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
			    hostid) == 0);
			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
			    hostname) == 0);
		}

add_pool:
		/*
		 * Add this pool to the list of configs.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		if (nvlist_add_nvlist(ret, name, config) != 0)
			goto nomem;

		found_one = B_TRUE;
		nvlist_free(config);
		config = NULL;
	}

	if (!found_one) {
		nvlist_free(ret);
		ret = NULL;
	}

	return (ret);

nomem:
	(void) no_memory(hdl);
error:
	nvlist_free(config);
	nvlist_free(ret);
	for (c = 0; c < children; c++)
		nvlist_free(child[c]);
	free(child);

	return (NULL);
}

/*
 * Return the offset of the given label.
 */
static uint64_t
label_offset(uint64_t size, int l)
{
	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
}
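
/*
 * For reference (assuming the usual VDEV_LABELS == 4): labels 0 and 1 sit at
 * the front of the device at offsets 0 and sizeof (vdev_label_t), while
 * labels 2 and 3 sit at the end, at size - 2 * sizeof (vdev_label_t) and
 * size - sizeof (vdev_label_t), where 'size' has already been rounded down
 * to a label-sized boundary.
 */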

/*
 * Given a file descriptor, read the label information and return an nvlist
 * describing the configuration, if there is one.
 */
int
zpool_read_label(int fd, nvlist_t **config)
{
	struct stat64 statbuf;
	int l;
	vdev_label_t *label;
	uint64_t state, txg, size;

	*config = NULL;

	if (fstat64(fd, &statbuf) == -1)
		return (0);
	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);

	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
		return (-1);

	for (l = 0; l < VDEV_LABELS; l++) {
		if (pread64(fd, label, sizeof (vdev_label_t),
		    label_offset(size, l)) != sizeof (vdev_label_t))
			continue;

		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			continue;
		}

		free(label);
		return (0);
	}

	free(label);
	*config = NULL;
	return (0);
}

/*
 * Given a file descriptor, clear (zero) the label information.  This function
 * is currently only used in the appliance stack as part of the ZFS sysevent
 * module.
 */
int
zpool_clear_label(int fd)
{
	struct stat64 statbuf;
	int l;
	vdev_label_t *label;
	uint64_t size;

	if (fstat64(fd, &statbuf) == -1)
		return (0);
	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);

	if ((label = calloc(1, sizeof (vdev_label_t))) == NULL)
		return (-1);

	for (l = 0; l < VDEV_LABELS; l++) {
		if (pwrite64(fd, label, sizeof (vdev_label_t),
		    label_offset(size, l)) != sizeof (vdev_label_t)) {
			free(label);
			return (-1);
		}
	}

	free(label);
	return (0);
}

/*
 * Given a list of directories to search, find all pools stored on disk.  This
 * includes partial pools which are not available to import.  If no args are
 * given (argc is 0), then the default directory (/dev/dsk) is searched.
 * poolname or guid (but not both) are provided by the caller when trying
 * to import a specific pool.
 */
static nvlist_t *
zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
    boolean_t active_ok, char *poolname, uint64_t guid)
{
	int i;
	DIR *dirp = NULL;
	struct dirent64 *dp;
	char path[MAXPATHLEN];
	char *end;
	size_t pathleft;
	struct stat64 statbuf;
	nvlist_t *ret = NULL, *config;
	static char *default_dir = "/dev/dsk";
	int fd;
	pool_list_t pools = { 0 };
	pool_entry_t *pe, *penext;
	vdev_entry_t *ve, *venext;
	config_entry_t *ce, *cenext;
	name_entry_t *ne, *nenext;

	verify(poolname == NULL || guid == 0);

	if (argc == 0) {
		argc = 1;
		argv = &default_dir;
	}

	/*
	 * Go through and read the label configuration information from every
	 * possible device, organizing the information according to pool GUID
	 * and toplevel GUID.
	 */
	for (i = 0; i < argc; i++) {
		char *rdsk;
		int dfd;

		/* use realpath to normalize the path */
		if (realpath(argv[i], path) == NULL) {
			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
			    argv[i]);
			goto error;
		}
		end = &path[strlen(path)];
		*end++ = '/';
		*end = 0;
		pathleft = &path[sizeof (path)] - end;

		/*
		 * Using raw devices instead of block devices when we're
		 * reading the labels skips a bunch of slow operations during
		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
		 */
		if (strcmp(path, "/dev/dsk/") == 0)
			rdsk = "/dev/rdsk/";
		else
			rdsk = path;

		if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
		    (dirp = fdopendir(dfd)) == NULL) {
			zfs_error_aux(hdl, strerror(errno));
			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
			    rdsk);
			goto error;
		}

		/*
		 * This is not MT-safe, but we have no MT consumers of libzfs
		 */
		while ((dp = readdir64(dirp)) != NULL) {
			const char *name = dp->d_name;
			if (name[0] == '.' &&
			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
				continue;

			if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
				continue;

			/*
			 * Ignore failed stats.  We only want regular
			 * files, character devs and block devs.
			 */
			if (fstat64(fd, &statbuf) != 0 ||
			    (!S_ISREG(statbuf.st_mode) &&
			    !S_ISCHR(statbuf.st_mode) &&
			    !S_ISBLK(statbuf.st_mode))) {
				(void) close(fd);
				continue;
			}

			if ((zpool_read_label(fd, &config)) != 0) {
				(void) close(fd);
				(void) no_memory(hdl);
				goto error;
			}

			(void) close(fd);

			if (config != NULL) {
				boolean_t matched = B_TRUE;

				if (poolname != NULL) {
					char *pname;

					matched = nvlist_lookup_string(config,
					    ZPOOL_CONFIG_POOL_NAME,
					    &pname) == 0 &&
					    strcmp(poolname, pname) == 0;
				} else if (guid != 0) {
					uint64_t this_guid;

					matched = nvlist_lookup_uint64(config,
					    ZPOOL_CONFIG_POOL_GUID,
					    &this_guid) == 0 &&
					    guid == this_guid;
				}
				if (!matched) {
					nvlist_free(config);
					config = NULL;
					continue;
				}
				/* use the non-raw path for the config */
				(void) strlcpy(end, name, pathleft);
				if (add_config(hdl, &pools, path, config) != 0)
					goto error;
			}
		}

		(void) closedir(dirp);
		dirp = NULL;
	}

	ret = get_configs(hdl, &pools, active_ok);

error:
	for (pe = pools.pools; pe != NULL; pe = penext) {
		penext = pe->pe_next;
		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
			venext = ve->ve_next;
			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
				cenext = ce->ce_next;
				if (ce->ce_config)
					nvlist_free(ce->ce_config);
				free(ce);
			}
			free(ve);
		}
		free(pe);
	}

	for (ne = pools.names; ne != NULL; ne = nenext) {
		nenext = ne->ne_next;
		if (ne->ne_name)
			free(ne->ne_name);
		free(ne);
	}

	if (dirp)
		(void) closedir(dirp);

	return (ret);
}

nvlist_t *
zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
{
	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
}

nvlist_t *
zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
    char *pool)
{
	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
}
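
/*
 * Typical caller pattern (illustrative sketch, not part of this library):
 *
 *	char *dirs[] = { "/dev/dsk" };
 *	nvlist_t *pools = zpool_find_import(hdl, 1, dirs);
 *	nvpair_t *elem = NULL;
 *
 *	while (pools != NULL &&
 *	    (elem = nvlist_next_nvpair(pools, elem)) != NULL) {
 *		... each nvpair maps a pool name to an importable config ...
 *	}
 */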

nvlist_t *
zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
    uint64_t guid)
{
	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
}

nvlist_t *
zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
{
	return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
}

/*
 * Given a cache file, return the contents as a list of importable pools.
 * poolname or guid (but not both) are provided by the caller when trying
 * to import a specific pool.
 */
nvlist_t *
zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
    char *poolname, uint64_t guid)
{
	char *buf;
	int fd;
	struct stat64 statbuf;
	nvlist_t *raw, *src, *dst;
	nvlist_t *pools;
	nvpair_t *elem;
	char *name;
	uint64_t this_guid;
	boolean_t active;

	verify(poolname == NULL || guid == 0);

	if ((fd = open(cachefile, O_RDONLY)) < 0) {
		zfs_error_aux(hdl, "%s", strerror(errno));
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
		return (NULL);
	}

	if (fstat64(fd, &statbuf) != 0) {
		zfs_error_aux(hdl, "%s", strerror(errno));
		(void) close(fd);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
		return (NULL);
	}

	if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
		(void) close(fd);
		return (NULL);
	}

	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
		(void) close(fd);
		free(buf);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN,
		    "failed to read cache file contents"));
		return (NULL);
	}

	(void) close(fd);

	if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
		free(buf);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN,
		    "invalid or corrupt cache file contents"));
		return (NULL);
	}

	free(buf);

	/*
	 * Go through and get the current state of the pools and refresh their
	 * state.
	 */
	if (nvlist_alloc(&pools, 0, 0) != 0) {
		(void) no_memory(hdl);
		nvlist_free(raw);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
		verify(nvpair_value_nvlist(elem, &src) == 0);

		verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		if (poolname != NULL && strcmp(poolname, name) != 0)
			continue;

		verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
		    &this_guid) == 0);
		if (guid != 0 && guid != this_guid)
			continue;

		if (pool_active(hdl, name, this_guid, &active) != 0) {
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}

		if (active)
			continue;

		if ((dst = refresh_config(hdl, src)) == NULL) {
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}

		if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
			(void) no_memory(hdl);
			nvlist_free(dst);
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}
		nvlist_free(dst);
	}

	nvlist_free(raw);
	return (pools);
}

/*
 * Recursively search a vdev tree for the given vdev guid.
 */
boolean_t
find_guid(nvlist_t *nv, uint64_t guid)
{
	uint64_t tmp;
	nvlist_t **child;
	uint_t c, children;

	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
	if (tmp == guid)
		return (B_TRUE);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++)
			if (find_guid(child[c], guid))
				return (B_TRUE);
	}

	return (B_FALSE);
}

typedef struct aux_cbdata {
	const char	*cb_type;
	uint64_t	cb_guid;
	zpool_handle_t	*cb_zhp;
} aux_cbdata_t;

/*
 * zpool_iter() callback: check whether the given pool has an auxiliary
 * device (spare or l2cache, per cb_type) with a matching guid.  On a match
 * we keep the pool handle open, record it in cb_zhp, and return 1 to stop
 * iteration; otherwise the handle is closed and iteration continues.
 */
static int
find_aux(zpool_handle_t *zhp, void *data)
{
	aux_cbdata_t *cbp = data;
	nvlist_t **list;
	uint_t i, count;
	uint64_t guid;
	nvlist_t *nvroot;

	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);

	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
	    &list, &count) == 0) {
		for (i = 0; i < count; i++) {
			verify(nvlist_lookup_uint64(list[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (guid == cbp->cb_guid) {
				cbp->cb_zhp = zhp;
				return (1);
			}
		}
	}

	zpool_close(zhp);
	return (0);
}

/*
 * Determines if the given device is in use by a pool.  If so, it returns
 * B_TRUE in 'inuse' along with the state of the pool and the name of the
 * pool.  The name string is allocated and must be freed by the caller.
 */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
	nvlist_t *config;
	char *name;
	boolean_t ret;
	uint64_t guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *pool_config;
	uint64_t stateval, isspare;
	aux_cbdata_t cb = { 0 };
	boolean_t isactive;

	*inuse = B_FALSE;

	if (zpool_read_label(fd, &config) != 0) {
		(void) no_memory(hdl);
		return (-1);
	}

	if (config == NULL)
		return (0);

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) == 0);

	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);
	}

	switch (stateval) {
	case POOL_STATE_EXPORTED:
		ret = B_TRUE;
		break;

	case POOL_STATE_ACTIVE:
		/*
		 * For an active pool, we have to determine if it's really part
		 * of a currently active pool (in which case the pool will
		 * exist and the guid will be the same), or whether it's part
		 * of an active pool that was disconnected without being
		 * explicitly exported.
		 */
		if (pool_active(hdl, name, guid, &isactive) != 0) {
			nvlist_free(config);
			return (-1);
		}

		if (isactive) {
			/*
			 * Because the device may have been removed while
			 * offlined, we only report it as active if the vdev is
			 * still present in the config.  Otherwise, pretend like
			 * it's not in use.
			 */
			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
			    (pool_config = zpool_get_config(zhp, NULL))
			    != NULL) {
				nvlist_t *nvroot;

				verify(nvlist_lookup_nvlist(pool_config,
				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
				ret = find_guid(nvroot, vdev_guid);
			} else {
				ret = B_FALSE;
			}

			/*
			 * If this is an active spare within another pool, we
			 * treat it like an unused hot spare.  This allows the
			 * user to create a pool with a hot spare that is
			 * currently in use within another pool.  Since we
			 * return B_TRUE, libdiskmgt will continue to prevent
			 * generic consumers from using the device.
			 */
			if (ret && nvlist_lookup_uint64(config,
			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
				stateval = POOL_STATE_SPARE;

			if (zhp != NULL)
				zpool_close(zhp);
		} else {
			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
			ret = B_TRUE;
		}
		break;

	case POOL_STATE_SPARE:
		/*
		 * For a hot spare, it can be either definitively in use, or
		 * potentially active.  To determine if it's in use, we iterate
		 * over all pools in the system and search for one with a spare
		 * with a matching guid.
		 *
		 * Due to the shared nature of spares, we don't actually report
		 * the potentially active case as in use.  This means the user
		 * can freely create pools on the hot spares of exported pools,
		 * but to do otherwise makes the resulting code complicated,
		 * and we end up having to deal with this case anyway.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_SPARES;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	case POOL_STATE_L2CACHE:

		/*
		 * Check if any pool is currently using this l2cache device.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	default:
		ret = B_FALSE;
	}

	if (ret) {
		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
			if (cb.cb_zhp)
				zpool_close(cb.cb_zhp);
			nvlist_free(config);
			return (-1);
		}
		*state = (pool_state_t)stateval;
	}

	if (cb.cb_zhp)
		zpool_close(cb.cb_zhp);

	nvlist_free(config);
	*inuse = ret;
	return (0);
}
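
/*
 * Example caller pattern for zpool_in_use() (illustrative sketch only; the
 * error handling a real consumer needs is omitted):
 *
 *	pool_state_t state;
 *	char *name;
 *	boolean_t inuse;
 *
 *	if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
 *		(void) printf("device belongs to pool '%s'\n", name);
 *		free(name);
 *	}
 */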