/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Pool import support functions.
 *
 * To import a pool, we rely on reading the configuration information from the
 * ZFS label of each device. If we successfully read the label, then we
 * organize the configuration information in the following hierarchy:
 *
 *	pool guid -> toplevel vdev guid -> label txg
 *
 * Duplicate entries matching this same tuple will be discarded. Once we have
 * examined every device, we pick the best label txg config for each toplevel
 * vdev. We then arrange these toplevel vdevs into a complete pool config, and
 * update any paths that have changed. Finally, we attempt to import the pool
 * using our derived config, and record the results.
 */

#include <devid.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>

#include <sys/vdev_impl.h>

#include "libzfs.h"
#include "libzfs_impl.h"

/*
 * Intermediate structures used to gather configuration information.
 */
typedef struct config_entry {
	uint64_t		ce_txg;
	nvlist_t		*ce_config;
	struct config_entry	*ce_next;
} config_entry_t;

typedef struct vdev_entry {
	uint64_t		ve_guid;
	config_entry_t		*ve_configs;
	struct vdev_entry	*ve_next;
} vdev_entry_t;

typedef struct pool_entry {
	uint64_t		pe_guid;
	vdev_entry_t		*pe_vdevs;
	struct pool_entry	*pe_next;
} pool_entry_t;

typedef struct name_entry {
	char			*ne_name;
	uint64_t		ne_guid;
	struct name_entry	*ne_next;
} name_entry_t;

typedef struct pool_list {
	pool_entry_t		*pools;
	name_entry_t		*names;
} pool_list_t;

static char *
get_devid(const char *path)
{
	int fd;
	ddi_devid_t devid;
	char *minor, *ret;

	if ((fd = open(path, O_RDONLY)) < 0)
		return (NULL);

	minor = NULL;
	ret = NULL;
	if (devid_get(fd, &devid) == 0) {
		if (devid_get_minor_name(fd, &minor) == 0)
			ret = devid_str_encode(devid, minor);
		if (minor != NULL)
			devid_str_free(minor);
		devid_free(devid);
	}
	(void) close(fd);

	return (ret);
}

/*
 * Go through and fix up any path and/or devid information for the given vdev
 * configuration.
 */
static int
fix_paths(nvlist_t *nv, name_entry_t *names)
{
	nvlist_t **child;
	uint_t c, children;
	uint64_t guid;
	name_entry_t *ne, *best;
	char *path, *devid;
	int matched;

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++)
			if (fix_paths(child[c], names) != 0)
				return (-1);
		return (0);
	}

	/*
	 * This is a leaf (file or disk) vdev. In either case, go through
	 * the name list and see if we find a matching guid. If so, replace
	 * the path and see if we can calculate a new devid.
	 *
	 * There may be multiple names associated with a particular guid, in
	 * which case we have overlapping slices or multiple paths to the same
	 * disk. If this is the case, then we want to pick the path that is
	 * the most similar to the original, where "most similar" is the number
	 * of matching characters starting from the end of the path. This will
	 * preserve slice numbers even if the disks have been reorganized, and
	 * will also catch preferred disk names if multiple paths exist.
	 */
	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0);
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0)
		path = NULL;

	matched = 0;
	best = NULL;
	for (ne = names; ne != NULL; ne = ne->ne_next) {
		if (ne->ne_guid == guid) {
			const char *src, *dst;
			int count;

			if (path == NULL) {
				best = ne;
				break;
			}

			src = ne->ne_name + strlen(ne->ne_name) - 1;
			dst = path + strlen(path) - 1;
			for (count = 0; src >= ne->ne_name && dst >= path;
			    src--, dst--, count++)
				if (*src != *dst)
					break;

			/*
			 * At this point, 'count' is the number of characters
			 * matched from the end.
			 */
			if (count > matched || best == NULL) {
				best = ne;
				matched = count;
			}
		}
	}

	if (best == NULL)
		return (0);

	if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0)
		return (-1);

	if ((devid = get_devid(best->ne_name)) == NULL) {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
	} else {
		if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0)
			return (-1);
		devid_str_free(devid);
	}

	return (0);
}

/*
 * Add the given configuration to the list of known devices.
 */
static int
add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path,
    nvlist_t *config)
{
	uint64_t pool_guid, vdev_guid, top_guid, txg, state;
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	name_entry_t *ne;

	/*
	 * If this is a hot spare not currently in use or level 2 cache
	 * device, add it to the list of names to translate, but don't do
	 * anything else.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &state) == 0 &&
	    (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) &&
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) {
		if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
			return (-1);

		if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
			free(ne);
			return (-1);
		}
		ne->ne_guid = vdev_guid;
		ne->ne_next = pl->names;
		pl->names = ne;
		return (0);
	}

	/*
	 * If we have a valid config but cannot read any of these fields, then
	 * it means we have a half-initialized label. In vdev_label_init()
	 * we write a label with txg == 0 so that we can identify the device
	 * in case the user refers to the same disk later on. If we fail to
	 * create the pool, we'll be left with a label in this state
	 * which should not be considered part of a valid pool.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pool_guid) != 0 ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) != 0 ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID,
	    &top_guid) != 0 ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &txg) != 0 || txg == 0) {
		nvlist_free(config);
		return (0);
	}

	/*
	 * First, see if we know about this pool. If not, then add it to the
	 * list of known pools.
	 */
	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		if (pe->pe_guid == pool_guid)
			break;
	}

	if (pe == NULL) {
		if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) {
			nvlist_free(config);
			return (-1);
		}
		pe->pe_guid = pool_guid;
		pe->pe_next = pl->pools;
		pl->pools = pe;
	}

	/*
	 * Second, see if we know about this toplevel vdev. Add it if it's
	 * missing.
	 */
	for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {
		if (ve->ve_guid == top_guid)
			break;
	}

	if (ve == NULL) {
		if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) {
			nvlist_free(config);
			return (-1);
		}
		ve->ve_guid = top_guid;
		ve->ve_next = pe->pe_vdevs;
		pe->pe_vdevs = ve;
	}

	/*
	 * Third, see if we have a config with a matching transaction group.
	 * If so, then we do nothing. Otherwise, add it to the list of known
	 * configs.
	 */
	for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) {
		if (ce->ce_txg == txg)
			break;
	}

	if (ce == NULL) {
		if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) {
			nvlist_free(config);
			return (-1);
		}
		ce->ce_txg = txg;
		ce->ce_config = config;
		ce->ce_next = ve->ve_configs;
		ve->ve_configs = ce;
	} else {
		nvlist_free(config);
	}

	/*
	 * At this point we've successfully added our config to the list of
	 * known configs. The last thing to do is add the vdev guid -> path
	 * mappings so that we can fix up the configuration as necessary before
	 * doing the import.
	 */
	if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL)
		return (-1);

	if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) {
		free(ne);
		return (-1);
	}

	ne->ne_guid = vdev_guid;
	ne->ne_next = pl->names;
	pl->names = ne;

	return (0);
}

/*
 * Returns true if the named pool matches the given GUID.
 */
static int
pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid,
    boolean_t *isactive)
{
	zpool_handle_t *zhp;
	uint64_t theguid;

	if (zpool_open_silent(hdl, name, &zhp) != 0)
		return (-1);

	if (zhp == NULL) {
		*isactive = B_FALSE;
		return (0);
	}

	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
	    &theguid) == 0);

	zpool_close(zhp);

	*isactive = (theguid == guid);
	return (0);
}

static nvlist_t *
refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
{
	nvlist_t *nvl;
	zfs_cmd_t zc = { 0 };
	int err;

	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0)
		return (NULL);

	if (zcmd_alloc_dst_nvlist(hdl, &zc,
	    zc.zc_nvlist_conf_size * 2) != 0) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT,
	    &zc)) != 0 && errno == ENOMEM) {
		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (NULL);
		}
	}

	if (err) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) {
		zcmd_free_nvlists(&zc);
		return (NULL);
	}

	zcmd_free_nvlists(&zc);
	return (nvl);
}

/*
 * Determine if the vdev id is a hole in the namespace.
 */
boolean_t
vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
{
	for (int c = 0; c < holes; c++) {

		/* Top-level is a hole */
		if (hole_array[c] == id)
			return (B_TRUE);
	}
	return (B_FALSE);
}

/*
 * Convert our list of pools into the definitive set of configurations. We
 * start by picking the best config for each toplevel vdev. Once that's done,
 * we assemble the toplevel vdevs into a full config for the pool. We make a
 * pass to fix up any incorrect paths, and then add it to the main list to
 * return to the user.
 */
static nvlist_t *
get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
{
	pool_entry_t *pe;
	vdev_entry_t *ve;
	config_entry_t *ce;
	nvlist_t *ret = NULL, *config = NULL, *tmp, *nvtop, *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t i, nspares, nl2cache;
	boolean_t config_seen;
	uint64_t best_txg;
	char *name, *hostname;
	uint64_t version, guid;
	uint_t children = 0;
	nvlist_t **child = NULL;
	uint_t holes;
	uint64_t *hole_array, max_id;
	uint_t c;
	boolean_t isactive;
	uint64_t hostid;
	nvlist_t *nvl;
	boolean_t found_one = B_FALSE;
	boolean_t valid_top_config = B_FALSE;

	if (nvlist_alloc(&ret, 0, 0) != 0)
		goto nomem;

	for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
		uint64_t id, max_txg = 0;

		if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		config_seen = B_FALSE;

		/*
		 * Iterate over all toplevel vdevs. Grab the pool configuration
		 * from the first one we find, and then go through the rest and
		 * add them as necessary to the 'vdevs' member of the config.
		 */
		for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) {

			/*
			 * Determine the best configuration for this vdev by
			 * selecting the config with the latest transaction
			 * group.
			 */
			best_txg = 0;
			for (ce = ve->ve_configs; ce != NULL;
			    ce = ce->ce_next) {

				if (ce->ce_txg > best_txg) {
					tmp = ce->ce_config;
					best_txg = ce->ce_txg;
				}
			}

			/*
			 * We rely on the fact that the max txg for the
			 * pool will contain the most up-to-date information
			 * about the valid top-levels in the vdev namespace.
			 */
			if (best_txg > max_txg) {
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_VDEV_CHILDREN,
				    DATA_TYPE_UINT64);
				(void) nvlist_remove(config,
				    ZPOOL_CONFIG_HOLE_ARRAY,
				    DATA_TYPE_UINT64_ARRAY);

				max_txg = best_txg;
				hole_array = NULL;
				holes = 0;
				max_id = 0;
				valid_top_config = B_FALSE;

				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
					verify(nvlist_add_uint64(config,
					    ZPOOL_CONFIG_VDEV_CHILDREN,
					    max_id) == 0);
					valid_top_config = B_TRUE;
				}

				if (nvlist_lookup_uint64_array(tmp,
				    ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
				    &holes) == 0) {
					verify(nvlist_add_uint64_array(config,
					    ZPOOL_CONFIG_HOLE_ARRAY,
					    hole_array, holes) == 0);
				}
			}

			if (!config_seen) {
				/*
				 * Copy the relevant pieces of data to the pool
				 * configuration:
				 *
				 *	version
				 *	pool guid
				 *	name
				 *	pool state
				 *	hostid (if available)
				 *	hostname (if available)
				 */
				uint64_t state;

				verify(nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_VERSION, &version) == 0);
				if (nvlist_add_uint64(config,
				    ZPOOL_CONFIG_VERSION, version) != 0)
					goto nomem;
				verify(nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
				if (nvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_GUID, guid) != 0)
					goto nomem;
				verify(nvlist_lookup_string(tmp,
				    ZPOOL_CONFIG_POOL_NAME, &name) == 0);
				if (nvlist_add_string(config,
				    ZPOOL_CONFIG_POOL_NAME, name) != 0)
					goto nomem;
				verify(nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_POOL_STATE, &state) == 0);
				if (nvlist_add_uint64(config,
				    ZPOOL_CONFIG_POOL_STATE, state) != 0)
					goto nomem;
				hostid = 0;
				if (nvlist_lookup_uint64(tmp,
				    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
					if (nvlist_add_uint64(config,
					    ZPOOL_CONFIG_HOSTID, hostid) != 0)
						goto nomem;
					verify(nvlist_lookup_string(tmp,
					    ZPOOL_CONFIG_HOSTNAME,
					    &hostname) == 0);
					if (nvlist_add_string(config,
					    ZPOOL_CONFIG_HOSTNAME,
					    hostname) != 0)
						goto nomem;
				}

				config_seen = B_TRUE;
			}

			/*
			 * Add this top-level vdev to the child array.
			 */
			verify(nvlist_lookup_nvlist(tmp,
			    ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
			verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
			    &id) == 0);

			if (id >= children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (id + 1) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = id + 1;
			}
			if (nvlist_dup(nvtop, &child[id], 0) != 0)
				goto nomem;

		}

		/*
		 * If we have information about all the top-levels then
		 * clean up the nvlist which we've constructed. This
		 * means removing any extraneous devices that are
		 * beyond the valid range or adding devices to the end
		 * of our array which appear to be missing.
		 */
		if (valid_top_config) {
			if (max_id < children) {
				for (c = max_id; c < children; c++)
					nvlist_free(child[c]);
				children = max_id;
			} else if (max_id > children) {
				nvlist_t **newchild;

				newchild = zfs_alloc(hdl, (max_id) *
				    sizeof (nvlist_t *));
				if (newchild == NULL)
					goto nomem;

				for (c = 0; c < children; c++)
					newchild[c] = child[c];

				free(child);
				child = newchild;
				children = max_id;
			}
		}

		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		/*
		 * The vdev namespace may contain holes as a result of
		 * device removal. We must add them back into the vdev
		 * tree before we process any missing devices.
		 */
		if (holes > 0) {
			ASSERT(valid_top_config);

			for (c = 0; c < children; c++) {
				nvlist_t *holey;

				if (child[c] != NULL ||
				    !vdev_is_hole(hole_array, holes, c))
					continue;

				if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;

				/*
				 * Holes in the namespace are treated as
				 * "hole" top-level vdevs and have a
				 * special flag set on them.
				 */
				if (nvlist_add_string(holey,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_HOLE) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(holey,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0)
					goto nomem;
				child[c] = holey;
			}
		}

		/*
		 * Look for any missing top-level vdevs. If this is the case,
		 * create a faked up 'missing' vdev as a placeholder. We cannot
		 * simply compress the child array, because the kernel performs
		 * certain checks to make sure the vdev IDs match their location
		 * in the configuration.
		 */
		for (c = 0; c < children; c++) {
			if (child[c] == NULL) {
				nvlist_t *missing;
				if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
				    0) != 0)
					goto nomem;
				if (nvlist_add_string(missing,
				    ZPOOL_CONFIG_TYPE,
				    VDEV_TYPE_MISSING) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_ID, c) != 0 ||
				    nvlist_add_uint64(missing,
				    ZPOOL_CONFIG_GUID, 0ULL) != 0) {
					nvlist_free(missing);
					goto nomem;
				}
				child[c] = missing;
			}
		}

		/*
		 * Put all of this pool's top-level vdevs into a root vdev.
		 */
		if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
			goto nomem;
		if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
		    VDEV_TYPE_ROOT) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 ||
		    nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 ||
		    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
		    child, children) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		for (c = 0; c < children; c++)
			nvlist_free(child[c]);
		free(child);
		children = 0;
		child = NULL;

		/*
		 * Go through and fix up any paths and/or devids based on our
		 * known list of vdev GUID -> path mappings.
		 */
		if (fix_paths(nvroot, pl->names) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}

		/*
		 * Add the root vdev to this pool's configuration.
		 */
		if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    nvroot) != 0) {
			nvlist_free(nvroot);
			goto nomem;
		}
		nvlist_free(nvroot);

		/*
		 * zdb uses this path to report on active pools that were
		 * imported or created using -R.
		 */
		if (active_ok)
			goto add_pool;

		/*
		 * Determine if this pool is currently active, in which case we
		 * can't actually import it.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);

		if (pool_active(hdl, name, guid, &isactive) != 0)
			goto error;

		if (isactive) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		if ((nvl = refresh_config(hdl, config)) == NULL) {
			nvlist_free(config);
			config = NULL;
			continue;
		}

		nvlist_free(config);
		config = nvl;

		/*
		 * Go through and update the paths for spares, now that we have
		 * them.
		 */
		verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvroot) == 0);
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
		    &spares, &nspares) == 0) {
			for (i = 0; i < nspares; i++) {
				if (fix_paths(spares[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Update the paths for l2cache devices.
		 */
		if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
		    &l2cache, &nl2cache) == 0) {
			for (i = 0; i < nl2cache; i++) {
				if (fix_paths(l2cache[i], pl->names) != 0)
					goto nomem;
			}
		}

		/*
		 * Restore the original information read from the actual label.
		 */
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID,
		    DATA_TYPE_UINT64);
		(void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME,
		    DATA_TYPE_STRING);
		if (hostid != 0) {
			verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
			    hostid) == 0);
			verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
			    hostname) == 0);
		}

add_pool:
		/*
		 * Add this pool to the list of configs.
		 */
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		if (nvlist_add_nvlist(ret, name, config) != 0)
			goto nomem;

		found_one = B_TRUE;
		nvlist_free(config);
		config = NULL;
	}

	if (!found_one) {
		nvlist_free(ret);
		ret = NULL;
	}

	return (ret);

nomem:
	(void) no_memory(hdl);
error:
	nvlist_free(config);
	nvlist_free(ret);
	for (c = 0; c < children; c++)
		nvlist_free(child[c]);
	free(child);

	return (NULL);
}

/*
 * Return the offset of the given label.
 */
static uint64_t
label_offset(uint64_t size, int l)
{
	ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0);
	return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
	    0 : size - VDEV_LABELS * sizeof (vdev_label_t)));
}

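/*
 * Illustrative note (not part of the original comments): with VDEV_LABELS == 4
 * and the on-disk vdev_label_t (256 KB in the standard layout), label_offset()
 * places labels 0 and 1 at offsets 0 and 256 KB, and labels 2 and 3 at
 * (size - 512 KB) and (size - 256 KB), so redundant copies sit at both the
 * front and the back of the device.
 */
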
/*
 * Given a file descriptor, read the label information and return an nvlist
 * describing the configuration, if there is one.
 */
int
zpool_read_label(int fd, nvlist_t **config)
{
	struct stat64 statbuf;
	int l;
	vdev_label_t *label;
	uint64_t state, txg, size;

	*config = NULL;

	if (fstat64(fd, &statbuf) == -1)
		return (0);
	size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);

	if ((label = malloc(sizeof (vdev_label_t))) == NULL)
		return (-1);

	for (l = 0; l < VDEV_LABELS; l++) {
		if (pread64(fd, label, sizeof (vdev_label_t),
		    label_offset(size, l)) != sizeof (vdev_label_t))
			continue;

		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0)
			continue;

		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state > POOL_STATE_L2CACHE) {
			nvlist_free(*config);
			continue;
		}

		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0)) {
			nvlist_free(*config);
			continue;
		}

		free(label);
		return (0);
	}

	free(label);
	*config = NULL;
	return (0);
}

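/*
 * A minimal usage sketch (illustrative only, mirroring what zpool_in_use()
 * does below): callers must check both the return value and the resulting
 * nvlist, since an unlabeled device returns 0 with *config left as NULL.
 *
 *	nvlist_t *config;
 *
 *	if (zpool_read_label(fd, &config) != 0) {
 *		(void) no_memory(hdl);
 *		return (-1);
 *	}
 *	if (config == NULL)
 *		return (0);
 *	(inspect the ZPOOL_CONFIG_* pairs, then nvlist_free(config))
 */
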
/*
 * Given a list of directories to search, find all pools stored on disk. This
 * includes partial pools which are not available to import. If no args are
 * given (argc is 0), then the default directory (/dev/dsk) is searched.
 * poolname or guid (but not both) are provided by the caller when trying
 * to import a specific pool.
 */
static nvlist_t *
zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
    boolean_t active_ok, char *poolname, uint64_t guid)
{
	int i;
	DIR *dirp = NULL;
	struct dirent64 *dp;
	char path[MAXPATHLEN];
	char *end;
	size_t pathleft;
	struct stat64 statbuf;
	nvlist_t *ret = NULL, *config;
	static char *default_dir = "/dev/dsk";
	int fd;
	pool_list_t pools = { 0 };
	pool_entry_t *pe, *penext;
	vdev_entry_t *ve, *venext;
	config_entry_t *ce, *cenext;
	name_entry_t *ne, *nenext;

	verify(poolname == NULL || guid == 0);

	if (argc == 0) {
		argc = 1;
		argv = &default_dir;
	}

	/*
	 * Go through and read the label configuration information from every
	 * possible device, organizing the information according to pool GUID
	 * and toplevel GUID.
	 */
	for (i = 0; i < argc; i++) {
		char *rdsk;
		int dfd;

		/* use realpath to normalize the path */
		if (realpath(argv[i], path) == 0) {
			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
			    argv[i]);
			goto error;
		}
		end = &path[strlen(path)];
		*end++ = '/';
		*end = 0;
		pathleft = &path[sizeof (path)] - end;

		/*
		 * Using raw devices instead of block devices when we're
		 * reading the labels skips a bunch of slow operations during
		 * close(2) processing, so we replace /dev/dsk with /dev/rdsk.
		 */
		if (strcmp(path, "/dev/dsk/") == 0)
			rdsk = "/dev/rdsk/";
		else
			rdsk = path;

		if ((dfd = open64(rdsk, O_RDONLY)) < 0 ||
		    (dirp = fdopendir(dfd)) == NULL) {
			zfs_error_aux(hdl, strerror(errno));
			(void) zfs_error_fmt(hdl, EZFS_BADPATH,
			    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
			    rdsk);
			goto error;
		}

		/*
		 * This is not MT-safe, but we have no MT consumers of libzfs
		 */
		while ((dp = readdir64(dirp)) != NULL) {
			const char *name = dp->d_name;
			if (name[0] == '.' &&
			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
				continue;

			if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
				continue;

			/*
			 * Ignore failed stats. We only want regular
			 * files, character devs and block devs.
			 */
			if (fstat64(fd, &statbuf) != 0 ||
			    (!S_ISREG(statbuf.st_mode) &&
			    !S_ISCHR(statbuf.st_mode) &&
			    !S_ISBLK(statbuf.st_mode))) {
				(void) close(fd);
				continue;
			}

			if ((zpool_read_label(fd, &config)) != 0) {
				(void) close(fd);
				(void) no_memory(hdl);
				goto error;
			}

			(void) close(fd);

			if (config != NULL) {
				boolean_t matched = B_TRUE;

				if (poolname != NULL) {
					char *pname;

					matched = nvlist_lookup_string(config,
					    ZPOOL_CONFIG_POOL_NAME,
					    &pname) == 0 &&
					    strcmp(poolname, pname) == 0;
				} else if (guid != 0) {
					uint64_t this_guid;

					matched = nvlist_lookup_uint64(config,
					    ZPOOL_CONFIG_POOL_GUID,
					    &this_guid) == 0 &&
					    guid == this_guid;
				}
				if (!matched) {
					nvlist_free(config);
					config = NULL;
					continue;
				}
				/* use the non-raw path for the config */
				(void) strlcpy(end, name, pathleft);
				if (add_config(hdl, &pools, path, config) != 0)
					goto error;
			}
		}

		(void) closedir(dirp);
		dirp = NULL;
	}

	ret = get_configs(hdl, &pools, active_ok);

error:
	for (pe = pools.pools; pe != NULL; pe = penext) {
		penext = pe->pe_next;
		for (ve = pe->pe_vdevs; ve != NULL; ve = venext) {
			venext = ve->ve_next;
			for (ce = ve->ve_configs; ce != NULL; ce = cenext) {
				cenext = ce->ce_next;
				if (ce->ce_config)
					nvlist_free(ce->ce_config);
				free(ce);
			}
			free(ve);
		}
		free(pe);
	}

	for (ne = pools.names; ne != NULL; ne = nenext) {
		nenext = ne->ne_next;
		if (ne->ne_name)
			free(ne->ne_name);
		free(ne);
	}

	if (dirp)
		(void) closedir(dirp);

	return (ret);
}

nvlist_t *
zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
{
	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
}

nvlist_t *
zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
    char *pool)
{
	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
}

nvlist_t *
zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
    uint64_t guid)
{
	return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
}

nvlist_t *
zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
{
	return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
}

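/*
 * A minimal usage sketch (illustrative only): scan the default /dev/dsk
 * directory for importable pools and walk the result, which is an nvlist
 * keyed by pool name with each value holding that pool's config.
 *
 *	nvlist_t *pools = zpool_find_import(hdl, 0, NULL);
 *	nvpair_t *elem = NULL;
 *	nvlist_t *config;
 *
 *	if (pools != NULL) {
 *		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
 *			verify(nvpair_value_nvlist(elem, &config) == 0);
 *			(examine nvpair_name(elem) and 'config')
 *		}
 *		nvlist_free(pools);
 *	}
 */
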
/*
 * Given a cache file, return the contents as a list of importable pools.
 * poolname or guid (but not both) are provided by the caller when trying
 * to import a specific pool.
 */
nvlist_t *
zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
    char *poolname, uint64_t guid)
{
	char *buf;
	int fd;
	struct stat64 statbuf;
	nvlist_t *raw, *src, *dst;
	nvlist_t *pools;
	nvpair_t *elem;
	char *name;
	uint64_t this_guid;
	boolean_t active;

	verify(poolname == NULL || guid == 0);

	if ((fd = open(cachefile, O_RDONLY)) < 0) {
		zfs_error_aux(hdl, "%s", strerror(errno));
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN, "failed to open cache file"));
		return (NULL);
	}

	if (fstat64(fd, &statbuf) != 0) {
		zfs_error_aux(hdl, "%s", strerror(errno));
		(void) close(fd);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN, "failed to get size of cache file"));
		return (NULL);
	}

	if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) {
		(void) close(fd);
		return (NULL);
	}

	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
		(void) close(fd);
		free(buf);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN,
		    "failed to read cache file contents"));
		return (NULL);
	}

	(void) close(fd);

	if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) {
		free(buf);
		(void) zfs_error(hdl, EZFS_BADCACHE,
		    dgettext(TEXT_DOMAIN,
		    "invalid or corrupt cache file contents"));
		return (NULL);
	}

	free(buf);

	/*
	 * Go through the list of pools in the cache file, refreshing the
	 * state of each from the kernel.
	 */
	if (nvlist_alloc(&pools, 0, 0) != 0) {
		(void) no_memory(hdl);
		nvlist_free(raw);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) {
		verify(nvpair_value_nvlist(elem, &src) == 0);

		verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		if (poolname != NULL && strcmp(poolname, name) != 0)
			continue;

		verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID,
		    &this_guid) == 0);
		if (guid != 0) {
			verify(nvlist_lookup_uint64(src,
			    ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0);
			if (guid != this_guid)
				continue;
		}

		if (pool_active(hdl, name, this_guid, &active) != 0) {
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}

		if (active)
			continue;

		if ((dst = refresh_config(hdl, src)) == NULL) {
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}

		if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) {
			(void) no_memory(hdl);
			nvlist_free(dst);
			nvlist_free(raw);
			nvlist_free(pools);
			return (NULL);
		}
		nvlist_free(dst);
	}

	nvlist_free(raw);
	return (pools);
}

boolean_t
find_guid(nvlist_t *nv, uint64_t guid)
{
	uint64_t tmp;
	nvlist_t **child;
	uint_t c, children;

	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0);
	if (tmp == guid)
		return (B_TRUE);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++)
			if (find_guid(child[c], guid))
				return (B_TRUE);
	}

	return (B_FALSE);
}

typedef struct aux_cbdata {
	const char	*cb_type;
	uint64_t	cb_guid;
	zpool_handle_t	*cb_zhp;
} aux_cbdata_t;

static int
find_aux(zpool_handle_t *zhp, void *data)
{
	aux_cbdata_t *cbp = data;
	nvlist_t **list;
	uint_t i, count;
	uint64_t guid;
	nvlist_t *nvroot;

	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);

	if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type,
	    &list, &count) == 0) {
		for (i = 0; i < count; i++) {
			verify(nvlist_lookup_uint64(list[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (guid == cbp->cb_guid) {
				cbp->cb_zhp = zhp;
				return (1);
			}
		}
	}

	zpool_close(zhp);
	return (0);
}

/*
 * Determines if the pool is in use. If so, it returns B_TRUE in *inuse and
 * reports the state and the name of the pool. The name string is allocated
 * and must be freed by the caller.
 */
int
zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr,
    boolean_t *inuse)
{
	nvlist_t *config;
	char *name;
	boolean_t ret;
	uint64_t guid, vdev_guid;
	zpool_handle_t *zhp;
	nvlist_t *pool_config;
	uint64_t stateval, isspare;
	aux_cbdata_t cb = { 0 };
	boolean_t isactive;

	*inuse = B_FALSE;

	if (zpool_read_label(fd, &config) != 0) {
		(void) no_memory(hdl);
		return (-1);
	}

	if (config == NULL)
		return (0);

	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    &stateval) == 0);
	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID,
	    &vdev_guid) == 0);

	if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) {
		verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
		    &name) == 0);
		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &guid) == 0);
	}

	switch (stateval) {
	case POOL_STATE_EXPORTED:
		ret = B_TRUE;
		break;

	case POOL_STATE_ACTIVE:
		/*
		 * For an active pool, we have to determine if it's really part
		 * of a currently active pool (in which case the pool will exist
		 * and the guid will be the same), or whether it's part of an
		 * active pool that was disconnected without being explicitly
		 * exported.
		 */
		if (pool_active(hdl, name, guid, &isactive) != 0) {
			nvlist_free(config);
			return (-1);
		}

		if (isactive) {
			/*
			 * Because the device may have been removed while
			 * offlined, we only report it as active if the vdev is
			 * still present in the config. Otherwise, pretend like
			 * it's not in use.
			 */
			if ((zhp = zpool_open_canfail(hdl, name)) != NULL &&
			    (pool_config = zpool_get_config(zhp, NULL))
			    != NULL) {
				nvlist_t *nvroot;

				verify(nvlist_lookup_nvlist(pool_config,
				    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
				ret = find_guid(nvroot, vdev_guid);
			} else {
				ret = B_FALSE;
			}

			/*
			 * If this is an active spare within another pool, we
			 * treat it like an unused hot spare. This allows the
			 * user to create a pool with a hot spare that is
			 * currently in use within another pool. Since we
			 * return B_TRUE, libdiskmgt will continue to prevent
			 * generic consumers from using the device.
			 */
			if (ret && nvlist_lookup_uint64(config,
			    ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare)
				stateval = POOL_STATE_SPARE;

			if (zhp != NULL)
				zpool_close(zhp);
		} else {
			stateval = POOL_STATE_POTENTIALLY_ACTIVE;
			ret = B_TRUE;
		}
		break;

	case POOL_STATE_SPARE:
		/*
		 * For a hot spare, it can be either definitively in use, or
		 * potentially active. To determine if it's in use, we iterate
		 * over all pools in the system and search for one with a spare
		 * with a matching guid.
		 *
		 * Due to the shared nature of spares, we don't actually report
		 * the potentially active case as in use. This means the user
		 * can freely create pools on the hot spares of exported pools,
		 * but to do otherwise makes the resulting code complicated, and
		 * we end up having to deal with this case anyway.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_SPARES;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	case POOL_STATE_L2CACHE:

		/*
		 * Check if any pool is currently using this l2cache device.
		 */
		cb.cb_zhp = NULL;
		cb.cb_guid = vdev_guid;
		cb.cb_type = ZPOOL_CONFIG_L2CACHE;
		if (zpool_iter(hdl, find_aux, &cb) == 1) {
			name = (char *)zpool_get_name(cb.cb_zhp);
			ret = B_TRUE;
		} else {
			ret = B_FALSE;
		}
		break;

	default:
		ret = B_FALSE;
	}

	if (ret) {
		if ((*namestr = zfs_strdup(hdl, name)) == NULL) {
			if (cb.cb_zhp)
				zpool_close(cb.cb_zhp);
			nvlist_free(config);
			return (-1);
		}
		*state = (pool_state_t)stateval;
	}

	if (cb.cb_zhp)
		zpool_close(cb.cb_zhp);

	nvlist_free(config);
	*inuse = ret;
	return (0);
}
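
/*
 * A minimal usage sketch for zpool_in_use() (illustrative only; real callers
 * include device-vetting code such as zpool(1M) and libdiskmgt): check both
 * the return value and *inuse, and free the returned name when done.
 *
 *	pool_state_t state;
 *	char *name;
 *	boolean_t inuse;
 *
 *	if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
 *		(warn that the device belongs to pool 'name' in state 'state')
 *		free(name);
 *	}
 */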