1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Pool import support functions. 31 * 32 * To import a pool, we rely on reading the configuration information from the 33 * ZFS label of each device. If we successfully read the label, then we 34 * organize the configuration information in the following hierarchy: 35 * 36 * pool guid -> toplevel vdev guid -> label txg 37 * 38 * Duplicate entries matching this same tuple will be discarded. Once we have 39 * examined every device, we pick the best label txg config for each toplevel 40 * vdev. We then arrange these toplevel vdevs into a complete pool config, and 41 * update any paths that have changed. Finally, we attempt to import the pool 42 * using our derived config, and record the results. 43 */ 44 45 #include <devid.h> 46 #include <dirent.h> 47 #include <errno.h> 48 #include <libintl.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include <sys/stat.h> 52 #include <unistd.h> 53 #include <fcntl.h> 54 55 #include <sys/vdev_impl.h> 56 57 #include "libzfs.h" 58 #include "libzfs_impl.h" 59 60 /* 61 * Intermediate structures used to gather configuration information. 62 */ 63 typedef struct config_entry { 64 uint64_t ce_txg; 65 nvlist_t *ce_config; 66 struct config_entry *ce_next; 67 } config_entry_t; 68 69 typedef struct vdev_entry { 70 uint64_t ve_guid; 71 config_entry_t *ve_configs; 72 struct vdev_entry *ve_next; 73 } vdev_entry_t; 74 75 typedef struct pool_entry { 76 uint64_t pe_guid; 77 vdev_entry_t *pe_vdevs; 78 struct pool_entry *pe_next; 79 } pool_entry_t; 80 81 typedef struct name_entry { 82 const char *ne_name; 83 uint64_t ne_guid; 84 struct name_entry *ne_next; 85 } name_entry_t; 86 87 typedef struct pool_list { 88 pool_entry_t *pools; 89 name_entry_t *names; 90 } pool_list_t; 91 92 static char * 93 get_devid(const char *path) 94 { 95 int fd; 96 ddi_devid_t devid; 97 char *minor, *ret; 98 99 if ((fd = open(path, O_RDONLY)) < 0) 100 return (NULL); 101 102 minor = NULL; 103 ret = NULL; 104 if (devid_get(fd, &devid) == 0) { 105 if (devid_get_minor_name(fd, &minor) == 0) 106 ret = devid_str_encode(devid, minor); 107 if (minor != NULL) 108 devid_str_free(minor); 109 devid_free(devid); 110 } 111 112 return (ret); 113 } 114 115 116 /* 117 * Go through and fix up any path and/or devid information for the given vdev 118 * configuration. 119 */ 120 static void 121 fix_paths(nvlist_t *nv, name_entry_t *names) 122 { 123 nvlist_t **child; 124 uint_t c, children; 125 uint64_t guid; 126 name_entry_t *ne; 127 char *devid; 128 129 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 130 &child, &children) == 0) { 131 for (c = 0; c < children; c++) 132 fix_paths(child[c], names); 133 return; 134 } 135 136 /* 137 * This is a leaf (file or disk) vdev. In either case, go through 138 * the name list and see if we find a matching guid. If so, replace 139 * the path and see if we can calculate a new devid. 140 */ 141 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); 142 143 for (ne = names; ne != NULL; ne = ne->ne_next) 144 if (ne->ne_guid == guid) 145 break; 146 147 if (ne == NULL) 148 return; 149 150 verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, ne->ne_name) == 0); 151 152 if ((devid = get_devid(ne->ne_name)) == NULL) { 153 (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); 154 } else { 155 verify(nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) == 0); 156 devid_str_free(devid); 157 } 158 } 159 160 /* 161 * Add the given configuration to the list of known devices. 162 */ 163 static void 164 add_config(pool_list_t *pl, const char *path, nvlist_t *config) 165 { 166 uint64_t pool_guid, vdev_guid, top_guid, txg; 167 pool_entry_t *pe; 168 vdev_entry_t *ve; 169 config_entry_t *ce; 170 name_entry_t *ne; 171 172 /* 173 * If we have a valid config but cannot read any of these fields, then 174 * it means we have a half-initialized label. In vdev_label_init() 175 * we write a label with txg == 0 so that we can identify the device 176 * in case the user refers to the same disk later on. If we fail to 177 * create the pool, we'll be left with a label in this state 178 * which should not be considered part of a valid pool. 179 */ 180 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 181 &pool_guid) != 0 || 182 nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, 183 &vdev_guid) != 0 || 184 nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, 185 &top_guid) != 0 || 186 nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 187 &txg) != 0 || txg == 0) { 188 nvlist_free(config); 189 return; 190 } 191 192 /* 193 * First, see if we know about this pool. If not, then add it to the 194 * list of known pools. 195 */ 196 for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { 197 if (pe->pe_guid == pool_guid) 198 break; 199 } 200 201 if (pe == NULL) { 202 pe = zfs_malloc(sizeof (pool_entry_t)); 203 pe->pe_guid = pool_guid; 204 pe->pe_next = pl->pools; 205 pl->pools = pe; 206 } 207 208 /* 209 * Second, see if we know about this toplevel vdev. Add it if its 210 * missing. 211 */ 212 for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { 213 if (ve->ve_guid == top_guid) 214 break; 215 } 216 217 if (ve == NULL) { 218 ve = zfs_malloc(sizeof (vdev_entry_t)); 219 ve->ve_guid = top_guid; 220 ve->ve_next = pe->pe_vdevs; 221 pe->pe_vdevs = ve; 222 } 223 224 /* 225 * Third, see if we have a config with a matching transaction group. If 226 * so, then we do nothing. Otherwise, add it to the list of known 227 * configs. 228 */ 229 for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { 230 if (ce->ce_txg == txg) 231 break; 232 } 233 234 if (ce == NULL) { 235 ce = zfs_malloc(sizeof (config_entry_t)); 236 ce->ce_txg = txg; 237 ce->ce_config = config; 238 ce->ce_next = ve->ve_configs; 239 ve->ve_configs = ce; 240 } else { 241 nvlist_free(config); 242 } 243 244 /* 245 * At this point we've successfully added our config to the list of 246 * known configs. The last thing to do is add the vdev guid -> path 247 * mappings so that we can fix up the configuration as necessary before 248 * doing the import. 249 */ 250 ne = zfs_malloc(sizeof (name_entry_t)); 251 252 ne->ne_name = zfs_strdup(path); 253 ne->ne_guid = vdev_guid; 254 ne->ne_next = pl->names; 255 pl->names = ne; 256 } 257 258 /* 259 * Convert our list of pools into the definitive set of configurations. We 260 * start by picking the best config for each toplevel vdev. Once that's done, 261 * we assemble the toplevel vdevs into a full config for the pool. We make a 262 * pass to fix up any incorrect paths, and then add it to the main list to 263 * return to the user. 264 */ 265 static nvlist_t * 266 get_configs(pool_list_t *pl) 267 { 268 pool_entry_t *pe, *penext; 269 vdev_entry_t *ve, *venext; 270 config_entry_t *ce, *cenext; 271 nvlist_t *ret, *config, *tmp, *nvtop, *nvroot; 272 int config_seen; 273 uint64_t best_txg; 274 char *name; 275 zfs_cmd_t zc = { 0 }; 276 uint64_t guid; 277 char *packed; 278 size_t len; 279 int err; 280 281 verify(nvlist_alloc(&ret, 0, 0) == 0); 282 283 for (pe = pl->pools; pe != NULL; pe = penext) { 284 uint_t c; 285 uint_t children = 0; 286 uint64_t id; 287 nvlist_t **child = NULL; 288 289 penext = pe->pe_next; 290 291 verify(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); 292 config_seen = FALSE; 293 294 /* 295 * Iterate over all toplevel vdevs. Grab the pool configuration 296 * from the first one we find, and then go through the rest and 297 * add them as necessary to the 'vdevs' member of the config. 298 */ 299 for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { 300 venext = ve->ve_next; 301 302 /* 303 * Determine the best configuration for this vdev by 304 * selecting the config with the latest transaction 305 * group. 306 */ 307 best_txg = 0; 308 for (ce = ve->ve_configs; ce != NULL; 309 ce = ce->ce_next) { 310 311 if (ce->ce_txg > best_txg) 312 tmp = ce->ce_config; 313 } 314 315 if (!config_seen) { 316 /* 317 * Copy the relevant pieces of data to the pool 318 * configuration: 319 * 320 * pool guid 321 * name 322 * pool state 323 */ 324 uint64_t state; 325 326 verify(nvlist_lookup_uint64(tmp, 327 ZPOOL_CONFIG_POOL_GUID, &guid) == 0); 328 verify(nvlist_add_uint64(config, 329 ZPOOL_CONFIG_POOL_GUID, guid) == 0); 330 verify(nvlist_lookup_string(tmp, 331 ZPOOL_CONFIG_POOL_NAME, &name) == 0); 332 verify(nvlist_add_string(config, 333 ZPOOL_CONFIG_POOL_NAME, name) == 0); 334 verify(nvlist_lookup_uint64(tmp, 335 ZPOOL_CONFIG_POOL_STATE, &state) == 0); 336 verify(nvlist_add_uint64(config, 337 ZPOOL_CONFIG_POOL_STATE, state) == 0); 338 339 config_seen = TRUE; 340 } 341 342 /* 343 * Add this top-level vdev to the child array. 344 */ 345 verify(nvlist_lookup_nvlist(tmp, 346 ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); 347 verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, 348 &id) == 0); 349 if (id >= children) { 350 nvlist_t **newchild; 351 352 newchild = zfs_malloc((id + 1) * 353 sizeof (nvlist_t *)); 354 355 for (c = 0; c < children; c++) 356 newchild[c] = child[c]; 357 358 free(child); 359 child = newchild; 360 children = id + 1; 361 } 362 verify(nvlist_dup(nvtop, &child[id], 0) == 0); 363 364 /* 365 * Go through and free all config information. 366 */ 367 for (ce = ve->ve_configs; ce != NULL; ce = cenext) { 368 cenext = ce->ce_next; 369 370 nvlist_free(ce->ce_config); 371 free(ce); 372 } 373 374 /* 375 * Free this vdev entry, since it has now been merged 376 * into the main config. 377 */ 378 free(ve); 379 } 380 381 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 382 &guid) == 0); 383 384 /* 385 * Look for any missing top-level vdevs. If this is the case, 386 * create a faked up 'missing' vdev as a placeholder. We cannot 387 * simply compress the child array, because the kernel performs 388 * certain checks to make sure the vdev IDs match their location 389 * in the configuration. 390 */ 391 for (c = 0; c < children; c++) 392 if (child[c] == NULL) { 393 nvlist_t *missing; 394 verify(nvlist_alloc(&missing, NV_UNIQUE_NAME, 395 0) == 0); 396 verify(nvlist_add_string(missing, 397 ZPOOL_CONFIG_TYPE, VDEV_TYPE_MISSING) == 0); 398 verify(nvlist_add_uint64(missing, 399 ZPOOL_CONFIG_ID, c) == 0); 400 verify(nvlist_add_uint64(missing, 401 ZPOOL_CONFIG_GUID, 0ULL) == 0); 402 child[c] = missing; 403 } 404 405 /* 406 * Put all of this pool's top-level vdevs into a root vdev. 407 */ 408 verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0); 409 verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, 410 VDEV_TYPE_ROOT) == 0); 411 verify(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); 412 verify(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) == 0); 413 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, 414 child, children) == 0); 415 416 for (c = 0; c < children; c++) 417 nvlist_free(child[c]); 418 free(child); 419 420 /* 421 * Go through and fix up any paths and/or devids based on our 422 * known list of vdev GUID -> path mappings. 423 */ 424 fix_paths(nvroot, pl->names); 425 426 /* 427 * Add the root vdev to this pool's configuration. 428 */ 429 verify(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 430 nvroot) == 0); 431 nvlist_free(nvroot); 432 433 /* 434 * Free this pool entry. 435 */ 436 free(pe); 437 438 /* 439 * Determine if this pool is currently active, in which case we 440 * can't actually import it. 441 */ 442 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 443 &name) == 0); 444 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 445 &guid) == 0); 446 447 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 448 if (ioctl(zfs_fd, ZFS_IOC_POOL_GUID, &zc) == 0 && 449 guid == zc.zc_pool_guid) { 450 nvlist_free(config); 451 continue; 452 } 453 454 /* 455 * Try to do the import in order to get vdev state. 456 */ 457 if ((err = nvlist_size(config, &len, NV_ENCODE_NATIVE)) != 0) 458 zfs_baderror(err); 459 460 packed = zfs_malloc(len); 461 462 if ((err = nvlist_pack(config, &packed, &len, 463 NV_ENCODE_NATIVE, 0)) != 0) 464 zfs_baderror(err); 465 466 nvlist_free(config); 467 config = NULL; 468 469 zc.zc_config_src_size = len; 470 zc.zc_config_src = (uint64_t)(uintptr_t)packed; 471 472 zc.zc_config_dst_size = 2 * len; 473 zc.zc_config_dst = (uint64_t)(uintptr_t) 474 zfs_malloc(zc.zc_config_dst_size); 475 476 while ((err = ioctl(zfs_fd, ZFS_IOC_POOL_TRYIMPORT, 477 &zc)) != 0 && errno == ENOMEM) { 478 free((void *)(uintptr_t)zc.zc_config_dst); 479 zc.zc_config_dst = (uint64_t)(uintptr_t) 480 zfs_malloc(zc.zc_config_dst_size); 481 } 482 483 free(packed); 484 485 if (err) 486 zfs_baderror(errno); 487 488 verify(nvlist_unpack((void *)(uintptr_t)zc.zc_config_dst, 489 zc.zc_config_dst_size, &config, 0) == 0); 490 491 set_pool_health(config); 492 493 /* 494 * Add this pool to the list of configs. 495 */ 496 verify(nvlist_add_nvlist(ret, name, config) == 0); 497 498 nvlist_free(config); 499 500 free((void *)(uintptr_t)zc.zc_config_dst); 501 } 502 503 return (ret); 504 } 505 506 /* 507 * Return the offset of the given label. 508 */ 509 static uint64_t 510 label_offset(size_t size, int l) 511 { 512 return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 513 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); 514 } 515 516 /* 517 * Given a file descriptor, read the label information and return an nvlist 518 * describing the configuration, if there is one. 519 */ 520 nvlist_t * 521 zpool_read_label(int fd) 522 { 523 struct stat64 statbuf; 524 int l; 525 vdev_label_t *label; 526 nvlist_t *config; 527 uint64_t version, state, txg; 528 529 if (fstat64(fd, &statbuf) == -1) 530 return (NULL); 531 532 label = zfs_malloc(sizeof (vdev_label_t)); 533 534 for (l = 0; l < VDEV_LABELS; l++) { 535 if (pread(fd, label, sizeof (vdev_label_t), 536 label_offset(statbuf.st_size, l)) != sizeof (vdev_label_t)) 537 continue; 538 539 if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, 540 sizeof (label->vl_vdev_phys.vp_nvlist), &config, 0) != 0) 541 continue; 542 543 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 544 &version) != 0 || version != UBERBLOCK_VERSION) { 545 nvlist_free(config); 546 continue; 547 } 548 549 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 550 &state) != 0 || state > POOL_STATE_EXPORTED) { 551 nvlist_free(config); 552 continue; 553 } 554 555 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, 556 &txg) != 0 || txg == 0) { 557 nvlist_free(config); 558 continue; 559 } 560 561 free(label); 562 return (config); 563 } 564 565 free(label); 566 return (NULL); 567 } 568 569 /* 570 * Given a list of directories to search, find all pools stored on disk. This 571 * includes partial pools which are not available to import. If no args are 572 * given (argc is 0), then the default directory (/dev/dsk) is searched. 573 */ 574 nvlist_t * 575 zpool_find_import(int argc, char **argv) 576 { 577 int i; 578 DIR *dirp; 579 struct dirent64 *dp; 580 char path[MAXPATHLEN]; 581 struct stat64 statbuf; 582 nvlist_t *ret, *config; 583 static char *default_dir = "/dev/dsk"; 584 int fd; 585 pool_list_t pools = { 0 }; 586 587 if (argc == 0) { 588 argc = 1; 589 argv = &default_dir; 590 } 591 592 /* 593 * Go through and read the label configuration information from every 594 * possible device, organizing the information according to pool GUID 595 * and toplevel GUID. 596 */ 597 for (i = 0; i < argc; i++) { 598 if (argv[i][0] != '/') { 599 zfs_error(dgettext(TEXT_DOMAIN, 600 "cannot open '%s': must be an absolute path"), 601 argv[i]); 602 return (NULL); 603 } 604 605 if ((dirp = opendir(argv[i])) == NULL) { 606 zfs_error(dgettext(TEXT_DOMAIN, 607 "cannot open '%s': %s"), argv[i], 608 strerror(errno)); 609 return (NULL); 610 } 611 612 /* 613 * This is not MT-safe, but we have no MT consumers of libzfs 614 */ 615 while ((dp = readdir64(dirp)) != NULL) { 616 617 (void) snprintf(path, sizeof (path), "%s/%s", 618 argv[i], dp->d_name); 619 620 if (stat64(path, &statbuf) != 0) 621 continue; 622 623 /* 624 * Ignore directories (which includes "." and ".."). 625 */ 626 if (S_ISDIR(statbuf.st_mode)) 627 continue; 628 629 if ((fd = open64(path, O_RDONLY)) < 0) 630 continue; 631 632 config = zpool_read_label(fd); 633 634 (void) close(fd); 635 636 if (config != NULL) 637 add_config(&pools, path, config); 638 } 639 } 640 641 ret = get_configs(&pools); 642 643 return (ret); 644 } 645 646 int 647 find_guid(nvlist_t *nv, uint64_t guid) 648 { 649 uint64_t tmp; 650 nvlist_t **child; 651 uint_t c, children; 652 653 verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0); 654 if (tmp == guid) 655 return (TRUE); 656 657 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 658 &child, &children) == 0) { 659 for (c = 0; c < children; c++) 660 if (find_guid(child[c], guid)) 661 return (TRUE); 662 } 663 664 return (FALSE); 665 } 666 667 /* 668 * Determines if the pool is in use. If so, it returns TRUE and the state of 669 * the pool as well as the name of the pool. Both strings are allocated and 670 * must be freed by the caller. 671 */ 672 int 673 zpool_in_use(int fd, pool_state_t *state, char **namestr) 674 { 675 nvlist_t *config; 676 char *name; 677 int ret; 678 zfs_cmd_t zc = { 0 }; 679 uint64_t guid, vdev_guid; 680 zpool_handle_t *zhp; 681 nvlist_t *pool_config; 682 uint64_t stateval; 683 684 if ((config = zpool_read_label(fd)) == NULL) 685 return (FALSE); 686 687 verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, 688 &name) == 0); 689 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 690 &stateval) == 0); 691 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, 692 &guid) == 0); 693 verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, 694 &vdev_guid) == 0); 695 696 switch (stateval) { 697 case POOL_STATE_EXPORTED: 698 ret = TRUE; 699 break; 700 701 case POOL_STATE_ACTIVE: 702 /* 703 * For an active pool, we have to determine if it's really part 704 * of an active pool (in which case the pool will exist and the 705 * guid will be the same), or whether it's part of an active 706 * pool that was disconnected without being explicitly exported. 707 * 708 * We use the direct ioctl() first to avoid triggering an error 709 * message if the pool cannot be opened. 710 */ 711 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); 712 if (ioctl(zfs_fd, ZFS_IOC_POOL_GUID, &zc) == 0 && 713 guid == zc.zc_pool_guid) { 714 /* 715 * Because the device may have been removed while 716 * offlined, we only report it as active if the vdev is 717 * still present in the config. Otherwise, pretend like 718 * it's not in use. 719 */ 720 if ((zhp = zpool_open_canfail(name)) != NULL && 721 (pool_config = zpool_get_config(zhp, NULL)) 722 != NULL) { 723 nvlist_t *nvroot; 724 725 verify(nvlist_lookup_nvlist(pool_config, 726 ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); 727 ret = find_guid(nvroot, vdev_guid); 728 } else { 729 ret = FALSE; 730 } 731 } else { 732 stateval = POOL_STATE_POTENTIALLY_ACTIVE; 733 ret = TRUE; 734 } 735 break; 736 737 default: 738 ret = FALSE; 739 } 740 741 742 if (ret) { 743 *namestr = zfs_strdup(name); 744 *state = (pool_state_t)stateval; 745 } 746 747 nvlist_free(config); 748 return (ret); 749 } 750