1 /*- 2 * Copyright (c) 2007 Doug Rabson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* 33 * Stand-alone file reading package. 34 */ 35 36 #include <stand.h> 37 #include <sys/disk.h> 38 #include <sys/param.h> 39 #include <sys/time.h> 40 #include <sys/queue.h> 41 #include <disk.h> 42 #include <part.h> 43 #include <stddef.h> 44 #include <stdarg.h> 45 #include <string.h> 46 #include <bootstrap.h> 47 48 #include "libzfs.h" 49 50 #include "zfsimpl.c" 51 52 /* Define the range of indexes to be populated with ZFS Boot Environments */ 53 #define ZFS_BE_FIRST 4 54 #define ZFS_BE_LAST 8 55 56 static int zfs_open(const char *path, struct open_file *f); 57 static int zfs_close(struct open_file *f); 58 static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid); 59 static off_t zfs_seek(struct open_file *f, off_t offset, int where); 60 static int zfs_stat(struct open_file *f, struct stat *sb); 61 static int zfs_readdir(struct open_file *f, struct dirent *d); 62 63 static void zfs_bootenv_initial(const char *envname, spa_t *spa, 64 const char *name, const char *dsname, int checkpoint); 65 static void zfs_checkpoints_initial(spa_t *spa, const char *name, 66 const char *dsname); 67 68 struct devsw zfs_dev; 69 70 struct fs_ops zfs_fsops = { 71 "zfs", 72 zfs_open, 73 zfs_close, 74 zfs_read, 75 null_write, 76 zfs_seek, 77 zfs_stat, 78 zfs_readdir 79 }; 80 81 /* 82 * In-core open file. 83 */ 84 struct file { 85 off_t f_seekp; /* seek pointer */ 86 dnode_phys_t f_dnode; 87 uint64_t f_zap_type; /* zap type for readdir */ 88 uint64_t f_num_leafs; /* number of fzap leaf blocks */ 89 zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */ 90 }; 91 92 static int zfs_env_index; 93 static int zfs_env_count; 94 95 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head); 96 struct zfs_be_list *zfs_be_headp; 97 struct zfs_be_entry { 98 char *name; 99 SLIST_ENTRY(zfs_be_entry) entries; 100 } *zfs_be, *zfs_be_tmp; 101 102 /* 103 * Open a file. 104 */ 105 static int 106 zfs_open(const char *upath, struct open_file *f) 107 { 108 struct zfsmount *mount = (struct zfsmount *)f->f_devdata; 109 struct file *fp; 110 int rc; 111 112 if (f->f_dev != &zfs_dev) 113 return (EINVAL); 114 115 /* allocate file system specific data structure */ 116 fp = calloc(1, sizeof(struct file)); 117 if (fp == NULL) 118 return (ENOMEM); 119 f->f_fsdata = fp; 120 121 rc = zfs_lookup(mount, upath, &fp->f_dnode); 122 fp->f_seekp = 0; 123 if (rc) { 124 f->f_fsdata = NULL; 125 free(fp); 126 } 127 return (rc); 128 } 129 130 static int 131 zfs_close(struct open_file *f) 132 { 133 struct file *fp = (struct file *)f->f_fsdata; 134 135 dnode_cache_obj = NULL; 136 f->f_fsdata = NULL; 137 138 free(fp); 139 return (0); 140 } 141 142 /* 143 * Copy a portion of a file into kernel memory. 144 * Cross block boundaries when necessary. 145 */ 146 static int 147 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) 148 { 149 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 150 struct file *fp = (struct file *)f->f_fsdata; 151 struct stat sb; 152 size_t n; 153 int rc; 154 155 rc = zfs_stat(f, &sb); 156 if (rc) 157 return (rc); 158 n = size; 159 if (fp->f_seekp + n > sb.st_size) 160 n = sb.st_size - fp->f_seekp; 161 162 rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); 163 if (rc) 164 return (rc); 165 166 if (0) { 167 int i; 168 for (i = 0; i < n; i++) 169 putchar(((char*) start)[i]); 170 } 171 fp->f_seekp += n; 172 if (resid) 173 *resid = size - n; 174 175 return (0); 176 } 177 178 static off_t 179 zfs_seek(struct open_file *f, off_t offset, int where) 180 { 181 struct file *fp = (struct file *)f->f_fsdata; 182 183 switch (where) { 184 case SEEK_SET: 185 fp->f_seekp = offset; 186 break; 187 case SEEK_CUR: 188 fp->f_seekp += offset; 189 break; 190 case SEEK_END: 191 { 192 struct stat sb; 193 int error; 194 195 error = zfs_stat(f, &sb); 196 if (error != 0) { 197 errno = error; 198 return (-1); 199 } 200 fp->f_seekp = sb.st_size - offset; 201 break; 202 } 203 default: 204 errno = EINVAL; 205 return (-1); 206 } 207 return (fp->f_seekp); 208 } 209 210 static int 211 zfs_stat(struct open_file *f, struct stat *sb) 212 { 213 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 214 struct file *fp = (struct file *)f->f_fsdata; 215 216 return (zfs_dnode_stat(spa, &fp->f_dnode, sb)); 217 } 218 219 static int 220 zfs_readdir(struct open_file *f, struct dirent *d) 221 { 222 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 223 struct file *fp = (struct file *)f->f_fsdata; 224 mzap_ent_phys_t mze; 225 struct stat sb; 226 size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; 227 int rc; 228 229 rc = zfs_stat(f, &sb); 230 if (rc) 231 return (rc); 232 if (!S_ISDIR(sb.st_mode)) 233 return (ENOTDIR); 234 235 /* 236 * If this is the first read, get the zap type. 237 */ 238 if (fp->f_seekp == 0) { 239 rc = dnode_read(spa, &fp->f_dnode, 240 0, &fp->f_zap_type, sizeof(fp->f_zap_type)); 241 if (rc) 242 return (rc); 243 244 if (fp->f_zap_type == ZBT_MICRO) { 245 fp->f_seekp = offsetof(mzap_phys_t, mz_chunk); 246 } else { 247 rc = dnode_read(spa, &fp->f_dnode, 248 offsetof(zap_phys_t, zap_num_leafs), 249 &fp->f_num_leafs, 250 sizeof(fp->f_num_leafs)); 251 if (rc) 252 return (rc); 253 254 fp->f_seekp = bsize; 255 fp->f_zap_leaf = malloc(bsize); 256 if (fp->f_zap_leaf == NULL) 257 return (ENOMEM); 258 rc = dnode_read(spa, &fp->f_dnode, 259 fp->f_seekp, 260 fp->f_zap_leaf, 261 bsize); 262 if (rc) 263 return (rc); 264 } 265 } 266 267 if (fp->f_zap_type == ZBT_MICRO) { 268 mzap_next: 269 if (fp->f_seekp >= bsize) 270 return (ENOENT); 271 272 rc = dnode_read(spa, &fp->f_dnode, 273 fp->f_seekp, &mze, sizeof(mze)); 274 if (rc) 275 return (rc); 276 fp->f_seekp += sizeof(mze); 277 278 if (!mze.mze_name[0]) 279 goto mzap_next; 280 281 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value); 282 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value); 283 strcpy(d->d_name, mze.mze_name); 284 d->d_namlen = strlen(d->d_name); 285 return (0); 286 } else { 287 zap_leaf_t zl; 288 zap_leaf_chunk_t *zc, *nc; 289 int chunk; 290 size_t namelen; 291 char *p; 292 uint64_t value; 293 294 /* 295 * Initialise this so we can use the ZAP size 296 * calculating macros. 297 */ 298 zl.l_bs = ilog2(bsize); 299 zl.l_phys = fp->f_zap_leaf; 300 301 /* 302 * Figure out which chunk we are currently looking at 303 * and consider seeking to the next leaf. We use the 304 * low bits of f_seekp as a simple chunk index. 305 */ 306 fzap_next: 307 chunk = fp->f_seekp & (bsize - 1); 308 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) { 309 fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize; 310 chunk = 0; 311 312 /* 313 * Check for EOF and read the new leaf. 314 */ 315 if (fp->f_seekp >= bsize * fp->f_num_leafs) 316 return (ENOENT); 317 318 rc = dnode_read(spa, &fp->f_dnode, 319 fp->f_seekp, 320 fp->f_zap_leaf, 321 bsize); 322 if (rc) 323 return (rc); 324 } 325 326 zc = &ZAP_LEAF_CHUNK(&zl, chunk); 327 fp->f_seekp++; 328 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) 329 goto fzap_next; 330 331 namelen = zc->l_entry.le_name_numints; 332 if (namelen > sizeof(d->d_name)) 333 namelen = sizeof(d->d_name); 334 335 /* 336 * Paste the name back together. 337 */ 338 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); 339 p = d->d_name; 340 while (namelen > 0) { 341 int len; 342 len = namelen; 343 if (len > ZAP_LEAF_ARRAY_BYTES) 344 len = ZAP_LEAF_ARRAY_BYTES; 345 memcpy(p, nc->l_array.la_array, len); 346 p += len; 347 namelen -= len; 348 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); 349 } 350 d->d_name[sizeof(d->d_name) - 1] = 0; 351 352 /* 353 * Assume the first eight bytes of the value are 354 * a uint64_t. 355 */ 356 value = fzap_leaf_value(&zl, zc); 357 358 d->d_fileno = ZFS_DIRENT_OBJ(value); 359 d->d_type = ZFS_DIRENT_TYPE(value); 360 d->d_namlen = strlen(d->d_name); 361 362 return (0); 363 } 364 } 365 366 static int 367 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes) 368 { 369 int fd, ret; 370 size_t res, head, tail, total_size, full_sec_size; 371 unsigned secsz, do_tail_read; 372 off_t start_sec; 373 char *outbuf, *bouncebuf; 374 375 fd = (uintptr_t) priv; 376 outbuf = (char *) buf; 377 bouncebuf = NULL; 378 379 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); 380 if (ret != 0) 381 return (ret); 382 383 /* 384 * Handling reads of arbitrary offset and size - multi-sector case 385 * and single-sector case. 386 * 387 * Multi-sector Case 388 * (do_tail_read = true if tail > 0) 389 * 390 * |<----------------------total_size--------------------->| 391 * | | 392 * |<--head-->|<--------------bytes------------>|<--tail-->| 393 * | | | | 394 * | | |<~full_sec_size~>| | | 395 * +------------------+ +------------------+ 396 * | |0101010| . . . |0101011| | 397 * +------------------+ +------------------+ 398 * start_sec start_sec + n 399 * 400 * 401 * Single-sector Case 402 * (do_tail_read = false) 403 * 404 * |<------total_size = secsz----->| 405 * | | 406 * |<-head->|<---bytes--->|<-tail->| 407 * +-------------------------------+ 408 * | |0101010101010| | 409 * +-------------------------------+ 410 * start_sec 411 */ 412 start_sec = offset / secsz; 413 head = offset % secsz; 414 total_size = roundup2(head + bytes, secsz); 415 tail = total_size - (head + bytes); 416 do_tail_read = ((tail > 0) && (head + bytes > secsz)); 417 full_sec_size = total_size; 418 if (head > 0) 419 full_sec_size -= secsz; 420 if (do_tail_read) 421 full_sec_size -= secsz; 422 423 /* Return of partial sector data requires a bounce buffer. */ 424 if ((head > 0) || do_tail_read || bytes < secsz) { 425 bouncebuf = malloc(secsz); 426 if (bouncebuf == NULL) { 427 printf("vdev_read: out of memory\n"); 428 return (ENOMEM); 429 } 430 } 431 432 if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) { 433 ret = errno; 434 goto error; 435 } 436 437 /* Partial data return from first sector */ 438 if (head > 0) { 439 res = read(fd, bouncebuf, secsz); 440 if (res != secsz) { 441 ret = EIO; 442 goto error; 443 } 444 memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes)); 445 outbuf += min(secsz - head, bytes); 446 } 447 448 /* 449 * Full data return from read sectors. 450 * Note, there is still corner case where we read 451 * from sector boundary, but less than sector size, e.g. reading 512B 452 * from 4k sector. 453 */ 454 if (full_sec_size > 0) { 455 if (bytes < full_sec_size) { 456 res = read(fd, bouncebuf, secsz); 457 if (res != secsz) { 458 ret = EIO; 459 goto error; 460 } 461 memcpy(outbuf, bouncebuf, bytes); 462 } else { 463 res = read(fd, outbuf, full_sec_size); 464 if (res != full_sec_size) { 465 ret = EIO; 466 goto error; 467 } 468 outbuf += full_sec_size; 469 } 470 } 471 472 /* Partial data return from last sector */ 473 if (do_tail_read) { 474 res = read(fd, bouncebuf, secsz); 475 if (res != secsz) { 476 ret = EIO; 477 goto error; 478 } 479 memcpy(outbuf, bouncebuf, secsz - tail); 480 } 481 482 ret = 0; 483 error: 484 free(bouncebuf); 485 return (ret); 486 } 487 488 static int 489 vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, 490 size_t bytes) 491 { 492 int fd, ret; 493 size_t head, tail, total_size, full_sec_size; 494 unsigned secsz, do_tail_write; 495 off_t start_sec; 496 ssize_t res; 497 char *outbuf, *bouncebuf; 498 499 fd = (uintptr_t)priv; 500 outbuf = (char *) buf; 501 bouncebuf = NULL; 502 503 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); 504 if (ret != 0) 505 return (ret); 506 507 start_sec = offset / secsz; 508 head = offset % secsz; 509 total_size = roundup2(head + bytes, secsz); 510 tail = total_size - (head + bytes); 511 do_tail_write = ((tail > 0) && (head + bytes > secsz)); 512 full_sec_size = total_size; 513 if (head > 0) 514 full_sec_size -= secsz; 515 if (do_tail_write) 516 full_sec_size -= secsz; 517 518 /* Partial sector write requires a bounce buffer. */ 519 if ((head > 0) || do_tail_write || bytes < secsz) { 520 bouncebuf = malloc(secsz); 521 if (bouncebuf == NULL) { 522 printf("vdev_write: out of memory\n"); 523 return (ENOMEM); 524 } 525 } 526 527 if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) { 528 ret = errno; 529 goto error; 530 } 531 532 /* Partial data for first sector */ 533 if (head > 0) { 534 res = read(fd, bouncebuf, secsz); 535 if (res != secsz) { 536 ret = EIO; 537 goto error; 538 } 539 memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes)); 540 (void) lseek(fd, -secsz, SEEK_CUR); 541 res = write(fd, bouncebuf, secsz); 542 if (res != secsz) { 543 ret = EIO; 544 goto error; 545 } 546 outbuf += min(secsz - head, bytes); 547 } 548 549 /* 550 * Full data write to sectors. 551 * Note, there is still corner case where we write 552 * to sector boundary, but less than sector size, e.g. write 512B 553 * to 4k sector. 554 */ 555 if (full_sec_size > 0) { 556 if (bytes < full_sec_size) { 557 res = read(fd, bouncebuf, secsz); 558 if (res != secsz) { 559 ret = EIO; 560 goto error; 561 } 562 memcpy(bouncebuf, outbuf, bytes); 563 (void) lseek(fd, -secsz, SEEK_CUR); 564 res = write(fd, bouncebuf, secsz); 565 if (res != secsz) { 566 ret = EIO; 567 goto error; 568 } 569 } else { 570 res = write(fd, outbuf, full_sec_size); 571 if (res != full_sec_size) { 572 ret = EIO; 573 goto error; 574 } 575 outbuf += full_sec_size; 576 } 577 } 578 579 /* Partial data write to last sector */ 580 if (do_tail_write) { 581 res = read(fd, bouncebuf, secsz); 582 if (res != secsz) { 583 ret = EIO; 584 goto error; 585 } 586 memcpy(bouncebuf, outbuf, secsz - tail); 587 (void) lseek(fd, -secsz, SEEK_CUR); 588 res = write(fd, bouncebuf, secsz); 589 if (res != secsz) { 590 ret = EIO; 591 goto error; 592 } 593 } 594 595 ret = 0; 596 error: 597 free(bouncebuf); 598 return (ret); 599 } 600 601 static void 602 vdev_clear_pad2(vdev_t *vdev) 603 { 604 vdev_t *kid; 605 vdev_boot_envblock_t *be; 606 off_t off = offsetof(vdev_label_t, vl_be); 607 zio_checksum_info_t *ci; 608 zio_cksum_t cksum; 609 610 STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { 611 if (kid->v_state != VDEV_STATE_HEALTHY) 612 continue; 613 vdev_clear_pad2(kid); 614 } 615 616 if (!STAILQ_EMPTY(&vdev->v_children)) 617 return; 618 619 be = calloc(1, sizeof (*be)); 620 if (be == NULL) { 621 printf("failed to clear be area: out of memory\n"); 622 return; 623 } 624 625 ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; 626 be->vbe_zbt.zec_magic = ZEC_MAGIC; 627 zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); 628 ci->ci_func[0](be, sizeof (*be), NULL, &cksum); 629 be->vbe_zbt.zec_cksum = cksum; 630 631 if (vdev_write(vdev, vdev->v_read_priv, off, be, VDEV_PAD_SIZE)) { 632 printf("failed to clear be area of primary vdev: %d\n", 633 errno); 634 } 635 free(be); 636 } 637 638 /* 639 * Read the next boot command from pad2. 640 * If any instance of pad2 is set to empty string, or the returned string 641 * values are not the same, we consider next boot not to be set. 642 */ 643 static char * 644 vdev_read_pad2(vdev_t *vdev) 645 { 646 vdev_t *kid; 647 char *tmp, *result = NULL; 648 vdev_boot_envblock_t *be; 649 off_t off = offsetof(vdev_label_t, vl_be); 650 651 STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { 652 if (kid->v_state != VDEV_STATE_HEALTHY) 653 continue; 654 tmp = vdev_read_pad2(kid); 655 if (tmp == NULL) 656 continue; 657 658 /* The next boot is not set, we are done. */ 659 if (*tmp == '\0') { 660 free(result); 661 return (tmp); 662 } 663 if (result == NULL) { 664 result = tmp; 665 continue; 666 } 667 /* Are the next boot strings different? */ 668 if (strcmp(result, tmp) != 0) { 669 free(tmp); 670 *result = '\0'; 671 break; 672 } 673 free(tmp); 674 } 675 if (result != NULL) 676 return (result); 677 678 be = malloc(sizeof (*be)); 679 if (be == NULL) 680 return (NULL); 681 682 if (vdev_read(vdev, vdev->v_read_priv, off, be, sizeof (*be))) { 683 return (NULL); 684 } 685 686 switch (be->vbe_version) { 687 case VB_RAW: 688 case VB_NVLIST: 689 result = strdup(be->vbe_bootenv); 690 default: 691 /* Backward compatibility with initial nextboot feaure. */ 692 result = strdup((char *)be); 693 } 694 return (result); 695 } 696 697 static int 698 zfs_dev_init(void) 699 { 700 spa_t *spa; 701 spa_t *next; 702 spa_t *prev; 703 704 zfs_init(); 705 if (archsw.arch_zfs_probe == NULL) 706 return (ENXIO); 707 archsw.arch_zfs_probe(); 708 709 prev = NULL; 710 spa = STAILQ_FIRST(&zfs_pools); 711 while (spa != NULL) { 712 next = STAILQ_NEXT(spa, spa_link); 713 if (zfs_spa_init(spa)) { 714 if (prev == NULL) 715 STAILQ_REMOVE_HEAD(&zfs_pools, spa_link); 716 else 717 STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link); 718 } else 719 prev = spa; 720 spa = next; 721 } 722 return (0); 723 } 724 725 struct zfs_probe_args { 726 int fd; 727 const char *devname; 728 uint64_t *pool_guid; 729 u_int secsz; 730 }; 731 732 static int 733 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset) 734 { 735 struct zfs_probe_args *ppa; 736 737 ppa = (struct zfs_probe_args *)arg; 738 return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd, 739 offset * ppa->secsz, buf, blocks * ppa->secsz)); 740 } 741 742 static int 743 zfs_probe(int fd, uint64_t *pool_guid) 744 { 745 spa_t *spa; 746 int ret; 747 748 spa = NULL; 749 ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); 750 if (ret == 0 && pool_guid != NULL) 751 *pool_guid = spa->spa_guid; 752 return (ret); 753 } 754 755 static int 756 zfs_probe_partition(void *arg, const char *partname, 757 const struct ptable_entry *part) 758 { 759 struct zfs_probe_args *ppa, pa; 760 struct ptable *table; 761 char devname[32]; 762 int ret; 763 764 /* Probe only freebsd-zfs and freebsd partitions */ 765 if (part->type != PART_FREEBSD && 766 part->type != PART_FREEBSD_ZFS) 767 return (0); 768 769 ppa = (struct zfs_probe_args *)arg; 770 strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); 771 devname[strlen(ppa->devname) - 1] = '\0'; 772 sprintf(devname, "%s%s:", devname, partname); 773 pa.fd = open(devname, O_RDWR); 774 if (pa.fd == -1) 775 return (0); 776 ret = zfs_probe(pa.fd, ppa->pool_guid); 777 if (ret == 0) 778 return (0); 779 /* Do we have BSD label here? */ 780 if (part->type == PART_FREEBSD) { 781 pa.devname = devname; 782 pa.pool_guid = ppa->pool_guid; 783 pa.secsz = ppa->secsz; 784 table = ptable_open(&pa, part->end - part->start + 1, 785 ppa->secsz, zfs_diskread); 786 if (table != NULL) { 787 ptable_iterate(table, &pa, zfs_probe_partition); 788 ptable_close(table); 789 } 790 } 791 close(pa.fd); 792 return (0); 793 } 794 795 int 796 zfs_nextboot(void *vdev, char *buf, size_t size) 797 { 798 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; 799 spa_t *spa; 800 vdev_t *vd; 801 char *result = NULL; 802 803 if (dev->dd.d_dev->dv_type != DEVT_ZFS) 804 return (1); 805 806 if (dev->pool_guid == 0) 807 spa = STAILQ_FIRST(&zfs_pools); 808 else 809 spa = spa_find_by_guid(dev->pool_guid); 810 811 if (spa == NULL) { 812 printf("ZFS: can't find pool by guid\n"); 813 return (1); 814 } 815 816 STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { 817 char *tmp = vdev_read_pad2(vd); 818 819 /* Continue on error. */ 820 if (tmp == NULL) 821 continue; 822 /* Nextboot is not set. */ 823 if (*tmp == '\0') { 824 free(result); 825 free(tmp); 826 return (1); 827 } 828 if (result == NULL) { 829 result = tmp; 830 continue; 831 } 832 free(tmp); 833 } 834 if (result == NULL) 835 return (1); 836 837 STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { 838 vdev_clear_pad2(vd); 839 } 840 841 strlcpy(buf, result, size); 842 free(result); 843 return (0); 844 } 845 846 int 847 zfs_probe_dev(const char *devname, uint64_t *pool_guid) 848 { 849 struct disk_devdesc *dev; 850 struct ptable *table; 851 struct zfs_probe_args pa; 852 uint64_t mediasz; 853 int ret; 854 855 if (pool_guid) 856 *pool_guid = 0; 857 pa.fd = open(devname, O_RDWR); 858 if (pa.fd == -1) 859 return (ENXIO); 860 /* 861 * We will not probe the whole disk, we can not boot from such 862 * disks and some systems will misreport the disk sizes and will 863 * hang while accessing the disk. 864 */ 865 if (archsw.arch_getdev((void **)&dev, devname, NULL) == 0) { 866 int partition = dev->d_partition; 867 int slice = dev->d_slice; 868 869 free(dev); 870 if (partition != D_PARTNONE && slice != D_SLICENONE) { 871 ret = zfs_probe(pa.fd, pool_guid); 872 if (ret == 0) 873 return (0); 874 } 875 } 876 877 /* Probe each partition */ 878 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); 879 if (ret == 0) 880 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz); 881 if (ret == 0) { 882 pa.devname = devname; 883 pa.pool_guid = pool_guid; 884 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz, 885 zfs_diskread); 886 if (table != NULL) { 887 ptable_iterate(table, &pa, zfs_probe_partition); 888 ptable_close(table); 889 } 890 } 891 close(pa.fd); 892 if (pool_guid && *pool_guid == 0) 893 ret = ENXIO; 894 return (ret); 895 } 896 897 /* 898 * Print information about ZFS pools 899 */ 900 static int 901 zfs_dev_print(int verbose) 902 { 903 spa_t *spa; 904 char line[80]; 905 int ret = 0; 906 907 if (STAILQ_EMPTY(&zfs_pools)) 908 return (0); 909 910 printf("%s devices:", zfs_dev.dv_name); 911 if ((ret = pager_output("\n")) != 0) 912 return (ret); 913 914 if (verbose) { 915 return (spa_all_status()); 916 } 917 STAILQ_FOREACH(spa, &zfs_pools, spa_link) { 918 snprintf(line, sizeof(line), " zfs:%s\n", spa->spa_name); 919 ret = pager_output(line); 920 if (ret != 0) 921 break; 922 } 923 return (ret); 924 } 925 926 /* 927 * Attempt to open the pool described by (dev) for use by (f). 928 */ 929 static int 930 zfs_dev_open(struct open_file *f, ...) 931 { 932 va_list args; 933 struct zfs_devdesc *dev; 934 struct zfsmount *mount; 935 spa_t *spa; 936 int rv; 937 938 va_start(args, f); 939 dev = va_arg(args, struct zfs_devdesc *); 940 va_end(args); 941 942 if (dev->pool_guid == 0) 943 spa = STAILQ_FIRST(&zfs_pools); 944 else 945 spa = spa_find_by_guid(dev->pool_guid); 946 if (!spa) 947 return (ENXIO); 948 mount = malloc(sizeof(*mount)); 949 if (mount == NULL) 950 rv = ENOMEM; 951 else 952 rv = zfs_mount(spa, dev->root_guid, mount); 953 if (rv != 0) { 954 free(mount); 955 return (rv); 956 } 957 if (mount->objset.os_type != DMU_OST_ZFS) { 958 printf("Unexpected object set type %ju\n", 959 (uintmax_t)mount->objset.os_type); 960 free(mount); 961 return (EIO); 962 } 963 f->f_devdata = mount; 964 free(dev); 965 return (0); 966 } 967 968 static int 969 zfs_dev_close(struct open_file *f) 970 { 971 972 free(f->f_devdata); 973 f->f_devdata = NULL; 974 return (0); 975 } 976 977 static int 978 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) 979 { 980 981 return (ENOSYS); 982 } 983 984 struct devsw zfs_dev = { 985 .dv_name = "zfs", 986 .dv_type = DEVT_ZFS, 987 .dv_init = zfs_dev_init, 988 .dv_strategy = zfs_dev_strategy, 989 .dv_open = zfs_dev_open, 990 .dv_close = zfs_dev_close, 991 .dv_ioctl = noioctl, 992 .dv_print = zfs_dev_print, 993 .dv_cleanup = NULL 994 }; 995 996 int 997 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) 998 { 999 static char rootname[ZFS_MAXNAMELEN]; 1000 static char poolname[ZFS_MAXNAMELEN]; 1001 spa_t *spa; 1002 const char *end; 1003 const char *np; 1004 const char *sep; 1005 int rv; 1006 1007 np = devspec; 1008 if (*np != ':') 1009 return (EINVAL); 1010 np++; 1011 end = strrchr(np, ':'); 1012 if (end == NULL) 1013 return (EINVAL); 1014 sep = strchr(np, '/'); 1015 if (sep == NULL || sep >= end) 1016 sep = end; 1017 memcpy(poolname, np, sep - np); 1018 poolname[sep - np] = '\0'; 1019 if (sep < end) { 1020 sep++; 1021 memcpy(rootname, sep, end - sep); 1022 rootname[end - sep] = '\0'; 1023 } 1024 else 1025 rootname[0] = '\0'; 1026 1027 spa = spa_find_by_name(poolname); 1028 if (!spa) 1029 return (ENXIO); 1030 dev->pool_guid = spa->spa_guid; 1031 rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid); 1032 if (rv != 0) 1033 return (rv); 1034 if (path != NULL) 1035 *path = (*end == '\0') ? end : end + 1; 1036 dev->dd.d_dev = &zfs_dev; 1037 return (0); 1038 } 1039 1040 char * 1041 zfs_fmtdev(void *vdev) 1042 { 1043 static char rootname[ZFS_MAXNAMELEN]; 1044 static char buf[2 * ZFS_MAXNAMELEN + 8]; 1045 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; 1046 spa_t *spa; 1047 1048 buf[0] = '\0'; 1049 if (dev->dd.d_dev->dv_type != DEVT_ZFS) 1050 return (buf); 1051 1052 /* Do we have any pools? */ 1053 spa = STAILQ_FIRST(&zfs_pools); 1054 if (spa == NULL) 1055 return (buf); 1056 1057 if (dev->pool_guid == 0) 1058 dev->pool_guid = spa->spa_guid; 1059 else 1060 spa = spa_find_by_guid(dev->pool_guid); 1061 1062 if (spa == NULL) { 1063 printf("ZFS: can't find pool by guid\n"); 1064 return (buf); 1065 } 1066 if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) { 1067 printf("ZFS: can't find root filesystem\n"); 1068 return (buf); 1069 } 1070 if (zfs_rlookup(spa, dev->root_guid, rootname)) { 1071 printf("ZFS: can't find filesystem by guid\n"); 1072 return (buf); 1073 } 1074 1075 if (rootname[0] == '\0') 1076 sprintf(buf, "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name); 1077 else 1078 sprintf(buf, "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name, 1079 rootname); 1080 return (buf); 1081 } 1082 1083 static int 1084 split_devname(const char *name, char *poolname, size_t size, 1085 const char **dsnamep) 1086 { 1087 const char *dsname; 1088 size_t len; 1089 1090 ASSERT(name != NULL); 1091 ASSERT(poolname != NULL); 1092 1093 len = strlen(name); 1094 dsname = strchr(name, '/'); 1095 if (dsname != NULL) { 1096 len = dsname - name; 1097 dsname++; 1098 } else 1099 dsname = ""; 1100 1101 if (len + 1 > size) 1102 return (EINVAL); 1103 1104 strlcpy(poolname, name, len + 1); 1105 1106 if (dsnamep != NULL) 1107 *dsnamep = dsname; 1108 1109 return (0); 1110 } 1111 1112 int 1113 zfs_list(const char *name) 1114 { 1115 static char poolname[ZFS_MAXNAMELEN]; 1116 uint64_t objid; 1117 spa_t *spa; 1118 const char *dsname; 1119 int rv; 1120 1121 if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0) 1122 return (EINVAL); 1123 1124 spa = spa_find_by_name(poolname); 1125 if (!spa) 1126 return (ENXIO); 1127 rv = zfs_lookup_dataset(spa, dsname, &objid); 1128 if (rv != 0) 1129 return (rv); 1130 1131 return (zfs_list_dataset(spa, objid)); 1132 } 1133 1134 void 1135 init_zfs_boot_options(const char *currdev_in) 1136 { 1137 char poolname[ZFS_MAXNAMELEN]; 1138 char *beroot, *currdev; 1139 spa_t *spa; 1140 int currdev_len; 1141 const char *dsname; 1142 1143 currdev = NULL; 1144 currdev_len = strlen(currdev_in); 1145 if (currdev_len == 0) 1146 return; 1147 if (strncmp(currdev_in, "zfs:", 4) != 0) 1148 return; 1149 currdev = strdup(currdev_in); 1150 if (currdev == NULL) 1151 return; 1152 /* Remove the trailing : */ 1153 currdev[currdev_len - 1] = '\0'; 1154 1155 setenv("zfs_be_active", currdev, 1); 1156 setenv("zfs_be_currpage", "1", 1); 1157 /* Remove the last element (current bootenv) */ 1158 beroot = strrchr(currdev, '/'); 1159 if (beroot != NULL) 1160 beroot[0] = '\0'; 1161 beroot = strchr(currdev, ':') + 1; 1162 setenv("zfs_be_root", beroot, 1); 1163 1164 if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0) 1165 return; 1166 1167 spa = spa_find_by_name(poolname); 1168 if (spa == NULL) 1169 return; 1170 1171 zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0); 1172 zfs_checkpoints_initial(spa, beroot, dsname); 1173 1174 free(currdev); 1175 } 1176 1177 static void 1178 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname) 1179 { 1180 char envname[32]; 1181 1182 if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) { 1183 snprintf(envname, sizeof(envname), "zpool_checkpoint"); 1184 setenv(envname, name, 1); 1185 1186 spa->spa_uberblock = &spa->spa_uberblock_checkpoint; 1187 spa->spa_mos = &spa->spa_mos_checkpoint; 1188 1189 zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1); 1190 1191 spa->spa_uberblock = &spa->spa_uberblock_master; 1192 spa->spa_mos = &spa->spa_mos_master; 1193 } 1194 } 1195 1196 static void 1197 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname, 1198 const char *dsname, int checkpoint) 1199 { 1200 char envname[32], envval[256]; 1201 uint64_t objid; 1202 int bootenvs_idx, rv; 1203 1204 SLIST_INIT(&zfs_be_head); 1205 zfs_env_count = 0; 1206 1207 rv = zfs_lookup_dataset(spa, dsname, &objid); 1208 if (rv != 0) 1209 return; 1210 1211 rv = zfs_callback_dataset(spa, objid, zfs_belist_add); 1212 bootenvs_idx = 0; 1213 /* Populate the initial environment variables */ 1214 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { 1215 /* Enumerate all bootenvs for general usage */ 1216 snprintf(envname, sizeof(envname), "%s[%d]", 1217 envprefix, bootenvs_idx); 1218 snprintf(envval, sizeof(envval), "zfs:%s%s/%s", 1219 checkpoint ? "!" : "", rootname, zfs_be->name); 1220 rv = setenv(envname, envval, 1); 1221 if (rv != 0) 1222 break; 1223 bootenvs_idx++; 1224 } 1225 snprintf(envname, sizeof(envname), "%s_count", envprefix); 1226 snprintf(envval, sizeof(envval), "%d", bootenvs_idx); 1227 setenv(envname, envval, 1); 1228 1229 /* Clean up the SLIST of ZFS BEs */ 1230 while (!SLIST_EMPTY(&zfs_be_head)) { 1231 zfs_be = SLIST_FIRST(&zfs_be_head); 1232 SLIST_REMOVE_HEAD(&zfs_be_head, entries); 1233 free(zfs_be->name); 1234 free(zfs_be); 1235 } 1236 } 1237 1238 int 1239 zfs_bootenv(const char *name) 1240 { 1241 char poolname[ZFS_MAXNAMELEN], *root; 1242 const char *dsname; 1243 char becount[4]; 1244 uint64_t objid; 1245 spa_t *spa; 1246 int rv, pages, perpage, currpage; 1247 1248 if (name == NULL) 1249 return (EINVAL); 1250 if ((root = getenv("zfs_be_root")) == NULL) 1251 return (EINVAL); 1252 1253 if (strcmp(name, root) != 0) { 1254 if (setenv("zfs_be_root", name, 1) != 0) 1255 return (ENOMEM); 1256 } 1257 1258 SLIST_INIT(&zfs_be_head); 1259 zfs_env_count = 0; 1260 1261 if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0) 1262 return (EINVAL); 1263 1264 spa = spa_find_by_name(poolname); 1265 if (!spa) 1266 return (ENXIO); 1267 rv = zfs_lookup_dataset(spa, dsname, &objid); 1268 if (rv != 0) 1269 return (rv); 1270 rv = zfs_callback_dataset(spa, objid, zfs_belist_add); 1271 1272 /* Calculate and store the number of pages of BEs */ 1273 perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1); 1274 pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0); 1275 snprintf(becount, 4, "%d", pages); 1276 if (setenv("zfs_be_pages", becount, 1) != 0) 1277 return (ENOMEM); 1278 1279 /* Roll over the page counter if it has exceeded the maximum */ 1280 currpage = strtol(getenv("zfs_be_currpage"), NULL, 10); 1281 if (currpage > pages) { 1282 if (setenv("zfs_be_currpage", "1", 1) != 0) 1283 return (ENOMEM); 1284 } 1285 1286 /* Populate the menu environment variables */ 1287 zfs_set_env(); 1288 1289 /* Clean up the SLIST of ZFS BEs */ 1290 while (!SLIST_EMPTY(&zfs_be_head)) { 1291 zfs_be = SLIST_FIRST(&zfs_be_head); 1292 SLIST_REMOVE_HEAD(&zfs_be_head, entries); 1293 free(zfs_be->name); 1294 free(zfs_be); 1295 } 1296 1297 return (rv); 1298 } 1299 1300 int 1301 zfs_belist_add(const char *name, uint64_t value __unused) 1302 { 1303 1304 /* Skip special datasets that start with a $ character */ 1305 if (strncmp(name, "$", 1) == 0) { 1306 return (0); 1307 } 1308 /* Add the boot environment to the head of the SLIST */ 1309 zfs_be = malloc(sizeof(struct zfs_be_entry)); 1310 if (zfs_be == NULL) { 1311 return (ENOMEM); 1312 } 1313 zfs_be->name = strdup(name); 1314 if (zfs_be->name == NULL) { 1315 free(zfs_be); 1316 return (ENOMEM); 1317 } 1318 SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries); 1319 zfs_env_count++; 1320 1321 return (0); 1322 } 1323 1324 int 1325 zfs_set_env(void) 1326 { 1327 char envname[32], envval[256]; 1328 char *beroot, *pagenum; 1329 int rv, page, ctr; 1330 1331 beroot = getenv("zfs_be_root"); 1332 if (beroot == NULL) { 1333 return (1); 1334 } 1335 1336 pagenum = getenv("zfs_be_currpage"); 1337 if (pagenum != NULL) { 1338 page = strtol(pagenum, NULL, 10); 1339 } else { 1340 page = 1; 1341 } 1342 1343 ctr = 1; 1344 rv = 0; 1345 zfs_env_index = ZFS_BE_FIRST; 1346 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { 1347 /* Skip to the requested page number */ 1348 if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) { 1349 ctr++; 1350 continue; 1351 } 1352 1353 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); 1354 snprintf(envval, sizeof(envval), "%s", zfs_be->name); 1355 rv = setenv(envname, envval, 1); 1356 if (rv != 0) { 1357 break; 1358 } 1359 1360 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); 1361 rv = setenv(envname, envval, 1); 1362 if (rv != 0){ 1363 break; 1364 } 1365 1366 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); 1367 rv = setenv(envname, "set_bootenv", 1); 1368 if (rv != 0){ 1369 break; 1370 } 1371 1372 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); 1373 snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name); 1374 rv = setenv(envname, envval, 1); 1375 if (rv != 0){ 1376 break; 1377 } 1378 1379 zfs_env_index++; 1380 if (zfs_env_index > ZFS_BE_LAST) { 1381 break; 1382 } 1383 1384 } 1385 1386 for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) { 1387 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); 1388 (void)unsetenv(envname); 1389 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); 1390 (void)unsetenv(envname); 1391 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); 1392 (void)unsetenv(envname); 1393 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); 1394 (void)unsetenv(envname); 1395 } 1396 1397 return (rv); 1398 } 1399