1 /*- 2 * Copyright (c) 2007 Doug Rabson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 /* 33 * Stand-alone file reading package. 34 */ 35 36 #include <stand.h> 37 #include <sys/disk.h> 38 #include <sys/param.h> 39 #include <sys/time.h> 40 #include <sys/queue.h> 41 #include <disk.h> 42 #include <part.h> 43 #include <stddef.h> 44 #include <stdarg.h> 45 #include <string.h> 46 #include <bootstrap.h> 47 48 #include "libzfs.h" 49 50 #include "zfsimpl.c" 51 52 /* Define the range of indexes to be populated with ZFS Boot Environments */ 53 #define ZFS_BE_FIRST 4 54 #define ZFS_BE_LAST 8 55 56 static int zfs_open(const char *path, struct open_file *f); 57 static int zfs_close(struct open_file *f); 58 static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid); 59 static off_t zfs_seek(struct open_file *f, off_t offset, int where); 60 static int zfs_stat(struct open_file *f, struct stat *sb); 61 static int zfs_readdir(struct open_file *f, struct dirent *d); 62 63 static void zfs_bootenv_initial(const char *); 64 65 struct devsw zfs_dev; 66 67 struct fs_ops zfs_fsops = { 68 "zfs", 69 zfs_open, 70 zfs_close, 71 zfs_read, 72 null_write, 73 zfs_seek, 74 zfs_stat, 75 zfs_readdir 76 }; 77 78 /* 79 * In-core open file. 80 */ 81 struct file { 82 off_t f_seekp; /* seek pointer */ 83 dnode_phys_t f_dnode; 84 uint64_t f_zap_type; /* zap type for readdir */ 85 uint64_t f_num_leafs; /* number of fzap leaf blocks */ 86 zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */ 87 }; 88 89 static int zfs_env_index; 90 static int zfs_env_count; 91 92 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head); 93 struct zfs_be_list *zfs_be_headp; 94 struct zfs_be_entry { 95 char *name; 96 SLIST_ENTRY(zfs_be_entry) entries; 97 } *zfs_be, *zfs_be_tmp; 98 99 /* 100 * Open a file. 101 */ 102 static int 103 zfs_open(const char *upath, struct open_file *f) 104 { 105 struct zfsmount *mount = (struct zfsmount *)f->f_devdata; 106 struct file *fp; 107 int rc; 108 109 if (f->f_dev != &zfs_dev) 110 return (EINVAL); 111 112 /* allocate file system specific data structure */ 113 fp = calloc(1, sizeof(struct file)); 114 if (fp == NULL) 115 return (ENOMEM); 116 f->f_fsdata = fp; 117 118 rc = zfs_lookup(mount, upath, &fp->f_dnode); 119 fp->f_seekp = 0; 120 if (rc) { 121 f->f_fsdata = NULL; 122 free(fp); 123 } 124 return (rc); 125 } 126 127 static int 128 zfs_close(struct open_file *f) 129 { 130 struct file *fp = (struct file *)f->f_fsdata; 131 132 dnode_cache_obj = NULL; 133 f->f_fsdata = NULL; 134 135 free(fp); 136 return (0); 137 } 138 139 /* 140 * Copy a portion of a file into kernel memory. 141 * Cross block boundaries when necessary. 142 */ 143 static int 144 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) 145 { 146 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 147 struct file *fp = (struct file *)f->f_fsdata; 148 struct stat sb; 149 size_t n; 150 int rc; 151 152 rc = zfs_stat(f, &sb); 153 if (rc) 154 return (rc); 155 n = size; 156 if (fp->f_seekp + n > sb.st_size) 157 n = sb.st_size - fp->f_seekp; 158 159 rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); 160 if (rc) 161 return (rc); 162 163 if (0) { 164 int i; 165 for (i = 0; i < n; i++) 166 putchar(((char*) start)[i]); 167 } 168 fp->f_seekp += n; 169 if (resid) 170 *resid = size - n; 171 172 return (0); 173 } 174 175 static off_t 176 zfs_seek(struct open_file *f, off_t offset, int where) 177 { 178 struct file *fp = (struct file *)f->f_fsdata; 179 180 switch (where) { 181 case SEEK_SET: 182 fp->f_seekp = offset; 183 break; 184 case SEEK_CUR: 185 fp->f_seekp += offset; 186 break; 187 case SEEK_END: 188 { 189 struct stat sb; 190 int error; 191 192 error = zfs_stat(f, &sb); 193 if (error != 0) { 194 errno = error; 195 return (-1); 196 } 197 fp->f_seekp = sb.st_size - offset; 198 break; 199 } 200 default: 201 errno = EINVAL; 202 return (-1); 203 } 204 return (fp->f_seekp); 205 } 206 207 static int 208 zfs_stat(struct open_file *f, struct stat *sb) 209 { 210 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 211 struct file *fp = (struct file *)f->f_fsdata; 212 213 return (zfs_dnode_stat(spa, &fp->f_dnode, sb)); 214 } 215 216 static int 217 zfs_readdir(struct open_file *f, struct dirent *d) 218 { 219 const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; 220 struct file *fp = (struct file *)f->f_fsdata; 221 mzap_ent_phys_t mze; 222 struct stat sb; 223 size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; 224 int rc; 225 226 rc = zfs_stat(f, &sb); 227 if (rc) 228 return (rc); 229 if (!S_ISDIR(sb.st_mode)) 230 return (ENOTDIR); 231 232 /* 233 * If this is the first read, get the zap type. 234 */ 235 if (fp->f_seekp == 0) { 236 rc = dnode_read(spa, &fp->f_dnode, 237 0, &fp->f_zap_type, sizeof(fp->f_zap_type)); 238 if (rc) 239 return (rc); 240 241 if (fp->f_zap_type == ZBT_MICRO) { 242 fp->f_seekp = offsetof(mzap_phys_t, mz_chunk); 243 } else { 244 rc = dnode_read(spa, &fp->f_dnode, 245 offsetof(zap_phys_t, zap_num_leafs), 246 &fp->f_num_leafs, 247 sizeof(fp->f_num_leafs)); 248 if (rc) 249 return (rc); 250 251 fp->f_seekp = bsize; 252 fp->f_zap_leaf = malloc(bsize); 253 if (fp->f_zap_leaf == NULL) 254 return (ENOMEM); 255 rc = dnode_read(spa, &fp->f_dnode, 256 fp->f_seekp, 257 fp->f_zap_leaf, 258 bsize); 259 if (rc) 260 return (rc); 261 } 262 } 263 264 if (fp->f_zap_type == ZBT_MICRO) { 265 mzap_next: 266 if (fp->f_seekp >= bsize) 267 return (ENOENT); 268 269 rc = dnode_read(spa, &fp->f_dnode, 270 fp->f_seekp, &mze, sizeof(mze)); 271 if (rc) 272 return (rc); 273 fp->f_seekp += sizeof(mze); 274 275 if (!mze.mze_name[0]) 276 goto mzap_next; 277 278 d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value); 279 d->d_type = ZFS_DIRENT_TYPE(mze.mze_value); 280 strcpy(d->d_name, mze.mze_name); 281 d->d_namlen = strlen(d->d_name); 282 return (0); 283 } else { 284 zap_leaf_t zl; 285 zap_leaf_chunk_t *zc, *nc; 286 int chunk; 287 size_t namelen; 288 char *p; 289 uint64_t value; 290 291 /* 292 * Initialise this so we can use the ZAP size 293 * calculating macros. 294 */ 295 zl.l_bs = ilog2(bsize); 296 zl.l_phys = fp->f_zap_leaf; 297 298 /* 299 * Figure out which chunk we are currently looking at 300 * and consider seeking to the next leaf. We use the 301 * low bits of f_seekp as a simple chunk index. 302 */ 303 fzap_next: 304 chunk = fp->f_seekp & (bsize - 1); 305 if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) { 306 fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize; 307 chunk = 0; 308 309 /* 310 * Check for EOF and read the new leaf. 311 */ 312 if (fp->f_seekp >= bsize * fp->f_num_leafs) 313 return (ENOENT); 314 315 rc = dnode_read(spa, &fp->f_dnode, 316 fp->f_seekp, 317 fp->f_zap_leaf, 318 bsize); 319 if (rc) 320 return (rc); 321 } 322 323 zc = &ZAP_LEAF_CHUNK(&zl, chunk); 324 fp->f_seekp++; 325 if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) 326 goto fzap_next; 327 328 namelen = zc->l_entry.le_name_numints; 329 if (namelen > sizeof(d->d_name)) 330 namelen = sizeof(d->d_name); 331 332 /* 333 * Paste the name back together. 334 */ 335 nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); 336 p = d->d_name; 337 while (namelen > 0) { 338 int len; 339 len = namelen; 340 if (len > ZAP_LEAF_ARRAY_BYTES) 341 len = ZAP_LEAF_ARRAY_BYTES; 342 memcpy(p, nc->l_array.la_array, len); 343 p += len; 344 namelen -= len; 345 nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); 346 } 347 d->d_name[sizeof(d->d_name) - 1] = 0; 348 349 /* 350 * Assume the first eight bytes of the value are 351 * a uint64_t. 352 */ 353 value = fzap_leaf_value(&zl, zc); 354 355 d->d_fileno = ZFS_DIRENT_OBJ(value); 356 d->d_type = ZFS_DIRENT_TYPE(value); 357 d->d_namlen = strlen(d->d_name); 358 359 return (0); 360 } 361 } 362 363 static int 364 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes) 365 { 366 int fd, ret; 367 size_t res, head, tail, total_size, full_sec_size; 368 unsigned secsz, do_tail_read; 369 off_t start_sec; 370 char *outbuf, *bouncebuf; 371 372 fd = (uintptr_t) priv; 373 outbuf = (char *) buf; 374 bouncebuf = NULL; 375 376 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); 377 if (ret != 0) 378 return (ret); 379 380 /* 381 * Handling reads of arbitrary offset and size - multi-sector case 382 * and single-sector case. 383 * 384 * Multi-sector Case 385 * (do_tail_read = true if tail > 0) 386 * 387 * |<----------------------total_size--------------------->| 388 * | | 389 * |<--head-->|<--------------bytes------------>|<--tail-->| 390 * | | | | 391 * | | |<~full_sec_size~>| | | 392 * +------------------+ +------------------+ 393 * | |0101010| . . . |0101011| | 394 * +------------------+ +------------------+ 395 * start_sec start_sec + n 396 * 397 * 398 * Single-sector Case 399 * (do_tail_read = false) 400 * 401 * |<------total_size = secsz----->| 402 * | | 403 * |<-head->|<---bytes--->|<-tail->| 404 * +-------------------------------+ 405 * | |0101010101010| | 406 * +-------------------------------+ 407 * start_sec 408 */ 409 start_sec = offset / secsz; 410 head = offset % secsz; 411 total_size = roundup2(head + bytes, secsz); 412 tail = total_size - (head + bytes); 413 do_tail_read = ((tail > 0) && (head + bytes > secsz)); 414 full_sec_size = total_size; 415 if (head > 0) 416 full_sec_size -= secsz; 417 if (do_tail_read) 418 full_sec_size -= secsz; 419 420 /* Return of partial sector data requires a bounce buffer. */ 421 if ((head > 0) || do_tail_read || bytes < secsz) { 422 bouncebuf = malloc(secsz); 423 if (bouncebuf == NULL) { 424 printf("vdev_read: out of memory\n"); 425 return (ENOMEM); 426 } 427 } 428 429 if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) { 430 ret = errno; 431 goto error; 432 } 433 434 /* Partial data return from first sector */ 435 if (head > 0) { 436 res = read(fd, bouncebuf, secsz); 437 if (res != secsz) { 438 ret = EIO; 439 goto error; 440 } 441 memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes)); 442 outbuf += min(secsz - head, bytes); 443 } 444 445 /* 446 * Full data return from read sectors. 447 * Note, there is still corner case where we read 448 * from sector boundary, but less than sector size, e.g. reading 512B 449 * from 4k sector. 450 */ 451 if (full_sec_size > 0) { 452 if (bytes < full_sec_size) { 453 res = read(fd, bouncebuf, secsz); 454 if (res != secsz) { 455 ret = EIO; 456 goto error; 457 } 458 memcpy(outbuf, bouncebuf, bytes); 459 } else { 460 res = read(fd, outbuf, full_sec_size); 461 if (res != full_sec_size) { 462 ret = EIO; 463 goto error; 464 } 465 outbuf += full_sec_size; 466 } 467 } 468 469 /* Partial data return from last sector */ 470 if (do_tail_read) { 471 res = read(fd, bouncebuf, secsz); 472 if (res != secsz) { 473 ret = EIO; 474 goto error; 475 } 476 memcpy(outbuf, bouncebuf, secsz - tail); 477 } 478 479 ret = 0; 480 error: 481 free(bouncebuf); 482 return (ret); 483 } 484 485 static int 486 vdev_write(vdev_t *vdev __unused, void *priv, off_t offset, void *buf, 487 size_t bytes) 488 { 489 int fd, ret; 490 size_t head, tail, total_size, full_sec_size; 491 unsigned secsz, do_tail_write; 492 off_t start_sec; 493 ssize_t res; 494 char *outbuf, *bouncebuf; 495 496 fd = (uintptr_t)priv; 497 outbuf = (char *) buf; 498 bouncebuf = NULL; 499 500 ret = ioctl(fd, DIOCGSECTORSIZE, &secsz); 501 if (ret != 0) 502 return (ret); 503 504 start_sec = offset / secsz; 505 head = offset % secsz; 506 total_size = roundup2(head + bytes, secsz); 507 tail = total_size - (head + bytes); 508 do_tail_write = ((tail > 0) && (head + bytes > secsz)); 509 full_sec_size = total_size; 510 if (head > 0) 511 full_sec_size -= secsz; 512 if (do_tail_write) 513 full_sec_size -= secsz; 514 515 /* Partial sector write requires a bounce buffer. */ 516 if ((head > 0) || do_tail_write || bytes < secsz) { 517 bouncebuf = malloc(secsz); 518 if (bouncebuf == NULL) { 519 printf("vdev_write: out of memory\n"); 520 return (ENOMEM); 521 } 522 } 523 524 if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) { 525 ret = errno; 526 goto error; 527 } 528 529 /* Partial data for first sector */ 530 if (head > 0) { 531 res = read(fd, bouncebuf, secsz); 532 if (res != secsz) { 533 ret = EIO; 534 goto error; 535 } 536 memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes)); 537 (void) lseek(fd, -secsz, SEEK_CUR); 538 res = write(fd, bouncebuf, secsz); 539 if (res != secsz) { 540 ret = EIO; 541 goto error; 542 } 543 outbuf += min(secsz - head, bytes); 544 } 545 546 /* 547 * Full data write to sectors. 548 * Note, there is still corner case where we write 549 * to sector boundary, but less than sector size, e.g. write 512B 550 * to 4k sector. 551 */ 552 if (full_sec_size > 0) { 553 if (bytes < full_sec_size) { 554 res = read(fd, bouncebuf, secsz); 555 if (res != secsz) { 556 ret = EIO; 557 goto error; 558 } 559 memcpy(bouncebuf, outbuf, bytes); 560 (void) lseek(fd, -secsz, SEEK_CUR); 561 res = write(fd, bouncebuf, secsz); 562 if (res != secsz) { 563 ret = EIO; 564 goto error; 565 } 566 } else { 567 res = write(fd, outbuf, full_sec_size); 568 if (res != full_sec_size) { 569 ret = EIO; 570 goto error; 571 } 572 outbuf += full_sec_size; 573 } 574 } 575 576 /* Partial data write to last sector */ 577 if (do_tail_write) { 578 res = read(fd, bouncebuf, secsz); 579 if (res != secsz) { 580 ret = EIO; 581 goto error; 582 } 583 memcpy(bouncebuf, outbuf, secsz - tail); 584 (void) lseek(fd, -secsz, SEEK_CUR); 585 res = write(fd, bouncebuf, secsz); 586 if (res != secsz) { 587 ret = EIO; 588 goto error; 589 } 590 } 591 592 ret = 0; 593 error: 594 free(bouncebuf); 595 return (ret); 596 } 597 598 static void 599 vdev_clear_pad2(vdev_t *vdev) 600 { 601 vdev_t *kid; 602 vdev_boot_envblock_t *be; 603 off_t off = offsetof(vdev_label_t, vl_be); 604 zio_checksum_info_t *ci; 605 zio_cksum_t cksum; 606 607 STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { 608 if (kid->v_state != VDEV_STATE_HEALTHY) 609 continue; 610 vdev_clear_pad2(kid); 611 } 612 613 if (!STAILQ_EMPTY(&vdev->v_children)) 614 return; 615 616 be = calloc(1, sizeof (*be)); 617 if (be == NULL) { 618 printf("failed to clear be area: out of memory\n"); 619 return; 620 } 621 622 ci = &zio_checksum_table[ZIO_CHECKSUM_LABEL]; 623 be->vbe_zbt.zec_magic = ZEC_MAGIC; 624 zio_checksum_label_verifier(&be->vbe_zbt.zec_cksum, off); 625 ci->ci_func[0](be, sizeof (*be), NULL, &cksum); 626 be->vbe_zbt.zec_cksum = cksum; 627 628 if (vdev_write(vdev, vdev->v_read_priv, off, be, VDEV_PAD_SIZE)) { 629 printf("failed to clear be area of primary vdev: %d\n", 630 errno); 631 } 632 free(be); 633 } 634 635 /* 636 * Read the next boot command from pad2. 637 * If any instance of pad2 is set to empty string, or the returned string 638 * values are not the same, we consider next boot not to be set. 639 */ 640 static char * 641 vdev_read_pad2(vdev_t *vdev) 642 { 643 vdev_t *kid; 644 char *tmp, *result = NULL; 645 vdev_boot_envblock_t *be; 646 off_t off = offsetof(vdev_label_t, vl_be); 647 648 STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { 649 if (kid->v_state != VDEV_STATE_HEALTHY) 650 continue; 651 tmp = vdev_read_pad2(kid); 652 if (tmp == NULL) 653 continue; 654 655 /* The next boot is not set, we are done. */ 656 if (*tmp == '\0') { 657 free(result); 658 return (tmp); 659 } 660 if (result == NULL) { 661 result = tmp; 662 continue; 663 } 664 /* Are the next boot strings different? */ 665 if (strcmp(result, tmp) != 0) { 666 free(tmp); 667 *result = '\0'; 668 break; 669 } 670 free(tmp); 671 } 672 if (result != NULL) 673 return (result); 674 675 be = malloc(sizeof (*be)); 676 if (be == NULL) 677 return (NULL); 678 679 if (vdev_read(vdev, vdev->v_read_priv, off, be, sizeof (*be))) { 680 return (NULL); 681 } 682 683 switch (be->vbe_version) { 684 case VB_RAW: 685 case VB_NVLIST: 686 result = strdup(be->vbe_bootenv); 687 default: 688 /* Backward compatibility with initial nextboot feaure. */ 689 result = strdup((char *)be); 690 } 691 return (result); 692 } 693 694 static int 695 zfs_dev_init(void) 696 { 697 spa_t *spa; 698 spa_t *next; 699 spa_t *prev; 700 701 zfs_init(); 702 if (archsw.arch_zfs_probe == NULL) 703 return (ENXIO); 704 archsw.arch_zfs_probe(); 705 706 prev = NULL; 707 spa = STAILQ_FIRST(&zfs_pools); 708 while (spa != NULL) { 709 next = STAILQ_NEXT(spa, spa_link); 710 if (zfs_spa_init(spa)) { 711 if (prev == NULL) 712 STAILQ_REMOVE_HEAD(&zfs_pools, spa_link); 713 else 714 STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link); 715 } else 716 prev = spa; 717 spa = next; 718 } 719 return (0); 720 } 721 722 struct zfs_probe_args { 723 int fd; 724 const char *devname; 725 uint64_t *pool_guid; 726 u_int secsz; 727 }; 728 729 static int 730 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset) 731 { 732 struct zfs_probe_args *ppa; 733 734 ppa = (struct zfs_probe_args *)arg; 735 return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd, 736 offset * ppa->secsz, buf, blocks * ppa->secsz)); 737 } 738 739 static int 740 zfs_probe(int fd, uint64_t *pool_guid) 741 { 742 spa_t *spa; 743 int ret; 744 745 spa = NULL; 746 ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); 747 if (ret == 0 && pool_guid != NULL) 748 *pool_guid = spa->spa_guid; 749 return (ret); 750 } 751 752 static int 753 zfs_probe_partition(void *arg, const char *partname, 754 const struct ptable_entry *part) 755 { 756 struct zfs_probe_args *ppa, pa; 757 struct ptable *table; 758 char devname[32]; 759 int ret; 760 761 /* Probe only freebsd-zfs and freebsd partitions */ 762 if (part->type != PART_FREEBSD && 763 part->type != PART_FREEBSD_ZFS) 764 return (0); 765 766 ppa = (struct zfs_probe_args *)arg; 767 strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); 768 devname[strlen(ppa->devname) - 1] = '\0'; 769 sprintf(devname, "%s%s:", devname, partname); 770 pa.fd = open(devname, O_RDWR); 771 if (pa.fd == -1) 772 return (0); 773 ret = zfs_probe(pa.fd, ppa->pool_guid); 774 if (ret == 0) 775 return (0); 776 /* Do we have BSD label here? */ 777 if (part->type == PART_FREEBSD) { 778 pa.devname = devname; 779 pa.pool_guid = ppa->pool_guid; 780 pa.secsz = ppa->secsz; 781 table = ptable_open(&pa, part->end - part->start + 1, 782 ppa->secsz, zfs_diskread); 783 if (table != NULL) { 784 ptable_iterate(table, &pa, zfs_probe_partition); 785 ptable_close(table); 786 } 787 } 788 close(pa.fd); 789 return (0); 790 } 791 792 int 793 zfs_nextboot(void *vdev, char *buf, size_t size) 794 { 795 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; 796 spa_t *spa; 797 vdev_t *vd; 798 char *result = NULL; 799 800 if (dev->dd.d_dev->dv_type != DEVT_ZFS) 801 return (1); 802 803 if (dev->pool_guid == 0) 804 spa = STAILQ_FIRST(&zfs_pools); 805 else 806 spa = spa_find_by_guid(dev->pool_guid); 807 808 if (spa == NULL) { 809 printf("ZFS: can't find pool by guid\n"); 810 return (1); 811 } 812 813 STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { 814 char *tmp = vdev_read_pad2(vd); 815 816 /* Continue on error. */ 817 if (tmp == NULL) 818 continue; 819 /* Nextboot is not set. */ 820 if (*tmp == '\0') { 821 free(result); 822 free(tmp); 823 return (1); 824 } 825 if (result == NULL) { 826 result = tmp; 827 continue; 828 } 829 free(tmp); 830 } 831 if (result == NULL) 832 return (1); 833 834 STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { 835 vdev_clear_pad2(vd); 836 } 837 838 strlcpy(buf, result, size); 839 free(result); 840 return (0); 841 } 842 843 int 844 zfs_probe_dev(const char *devname, uint64_t *pool_guid) 845 { 846 struct disk_devdesc *dev; 847 struct ptable *table; 848 struct zfs_probe_args pa; 849 uint64_t mediasz; 850 int ret; 851 852 if (pool_guid) 853 *pool_guid = 0; 854 pa.fd = open(devname, O_RDWR); 855 if (pa.fd == -1) 856 return (ENXIO); 857 /* 858 * We will not probe the whole disk, we can not boot from such 859 * disks and some systems will misreport the disk sizes and will 860 * hang while accessing the disk. 861 */ 862 if (archsw.arch_getdev((void **)&dev, devname, NULL) == 0) { 863 int partition = dev->d_partition; 864 int slice = dev->d_slice; 865 866 free(dev); 867 if (partition != D_PARTNONE && slice != D_SLICENONE) { 868 ret = zfs_probe(pa.fd, pool_guid); 869 if (ret == 0) 870 return (0); 871 } 872 } 873 874 /* Probe each partition */ 875 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); 876 if (ret == 0) 877 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz); 878 if (ret == 0) { 879 pa.devname = devname; 880 pa.pool_guid = pool_guid; 881 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz, 882 zfs_diskread); 883 if (table != NULL) { 884 ptable_iterate(table, &pa, zfs_probe_partition); 885 ptable_close(table); 886 } 887 } 888 close(pa.fd); 889 if (pool_guid && *pool_guid == 0) 890 ret = ENXIO; 891 return (ret); 892 } 893 894 /* 895 * Print information about ZFS pools 896 */ 897 static int 898 zfs_dev_print(int verbose) 899 { 900 spa_t *spa; 901 char line[80]; 902 int ret = 0; 903 904 if (STAILQ_EMPTY(&zfs_pools)) 905 return (0); 906 907 printf("%s devices:", zfs_dev.dv_name); 908 if ((ret = pager_output("\n")) != 0) 909 return (ret); 910 911 if (verbose) { 912 return (spa_all_status()); 913 } 914 STAILQ_FOREACH(spa, &zfs_pools, spa_link) { 915 snprintf(line, sizeof(line), " zfs:%s\n", spa->spa_name); 916 ret = pager_output(line); 917 if (ret != 0) 918 break; 919 } 920 return (ret); 921 } 922 923 /* 924 * Attempt to open the pool described by (dev) for use by (f). 925 */ 926 static int 927 zfs_dev_open(struct open_file *f, ...) 928 { 929 va_list args; 930 struct zfs_devdesc *dev; 931 struct zfsmount *mount; 932 spa_t *spa; 933 int rv; 934 935 va_start(args, f); 936 dev = va_arg(args, struct zfs_devdesc *); 937 va_end(args); 938 939 if (dev->pool_guid == 0) 940 spa = STAILQ_FIRST(&zfs_pools); 941 else 942 spa = spa_find_by_guid(dev->pool_guid); 943 if (!spa) 944 return (ENXIO); 945 mount = malloc(sizeof(*mount)); 946 if (mount == NULL) 947 rv = ENOMEM; 948 else 949 rv = zfs_mount(spa, dev->root_guid, mount); 950 if (rv != 0) { 951 free(mount); 952 return (rv); 953 } 954 if (mount->objset.os_type != DMU_OST_ZFS) { 955 printf("Unexpected object set type %ju\n", 956 (uintmax_t)mount->objset.os_type); 957 free(mount); 958 return (EIO); 959 } 960 f->f_devdata = mount; 961 free(dev); 962 return (0); 963 } 964 965 static int 966 zfs_dev_close(struct open_file *f) 967 { 968 969 free(f->f_devdata); 970 f->f_devdata = NULL; 971 return (0); 972 } 973 974 static int 975 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) 976 { 977 978 return (ENOSYS); 979 } 980 981 struct devsw zfs_dev = { 982 .dv_name = "zfs", 983 .dv_type = DEVT_ZFS, 984 .dv_init = zfs_dev_init, 985 .dv_strategy = zfs_dev_strategy, 986 .dv_open = zfs_dev_open, 987 .dv_close = zfs_dev_close, 988 .dv_ioctl = noioctl, 989 .dv_print = zfs_dev_print, 990 .dv_cleanup = NULL 991 }; 992 993 int 994 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) 995 { 996 static char rootname[ZFS_MAXNAMELEN]; 997 static char poolname[ZFS_MAXNAMELEN]; 998 spa_t *spa; 999 const char *end; 1000 const char *np; 1001 const char *sep; 1002 int rv; 1003 1004 np = devspec; 1005 if (*np != ':') 1006 return (EINVAL); 1007 np++; 1008 end = strrchr(np, ':'); 1009 if (end == NULL) 1010 return (EINVAL); 1011 sep = strchr(np, '/'); 1012 if (sep == NULL || sep >= end) 1013 sep = end; 1014 memcpy(poolname, np, sep - np); 1015 poolname[sep - np] = '\0'; 1016 if (sep < end) { 1017 sep++; 1018 memcpy(rootname, sep, end - sep); 1019 rootname[end - sep] = '\0'; 1020 } 1021 else 1022 rootname[0] = '\0'; 1023 1024 spa = spa_find_by_name(poolname); 1025 if (!spa) 1026 return (ENXIO); 1027 dev->pool_guid = spa->spa_guid; 1028 rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid); 1029 if (rv != 0) 1030 return (rv); 1031 if (path != NULL) 1032 *path = (*end == '\0') ? end : end + 1; 1033 dev->dd.d_dev = &zfs_dev; 1034 return (0); 1035 } 1036 1037 char * 1038 zfs_fmtdev(void *vdev) 1039 { 1040 static char rootname[ZFS_MAXNAMELEN]; 1041 static char buf[2 * ZFS_MAXNAMELEN + 8]; 1042 struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; 1043 spa_t *spa; 1044 1045 buf[0] = '\0'; 1046 if (dev->dd.d_dev->dv_type != DEVT_ZFS) 1047 return (buf); 1048 1049 /* Do we have any pools? */ 1050 spa = STAILQ_FIRST(&zfs_pools); 1051 if (spa == NULL) 1052 return (buf); 1053 1054 if (dev->pool_guid == 0) 1055 dev->pool_guid = spa->spa_guid; 1056 else 1057 spa = spa_find_by_guid(dev->pool_guid); 1058 1059 if (spa == NULL) { 1060 printf("ZFS: can't find pool by guid\n"); 1061 return (buf); 1062 } 1063 if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) { 1064 printf("ZFS: can't find root filesystem\n"); 1065 return (buf); 1066 } 1067 if (zfs_rlookup(spa, dev->root_guid, rootname)) { 1068 printf("ZFS: can't find filesystem by guid\n"); 1069 return (buf); 1070 } 1071 1072 if (rootname[0] == '\0') 1073 sprintf(buf, "%s:%s:", dev->dd.d_dev->dv_name, spa->spa_name); 1074 else 1075 sprintf(buf, "%s:%s/%s:", dev->dd.d_dev->dv_name, spa->spa_name, 1076 rootname); 1077 return (buf); 1078 } 1079 1080 int 1081 zfs_list(const char *name) 1082 { 1083 static char poolname[ZFS_MAXNAMELEN]; 1084 uint64_t objid; 1085 spa_t *spa; 1086 const char *dsname; 1087 int len; 1088 int rv; 1089 1090 len = strlen(name); 1091 dsname = strchr(name, '/'); 1092 if (dsname != NULL) { 1093 len = dsname - name; 1094 dsname++; 1095 } else 1096 dsname = ""; 1097 memcpy(poolname, name, len); 1098 poolname[len] = '\0'; 1099 1100 spa = spa_find_by_name(poolname); 1101 if (!spa) 1102 return (ENXIO); 1103 rv = zfs_lookup_dataset(spa, dsname, &objid); 1104 if (rv != 0) 1105 return (rv); 1106 1107 return (zfs_list_dataset(spa, objid)); 1108 } 1109 1110 void 1111 init_zfs_bootenv(const char *currdev_in) 1112 { 1113 char *beroot, *currdev; 1114 int currdev_len; 1115 1116 currdev = NULL; 1117 currdev_len = strlen(currdev_in); 1118 if (currdev_len == 0) 1119 return; 1120 if (strncmp(currdev_in, "zfs:", 4) != 0) 1121 return; 1122 currdev = strdup(currdev_in); 1123 if (currdev == NULL) 1124 return; 1125 /* Remove the trailing : */ 1126 currdev[currdev_len - 1] = '\0'; 1127 setenv("zfs_be_active", currdev, 1); 1128 setenv("zfs_be_currpage", "1", 1); 1129 /* Remove the last element (current bootenv) */ 1130 beroot = strrchr(currdev, '/'); 1131 if (beroot != NULL) 1132 beroot[0] = '\0'; 1133 beroot = strchr(currdev, ':') + 1; 1134 setenv("zfs_be_root", beroot, 1); 1135 zfs_bootenv_initial(beroot); 1136 free(currdev); 1137 } 1138 1139 static void 1140 zfs_bootenv_initial(const char *name) 1141 { 1142 char poolname[ZFS_MAXNAMELEN], *dsname; 1143 char envname[32], envval[256]; 1144 uint64_t objid; 1145 spa_t *spa; 1146 int bootenvs_idx, len, rv; 1147 1148 SLIST_INIT(&zfs_be_head); 1149 zfs_env_count = 0; 1150 len = strlen(name); 1151 dsname = strchr(name, '/'); 1152 if (dsname != NULL) { 1153 len = dsname - name; 1154 dsname++; 1155 } else 1156 dsname = ""; 1157 strlcpy(poolname, name, len + 1); 1158 spa = spa_find_by_name(poolname); 1159 if (spa == NULL) 1160 return; 1161 rv = zfs_lookup_dataset(spa, dsname, &objid); 1162 if (rv != 0) 1163 return; 1164 rv = zfs_callback_dataset(spa, objid, zfs_belist_add); 1165 bootenvs_idx = 0; 1166 /* Populate the initial environment variables */ 1167 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { 1168 /* Enumerate all bootenvs for general usage */ 1169 snprintf(envname, sizeof(envname), "bootenvs[%d]", bootenvs_idx); 1170 snprintf(envval, sizeof(envval), "zfs:%s/%s", name, zfs_be->name); 1171 rv = setenv(envname, envval, 1); 1172 if (rv != 0) 1173 break; 1174 bootenvs_idx++; 1175 } 1176 snprintf(envval, sizeof(envval), "%d", bootenvs_idx); 1177 setenv("bootenvs_count", envval, 1); 1178 1179 /* Clean up the SLIST of ZFS BEs */ 1180 while (!SLIST_EMPTY(&zfs_be_head)) { 1181 zfs_be = SLIST_FIRST(&zfs_be_head); 1182 SLIST_REMOVE_HEAD(&zfs_be_head, entries); 1183 free(zfs_be->name); 1184 free(zfs_be); 1185 } 1186 1187 return; 1188 1189 } 1190 1191 int 1192 zfs_bootenv(const char *name) 1193 { 1194 static char poolname[ZFS_MAXNAMELEN], *dsname, *root; 1195 char becount[4]; 1196 uint64_t objid; 1197 spa_t *spa; 1198 int len, rv, pages, perpage, currpage; 1199 1200 if (name == NULL) 1201 return (EINVAL); 1202 if ((root = getenv("zfs_be_root")) == NULL) 1203 return (EINVAL); 1204 1205 if (strcmp(name, root) != 0) { 1206 if (setenv("zfs_be_root", name, 1) != 0) 1207 return (ENOMEM); 1208 } 1209 1210 SLIST_INIT(&zfs_be_head); 1211 zfs_env_count = 0; 1212 len = strlen(name); 1213 dsname = strchr(name, '/'); 1214 if (dsname != NULL) { 1215 len = dsname - name; 1216 dsname++; 1217 } else 1218 dsname = ""; 1219 memcpy(poolname, name, len); 1220 poolname[len] = '\0'; 1221 1222 spa = spa_find_by_name(poolname); 1223 if (!spa) 1224 return (ENXIO); 1225 rv = zfs_lookup_dataset(spa, dsname, &objid); 1226 if (rv != 0) 1227 return (rv); 1228 rv = zfs_callback_dataset(spa, objid, zfs_belist_add); 1229 1230 /* Calculate and store the number of pages of BEs */ 1231 perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1); 1232 pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0); 1233 snprintf(becount, 4, "%d", pages); 1234 if (setenv("zfs_be_pages", becount, 1) != 0) 1235 return (ENOMEM); 1236 1237 /* Roll over the page counter if it has exceeded the maximum */ 1238 currpage = strtol(getenv("zfs_be_currpage"), NULL, 10); 1239 if (currpage > pages) { 1240 if (setenv("zfs_be_currpage", "1", 1) != 0) 1241 return (ENOMEM); 1242 } 1243 1244 /* Populate the menu environment variables */ 1245 zfs_set_env(); 1246 1247 /* Clean up the SLIST of ZFS BEs */ 1248 while (!SLIST_EMPTY(&zfs_be_head)) { 1249 zfs_be = SLIST_FIRST(&zfs_be_head); 1250 SLIST_REMOVE_HEAD(&zfs_be_head, entries); 1251 free(zfs_be->name); 1252 free(zfs_be); 1253 } 1254 1255 return (rv); 1256 } 1257 1258 int 1259 zfs_belist_add(const char *name, uint64_t value __unused) 1260 { 1261 1262 /* Skip special datasets that start with a $ character */ 1263 if (strncmp(name, "$", 1) == 0) { 1264 return (0); 1265 } 1266 /* Add the boot environment to the head of the SLIST */ 1267 zfs_be = malloc(sizeof(struct zfs_be_entry)); 1268 if (zfs_be == NULL) { 1269 return (ENOMEM); 1270 } 1271 zfs_be->name = strdup(name); 1272 if (zfs_be->name == NULL) { 1273 free(zfs_be); 1274 return (ENOMEM); 1275 } 1276 SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries); 1277 zfs_env_count++; 1278 1279 return (0); 1280 } 1281 1282 int 1283 zfs_set_env(void) 1284 { 1285 char envname[32], envval[256]; 1286 char *beroot, *pagenum; 1287 int rv, page, ctr; 1288 1289 beroot = getenv("zfs_be_root"); 1290 if (beroot == NULL) { 1291 return (1); 1292 } 1293 1294 pagenum = getenv("zfs_be_currpage"); 1295 if (pagenum != NULL) { 1296 page = strtol(pagenum, NULL, 10); 1297 } else { 1298 page = 1; 1299 } 1300 1301 ctr = 1; 1302 rv = 0; 1303 zfs_env_index = ZFS_BE_FIRST; 1304 SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { 1305 /* Skip to the requested page number */ 1306 if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) { 1307 ctr++; 1308 continue; 1309 } 1310 1311 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); 1312 snprintf(envval, sizeof(envval), "%s", zfs_be->name); 1313 rv = setenv(envname, envval, 1); 1314 if (rv != 0) { 1315 break; 1316 } 1317 1318 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); 1319 rv = setenv(envname, envval, 1); 1320 if (rv != 0){ 1321 break; 1322 } 1323 1324 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); 1325 rv = setenv(envname, "set_bootenv", 1); 1326 if (rv != 0){ 1327 break; 1328 } 1329 1330 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); 1331 snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name); 1332 rv = setenv(envname, envval, 1); 1333 if (rv != 0){ 1334 break; 1335 } 1336 1337 zfs_env_index++; 1338 if (zfs_env_index > ZFS_BE_LAST) { 1339 break; 1340 } 1341 1342 } 1343 1344 for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) { 1345 snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); 1346 (void)unsetenv(envname); 1347 snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); 1348 (void)unsetenv(envname); 1349 snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); 1350 (void)unsetenv(envname); 1351 snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); 1352 (void)unsetenv(envname); 1353 } 1354 1355 return (rv); 1356 } 1357