1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 24 * Copyright 2020 Joyent, Inc. 25 * Copyright 2017 RackTop Systems. 26 */ 27 28 #include <assert.h> 29 #include <fcntl.h> 30 #include <poll.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <zlib.h> 35 #include <libgen.h> 36 #include <sys/spa.h> 37 #include <sys/stat.h> 38 #include <sys/processor.h> 39 #include <sys/zfs_context.h> 40 #include <zfs_fletcher.h> 41 #include <sys/rrwlock.h> 42 #include <sys/zmod.h> 43 #include <sys/utsname.h> 44 #include <sys/systeminfo.h> 45 #include <libzutil.h> 46 #include <sys/crypto/common.h> 47 #include <sys/crypto/impl.h> 48 #include <sys/crypto/api.h> 49 #include <sys/sha2.h> 50 #include <crypto/aes/aes_impl.h> 51 52 extern void system_taskq_init(void); 53 extern void system_taskq_fini(void); 54 55 /* 56 * Emulation of kernel services in userland. 57 */ 58 59 pgcnt_t physmem; 60 vnode_t *rootdir = (vnode_t *)0xabcd1234; 61 char hw_serial[HW_HOSTID_LEN]; 62 kmutex_t cpu_lock; 63 vmem_t *zio_arena = NULL; 64 65 /* If set, all blocks read will be copied to the specified directory. */ 66 char *vn_dumpdir = NULL; 67 68 struct utsname utsname = { 69 "userland", "libzpool", "1", "1", "na" 70 }; 71 72 /* 73 * ========================================================================= 74 * vnode operations 75 * ========================================================================= 76 */ 77 /* 78 * Note: for the xxxat() versions of these functions, we assume that the 79 * starting vp is always rootdir (which is true for spa_directory.c, the only 80 * ZFS consumer of these interfaces). We assert this is true, and then emulate 81 * them by adding '/' in front of the path. 82 */ 83 84 /*ARGSUSED*/ 85 int 86 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 87 { 88 int fd; 89 int dump_fd; 90 vnode_t *vp; 91 int old_umask; 92 char realpath[MAXPATHLEN]; 93 struct stat64 st; 94 95 /* 96 * If we're accessing a real disk from userland, we need to use 97 * the character interface to avoid caching. This is particularly 98 * important if we're trying to look at a real in-kernel storage 99 * pool from userland, e.g. via zdb, because otherwise we won't 100 * see the changes occurring under the segmap cache. 101 * On the other hand, the stupid character device returns zero 102 * for its size. So -- gag -- we open the block device to get 103 * its size, and remember it for subsequent VOP_GETATTR(). 104 */ 105 if (strncmp(path, "/dev/", 5) == 0) { 106 char *dsk; 107 fd = open64(path, O_RDONLY); 108 if (fd == -1) 109 return (errno); 110 if (fstat64(fd, &st) == -1) { 111 close(fd); 112 return (errno); 113 } 114 close(fd); 115 (void) sprintf(realpath, "%s", path); 116 dsk = strstr(path, "/dsk/"); 117 if (dsk != NULL) 118 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 119 dsk + 1); 120 } else { 121 (void) sprintf(realpath, "%s", path); 122 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 123 return (errno); 124 } 125 126 if (flags & FCREAT) 127 old_umask = umask(0); 128 129 /* 130 * The construct 'flags - FREAD' conveniently maps combinations of 131 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 132 */ 133 fd = open64(realpath, flags - FREAD, mode); 134 135 if (flags & FCREAT) 136 (void) umask(old_umask); 137 138 if (vn_dumpdir != NULL) { 139 char dumppath[MAXPATHLEN]; 140 (void) snprintf(dumppath, sizeof (dumppath), 141 "%s/%s", vn_dumpdir, basename(realpath)); 142 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); 143 if (dump_fd == -1) 144 return (errno); 145 } else { 146 dump_fd = -1; 147 } 148 149 if (fd == -1) 150 return (errno); 151 152 if (fstat64(fd, &st) == -1) { 153 close(fd); 154 return (errno); 155 } 156 157 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 158 159 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 160 161 vp->v_fd = fd; 162 vp->v_size = st.st_size; 163 vp->v_path = spa_strdup(path); 164 vp->v_dump_fd = dump_fd; 165 166 return (0); 167 } 168 169 /*ARGSUSED*/ 170 int 171 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 172 int x3, vnode_t *startvp, int fd) 173 { 174 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 175 int ret; 176 177 ASSERT(startvp == rootdir); 178 (void) sprintf(realpath, "/%s", path); 179 180 /* fd ignored for now, need if want to simulate nbmand support */ 181 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 182 183 umem_free(realpath, strlen(path) + 2); 184 185 return (ret); 186 } 187 188 /*ARGSUSED*/ 189 int 190 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 191 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 192 { 193 ssize_t iolen, split; 194 195 if (uio == UIO_READ) { 196 iolen = pread64(vp->v_fd, addr, len, offset); 197 if (vp->v_dump_fd != -1) { 198 int status = 199 pwrite64(vp->v_dump_fd, addr, iolen, offset); 200 ASSERT(status != -1); 201 } 202 } else { 203 /* 204 * To simulate partial disk writes, we split writes into two 205 * system calls so that the process can be killed in between. 206 */ 207 int sectors = len >> SPA_MINBLOCKSHIFT; 208 split = (sectors > 0 ? rand() % sectors : 0) << 209 SPA_MINBLOCKSHIFT; 210 iolen = pwrite64(vp->v_fd, addr, split, offset); 211 iolen += pwrite64(vp->v_fd, (char *)addr + split, 212 len - split, offset + split); 213 } 214 215 if (iolen == -1) 216 return (errno); 217 if (residp) 218 *residp = len - iolen; 219 else if (iolen != len) 220 return (EIO); 221 return (0); 222 } 223 224 void 225 vn_close(vnode_t *vp) 226 { 227 close(vp->v_fd); 228 if (vp->v_dump_fd != -1) 229 close(vp->v_dump_fd); 230 spa_strfree(vp->v_path); 231 umem_free(vp, sizeof (vnode_t)); 232 } 233 234 /* 235 * At a minimum we need to update the size since vdev_reopen() 236 * will no longer call vn_openat(). 237 */ 238 int 239 fop_getattr(vnode_t *vp, vattr_t *vap) 240 { 241 struct stat64 st; 242 243 if (fstat64(vp->v_fd, &st) == -1) { 244 close(vp->v_fd); 245 return (errno); 246 } 247 248 vap->va_size = st.st_size; 249 return (0); 250 } 251 252 #ifdef ZFS_DEBUG 253 254 /* 255 * ========================================================================= 256 * Figure out which debugging statements to print 257 * ========================================================================= 258 */ 259 260 static char *dprintf_string; 261 static int dprintf_print_all; 262 263 int 264 dprintf_find_string(const char *string) 265 { 266 char *tmp_str = dprintf_string; 267 int len = strlen(string); 268 269 /* 270 * Find out if this is a string we want to print. 271 * String format: file1.c,function_name1,file2.c,file3.c 272 */ 273 274 while (tmp_str != NULL) { 275 if (strncmp(tmp_str, string, len) == 0 && 276 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 277 return (1); 278 tmp_str = strchr(tmp_str, ','); 279 if (tmp_str != NULL) 280 tmp_str++; /* Get rid of , */ 281 } 282 return (0); 283 } 284 285 void 286 dprintf_setup(int *argc, char **argv) 287 { 288 int i, j; 289 290 /* 291 * Debugging can be specified two ways: by setting the 292 * environment variable ZFS_DEBUG, or by including a 293 * "debug=..." argument on the command line. The command 294 * line setting overrides the environment variable. 295 */ 296 297 for (i = 1; i < *argc; i++) { 298 int len = strlen("debug="); 299 /* First look for a command line argument */ 300 if (strncmp("debug=", argv[i], len) == 0) { 301 dprintf_string = argv[i] + len; 302 /* Remove from args */ 303 for (j = i; j < *argc; j++) 304 argv[j] = argv[j+1]; 305 argv[j] = NULL; 306 (*argc)--; 307 } 308 } 309 310 if (dprintf_string == NULL) { 311 /* Look for ZFS_DEBUG environment variable */ 312 dprintf_string = getenv("ZFS_DEBUG"); 313 } 314 315 /* 316 * Are we just turning on all debugging? 317 */ 318 if (dprintf_find_string("on")) 319 dprintf_print_all = 1; 320 321 if (dprintf_string != NULL) 322 zfs_flags |= ZFS_DEBUG_DPRINTF; 323 } 324 325 /* 326 * ========================================================================= 327 * debug printfs 328 * ========================================================================= 329 */ 330 void 331 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 332 { 333 const char *newfile; 334 va_list adx; 335 336 /* 337 * Get rid of annoying "../common/" prefix to filename. 338 */ 339 newfile = strrchr(file, '/'); 340 if (newfile != NULL) { 341 newfile = newfile + 1; /* Get rid of leading / */ 342 } else { 343 newfile = file; 344 } 345 346 if (dprintf_print_all || 347 dprintf_find_string(newfile) || 348 dprintf_find_string(func)) { 349 /* Print out just the function name if requested */ 350 flockfile(stdout); 351 if (dprintf_find_string("pid")) 352 (void) printf("%d ", getpid()); 353 if (dprintf_find_string("tid")) 354 (void) printf("%u ", thr_self()); 355 if (dprintf_find_string("cpu")) 356 (void) printf("%u ", getcpuid()); 357 if (dprintf_find_string("time")) 358 (void) printf("%llu ", gethrtime()); 359 if (dprintf_find_string("long")) 360 (void) printf("%s, line %d: ", newfile, line); 361 (void) printf("%s: ", func); 362 va_start(adx, fmt); 363 (void) vprintf(fmt, adx); 364 va_end(adx); 365 funlockfile(stdout); 366 } 367 } 368 369 #endif /* ZFS_DEBUG */ 370 371 /* 372 * ========================================================================= 373 * kobj interfaces 374 * ========================================================================= 375 */ 376 struct _buf * 377 kobj_open_file(char *name) 378 { 379 struct _buf *file; 380 vnode_t *vp; 381 382 /* set vp as the _fd field of the file */ 383 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, 384 -1) != 0) 385 return ((void *)-1UL); 386 387 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 388 file->_fd = (intptr_t)vp; 389 return (file); 390 } 391 392 int 393 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 394 { 395 ssize_t resid; 396 397 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 398 UIO_SYSSPACE, 0, 0, 0, &resid); 399 400 return (size - resid); 401 } 402 403 void 404 kobj_close_file(struct _buf *file) 405 { 406 vn_close((vnode_t *)file->_fd); 407 umem_free(file, sizeof (struct _buf)); 408 } 409 410 int 411 kobj_get_filesize(struct _buf *file, uint64_t *size) 412 { 413 struct stat64 st; 414 vnode_t *vp = (vnode_t *)file->_fd; 415 416 if (fstat64(vp->v_fd, &st) == -1) { 417 vn_close(vp); 418 return (errno); 419 } 420 *size = st.st_size; 421 return (0); 422 } 423 424 /* 425 * ========================================================================= 426 * misc routines 427 * ========================================================================= 428 */ 429 430 /* 431 * Find lowest one bit set. 432 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0. 433 * This is basically a reimplementation of ffsll(), which is GNU specific. 434 */ 435 int 436 lowbit64(uint64_t i) 437 { 438 register int h = 64; 439 if (i == 0) 440 return (0); 441 442 if (i & 0x00000000ffffffffULL) 443 h -= 32; 444 else 445 i >>= 32; 446 447 if (i & 0x0000ffff) 448 h -= 16; 449 else 450 i >>= 16; 451 452 if (i & 0x00ff) 453 h -= 8; 454 else 455 i >>= 8; 456 457 if (i & 0x0f) 458 h -= 4; 459 else 460 i >>= 4; 461 462 if (i & 0x3) 463 h -= 2; 464 else 465 i >>= 2; 466 467 if (i & 0x1) 468 h -= 1; 469 470 return (h); 471 } 472 473 int 474 highbit64(uint64_t i) 475 { 476 int h = 1; 477 478 if (i == 0) 479 return (0); 480 if (i & 0xffffffff00000000ULL) { 481 h += 32; i >>= 32; 482 } 483 if (i & 0xffff0000) { 484 h += 16; i >>= 16; 485 } 486 if (i & 0xff00) { 487 h += 8; i >>= 8; 488 } 489 if (i & 0xf0) { 490 h += 4; i >>= 4; 491 } 492 if (i & 0xc) { 493 h += 2; i >>= 2; 494 } 495 if (i & 0x2) { 496 h += 1; 497 } 498 return (h); 499 } 500 501 /* 502 * ========================================================================= 503 * kernel emulation setup & teardown 504 * ========================================================================= 505 */ 506 static int 507 umem_out_of_memory(void) 508 { 509 char errmsg[] = "out of memory -- generating core dump\n"; 510 511 write(fileno(stderr), errmsg, sizeof (errmsg)); 512 abort(); 513 return (0); 514 } 515 516 void 517 kernel_init(int mode) 518 { 519 extern uint_t rrw_tsd_key; 520 521 umem_nofail_callback(umem_out_of_memory); 522 523 physmem = sysconf(_SC_PHYS_PAGES); 524 525 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 526 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 527 528 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", 529 (mode & FWRITE) ? get_system_hostid() : 0); 530 531 system_taskq_init(); 532 533 mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL); 534 535 spa_init(mode); 536 537 fletcher_4_init(); 538 539 tsd_create(&rrw_tsd_key, rrw_tsd_destroy); 540 } 541 542 void 543 kernel_fini(void) 544 { 545 fletcher_4_fini(); 546 547 spa_fini(); 548 549 system_taskq_fini(); 550 } 551 552 /* ARGSUSED */ 553 uint32_t 554 zone_get_hostid(void *zonep) 555 { 556 /* 557 * We're emulating the system's hostid in userland. 558 */ 559 return (strtoul(hw_serial, NULL, 10)); 560 } 561 562 int 563 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) 564 { 565 int ret; 566 uLongf len = *dstlen; 567 568 if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK) 569 *dstlen = (size_t)len; 570 571 return (ret); 572 } 573 574 int 575 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, 576 int level) 577 { 578 int ret; 579 uLongf len = *dstlen; 580 581 if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK) 582 *dstlen = (size_t)len; 583 584 return (ret); 585 } 586 587 int 588 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) 589 { 590 return (0); 591 } 592 593 int 594 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) 595 { 596 return (0); 597 } 598 599 int 600 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) 601 { 602 return (0); 603 } 604 605 /* ARGSUSED */ 606 int 607 zfs_onexit_fd_hold(int fd, minor_t *minorp) 608 { 609 *minorp = 0; 610 return (0); 611 } 612 613 /* ARGSUSED */ 614 void 615 zfs_onexit_fd_rele(int fd) 616 { 617 } 618 619 /* ARGSUSED */ 620 int 621 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, 622 uint64_t *action_handle) 623 { 624 return (0); 625 } 626 627 /* ARGSUSED */ 628 int 629 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) 630 { 631 return (0); 632 } 633 634 /* ARGSUSED */ 635 int 636 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) 637 { 638 return (0); 639 } 640 641 void 642 bioinit(buf_t *bp) 643 { 644 bzero(bp, sizeof (buf_t)); 645 } 646 647 void 648 biodone(buf_t *bp) 649 { 650 if (bp->b_iodone != NULL) { 651 (*(bp->b_iodone))(bp); 652 return; 653 } 654 ASSERT((bp->b_flags & B_DONE) == 0); 655 bp->b_flags |= B_DONE; 656 } 657 658 void 659 bioerror(buf_t *bp, int error) 660 { 661 ASSERT(bp != NULL); 662 ASSERT(error >= 0); 663 664 if (error != 0) { 665 bp->b_flags |= B_ERROR; 666 } else { 667 bp->b_flags &= ~B_ERROR; 668 } 669 bp->b_error = error; 670 } 671 672 673 int 674 geterror(struct buf *bp) 675 { 676 int error = 0; 677 678 if (bp->b_flags & B_ERROR) { 679 error = bp->b_error; 680 if (!error) 681 error = EIO; 682 } 683 return (error); 684 } 685 686 int 687 crypto_create_ctx_template(crypto_mechanism_t *mech, 688 crypto_key_t *key, crypto_ctx_template_t *tmpl, int kmflag) 689 { 690 return (0); 691 } 692 693 crypto_mech_type_t 694 crypto_mech2id(const char *name) 695 { 696 return (CRYPTO_MECH_INVALID); 697 } 698 699 int 700 crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data, 701 crypto_key_t *key, crypto_ctx_template_t impl, 702 crypto_data_t *mac, crypto_call_req_t *cr) 703 { 704 return (0); 705 } 706 707 int 708 crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, 709 crypto_key_t *key, crypto_ctx_template_t tmpl, 710 crypto_data_t *ciphertext, crypto_call_req_t *cr) 711 { 712 return (0); 713 } 714 715 /* This could probably be a weak reference */ 716 int 717 crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, 718 crypto_key_t *key, crypto_ctx_template_t tmpl, 719 crypto_data_t *ciphertext, crypto_call_req_t *cr) 720 { 721 return (0); 722 } 723 724 725 int 726 crypto_digest_final(crypto_context_t context, crypto_data_t *digest, 727 crypto_call_req_t *cr) 728 { 729 return (0); 730 } 731 732 int 733 crypto_digest_update(crypto_context_t context, crypto_data_t *data, 734 crypto_call_req_t *cr) 735 { 736 return (0); 737 } 738 739 int 740 crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp, 741 crypto_call_req_t *crq) 742 { 743 return (0); 744 } 745 746 void 747 crypto_destroy_ctx_template(crypto_ctx_template_t tmpl) 748 { 749 } 750 751 extern int crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key, 752 crypto_ctx_template_t tmpl, crypto_context_t *ctxp, 753 crypto_call_req_t *cr) 754 { 755 return (0); 756 } 757 758 extern int crypto_mac_update(crypto_context_t ctx, crypto_data_t *data, 759 crypto_call_req_t *cr) 760 { 761 return (0); 762 } 763 764 extern int crypto_mac_final(crypto_context_t ctx, crypto_data_t *data, 765 crypto_call_req_t *cr) 766 { 767 return (0); 768 } 769