1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 24 * Copyright (c) 2016 Actifio, Inc. All rights reserved. 25 */ 26 27 #include <assert.h> 28 #include <fcntl.h> 29 #include <libgen.h> 30 #include <poll.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <sys/crypto/icp.h> 35 #include <sys/processor.h> 36 #include <sys/rrwlock.h> 37 #include <sys/spa.h> 38 #include <sys/stat.h> 39 #include <sys/systeminfo.h> 40 #include <sys/time.h> 41 #include <sys/utsname.h> 42 #include <sys/zfs_context.h> 43 #include <sys/zfs_onexit.h> 44 #include <sys/zfs_vfsops.h> 45 #include <sys/zstd/zstd.h> 46 #include <sys/zvol.h> 47 #include <zfs_fletcher.h> 48 #include <zlib.h> 49 50 /* 51 * Emulation of kernel services in userland. 52 */ 53 54 uint64_t physmem; 55 char hw_serial[HW_HOSTID_LEN]; 56 struct utsname hw_utsname; 57 58 /* If set, all blocks read will be copied to the specified directory. */ 59 char *vn_dumpdir = NULL; 60 61 /* this only exists to have its address taken */ 62 struct proc p0; 63 64 /* 65 * ========================================================================= 66 * threads 67 * ========================================================================= 68 * 69 * TS_STACK_MIN is dictated by the minimum allowed pthread stack size. While 70 * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for 71 * the expected stack depth while small enough to avoid exhausting address 72 * space with high thread counts. 73 */ 74 #define TS_STACK_MIN MAX(PTHREAD_STACK_MIN, 32768) 75 #define TS_STACK_MAX (256 * 1024) 76 77 /*ARGSUSED*/ 78 kthread_t * 79 zk_thread_create(void (*func)(void *), void *arg, size_t stksize, int state) 80 { 81 pthread_attr_t attr; 82 pthread_t tid; 83 char *stkstr; 84 int detachstate = PTHREAD_CREATE_DETACHED; 85 86 VERIFY0(pthread_attr_init(&attr)); 87 88 if (state & TS_JOINABLE) 89 detachstate = PTHREAD_CREATE_JOINABLE; 90 91 VERIFY0(pthread_attr_setdetachstate(&attr, detachstate)); 92 93 /* 94 * We allow the default stack size in user space to be specified by 95 * setting the ZFS_STACK_SIZE environment variable. This allows us 96 * the convenience of observing and debugging stack overruns in 97 * user space. Explicitly specified stack sizes will be honored. 98 * The usage of ZFS_STACK_SIZE is discussed further in the 99 * ENVIRONMENT VARIABLES sections of the ztest(1) man page. 100 */ 101 if (stksize == 0) { 102 stkstr = getenv("ZFS_STACK_SIZE"); 103 104 if (stkstr == NULL) 105 stksize = TS_STACK_MAX; 106 else 107 stksize = MAX(atoi(stkstr), TS_STACK_MIN); 108 } 109 110 VERIFY3S(stksize, >, 0); 111 stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE); 112 113 /* 114 * If this ever fails, it may be because the stack size is not a 115 * multiple of system page size. 116 */ 117 VERIFY0(pthread_attr_setstacksize(&attr, stksize)); 118 VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE)); 119 120 VERIFY0(pthread_create(&tid, &attr, (void *(*)(void *))func, arg)); 121 VERIFY0(pthread_attr_destroy(&attr)); 122 123 return ((void *)(uintptr_t)tid); 124 } 125 126 /* 127 * ========================================================================= 128 * kstats 129 * ========================================================================= 130 */ 131 /*ARGSUSED*/ 132 kstat_t * 133 kstat_create(const char *module, int instance, const char *name, 134 const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag) 135 { 136 return (NULL); 137 } 138 139 /*ARGSUSED*/ 140 void 141 kstat_install(kstat_t *ksp) 142 {} 143 144 /*ARGSUSED*/ 145 void 146 kstat_delete(kstat_t *ksp) 147 {} 148 149 /*ARGSUSED*/ 150 void 151 kstat_waitq_enter(kstat_io_t *kiop) 152 {} 153 154 /*ARGSUSED*/ 155 void 156 kstat_waitq_exit(kstat_io_t *kiop) 157 {} 158 159 /*ARGSUSED*/ 160 void 161 kstat_runq_enter(kstat_io_t *kiop) 162 {} 163 164 /*ARGSUSED*/ 165 void 166 kstat_runq_exit(kstat_io_t *kiop) 167 {} 168 169 /*ARGSUSED*/ 170 void 171 kstat_waitq_to_runq(kstat_io_t *kiop) 172 {} 173 174 /*ARGSUSED*/ 175 void 176 kstat_runq_back_to_waitq(kstat_io_t *kiop) 177 {} 178 179 void 180 kstat_set_raw_ops(kstat_t *ksp, 181 int (*headers)(char *buf, size_t size), 182 int (*data)(char *buf, size_t size, void *data), 183 void *(*addr)(kstat_t *ksp, loff_t index)) 184 {} 185 186 /* 187 * ========================================================================= 188 * mutexes 189 * ========================================================================= 190 */ 191 192 void 193 mutex_init(kmutex_t *mp, char *name, int type, void *cookie) 194 { 195 VERIFY0(pthread_mutex_init(&mp->m_lock, NULL)); 196 memset(&mp->m_owner, 0, sizeof (pthread_t)); 197 } 198 199 void 200 mutex_destroy(kmutex_t *mp) 201 { 202 VERIFY0(pthread_mutex_destroy(&mp->m_lock)); 203 } 204 205 void 206 mutex_enter(kmutex_t *mp) 207 { 208 VERIFY0(pthread_mutex_lock(&mp->m_lock)); 209 mp->m_owner = pthread_self(); 210 } 211 212 int 213 mutex_tryenter(kmutex_t *mp) 214 { 215 int error; 216 217 error = pthread_mutex_trylock(&mp->m_lock); 218 if (error == 0) { 219 mp->m_owner = pthread_self(); 220 return (1); 221 } else { 222 VERIFY3S(error, ==, EBUSY); 223 return (0); 224 } 225 } 226 227 void 228 mutex_exit(kmutex_t *mp) 229 { 230 memset(&mp->m_owner, 0, sizeof (pthread_t)); 231 VERIFY0(pthread_mutex_unlock(&mp->m_lock)); 232 } 233 234 /* 235 * ========================================================================= 236 * rwlocks 237 * ========================================================================= 238 */ 239 240 void 241 rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 242 { 243 VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL)); 244 rwlp->rw_readers = 0; 245 rwlp->rw_owner = 0; 246 } 247 248 void 249 rw_destroy(krwlock_t *rwlp) 250 { 251 VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock)); 252 } 253 254 void 255 rw_enter(krwlock_t *rwlp, krw_t rw) 256 { 257 if (rw == RW_READER) { 258 VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock)); 259 atomic_inc_uint(&rwlp->rw_readers); 260 } else { 261 VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock)); 262 rwlp->rw_owner = pthread_self(); 263 } 264 } 265 266 void 267 rw_exit(krwlock_t *rwlp) 268 { 269 if (RW_READ_HELD(rwlp)) 270 atomic_dec_uint(&rwlp->rw_readers); 271 else 272 rwlp->rw_owner = 0; 273 274 VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock)); 275 } 276 277 int 278 rw_tryenter(krwlock_t *rwlp, krw_t rw) 279 { 280 int error; 281 282 if (rw == RW_READER) 283 error = pthread_rwlock_tryrdlock(&rwlp->rw_lock); 284 else 285 error = pthread_rwlock_trywrlock(&rwlp->rw_lock); 286 287 if (error == 0) { 288 if (rw == RW_READER) 289 atomic_inc_uint(&rwlp->rw_readers); 290 else 291 rwlp->rw_owner = pthread_self(); 292 293 return (1); 294 } 295 296 VERIFY3S(error, ==, EBUSY); 297 298 return (0); 299 } 300 301 /* ARGSUSED */ 302 uint32_t 303 zone_get_hostid(void *zonep) 304 { 305 /* 306 * We're emulating the system's hostid in userland. 307 */ 308 return (strtoul(hw_serial, NULL, 10)); 309 } 310 311 int 312 rw_tryupgrade(krwlock_t *rwlp) 313 { 314 return (0); 315 } 316 317 /* 318 * ========================================================================= 319 * condition variables 320 * ========================================================================= 321 */ 322 323 void 324 cv_init(kcondvar_t *cv, char *name, int type, void *arg) 325 { 326 VERIFY0(pthread_cond_init(cv, NULL)); 327 } 328 329 void 330 cv_destroy(kcondvar_t *cv) 331 { 332 VERIFY0(pthread_cond_destroy(cv)); 333 } 334 335 void 336 cv_wait(kcondvar_t *cv, kmutex_t *mp) 337 { 338 memset(&mp->m_owner, 0, sizeof (pthread_t)); 339 VERIFY0(pthread_cond_wait(cv, &mp->m_lock)); 340 mp->m_owner = pthread_self(); 341 } 342 343 int 344 cv_wait_sig(kcondvar_t *cv, kmutex_t *mp) 345 { 346 cv_wait(cv, mp); 347 return (1); 348 } 349 350 int 351 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 352 { 353 int error; 354 struct timeval tv; 355 struct timespec ts; 356 clock_t delta; 357 358 delta = abstime - ddi_get_lbolt(); 359 if (delta <= 0) 360 return (-1); 361 362 VERIFY(gettimeofday(&tv, NULL) == 0); 363 364 ts.tv_sec = tv.tv_sec + delta / hz; 365 ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz); 366 if (ts.tv_nsec >= NANOSEC) { 367 ts.tv_sec++; 368 ts.tv_nsec -= NANOSEC; 369 } 370 371 memset(&mp->m_owner, 0, sizeof (pthread_t)); 372 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); 373 mp->m_owner = pthread_self(); 374 375 if (error == ETIMEDOUT) 376 return (-1); 377 378 VERIFY0(error); 379 380 return (1); 381 } 382 383 /*ARGSUSED*/ 384 int 385 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res, 386 int flag) 387 { 388 int error; 389 struct timeval tv; 390 struct timespec ts; 391 hrtime_t delta; 392 393 ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE); 394 395 delta = tim; 396 if (flag & CALLOUT_FLAG_ABSOLUTE) 397 delta -= gethrtime(); 398 399 if (delta <= 0) 400 return (-1); 401 402 VERIFY0(gettimeofday(&tv, NULL)); 403 404 ts.tv_sec = tv.tv_sec + delta / NANOSEC; 405 ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % NANOSEC); 406 if (ts.tv_nsec >= NANOSEC) { 407 ts.tv_sec++; 408 ts.tv_nsec -= NANOSEC; 409 } 410 411 memset(&mp->m_owner, 0, sizeof (pthread_t)); 412 error = pthread_cond_timedwait(cv, &mp->m_lock, &ts); 413 mp->m_owner = pthread_self(); 414 415 if (error == ETIMEDOUT) 416 return (-1); 417 418 VERIFY0(error); 419 420 return (1); 421 } 422 423 void 424 cv_signal(kcondvar_t *cv) 425 { 426 VERIFY0(pthread_cond_signal(cv)); 427 } 428 429 void 430 cv_broadcast(kcondvar_t *cv) 431 { 432 VERIFY0(pthread_cond_broadcast(cv)); 433 } 434 435 /* 436 * ========================================================================= 437 * procfs list 438 * ========================================================================= 439 */ 440 441 void 442 seq_printf(struct seq_file *m, const char *fmt, ...) 443 {} 444 445 void 446 procfs_list_install(const char *module, 447 const char *name, 448 mode_t mode, 449 procfs_list_t *procfs_list, 450 int (*show)(struct seq_file *f, void *p), 451 int (*show_header)(struct seq_file *f), 452 int (*clear)(procfs_list_t *procfs_list), 453 size_t procfs_list_node_off) 454 { 455 mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL); 456 list_create(&procfs_list->pl_list, 457 procfs_list_node_off + sizeof (procfs_list_node_t), 458 procfs_list_node_off + offsetof(procfs_list_node_t, pln_link)); 459 procfs_list->pl_next_id = 1; 460 procfs_list->pl_node_offset = procfs_list_node_off; 461 } 462 463 void 464 procfs_list_uninstall(procfs_list_t *procfs_list) 465 {} 466 467 void 468 procfs_list_destroy(procfs_list_t *procfs_list) 469 { 470 ASSERT(list_is_empty(&procfs_list->pl_list)); 471 list_destroy(&procfs_list->pl_list); 472 mutex_destroy(&procfs_list->pl_lock); 473 } 474 475 #define NODE_ID(procfs_list, obj) \ 476 (((procfs_list_node_t *)(((char *)obj) + \ 477 (procfs_list)->pl_node_offset))->pln_id) 478 479 void 480 procfs_list_add(procfs_list_t *procfs_list, void *p) 481 { 482 ASSERT(MUTEX_HELD(&procfs_list->pl_lock)); 483 NODE_ID(procfs_list, p) = procfs_list->pl_next_id++; 484 list_insert_tail(&procfs_list->pl_list, p); 485 } 486 487 /* 488 * ========================================================================= 489 * vnode operations 490 * ========================================================================= 491 */ 492 493 /* 494 * ========================================================================= 495 * Figure out which debugging statements to print 496 * ========================================================================= 497 */ 498 499 static char *dprintf_string; 500 static int dprintf_print_all; 501 502 int 503 dprintf_find_string(const char *string) 504 { 505 char *tmp_str = dprintf_string; 506 int len = strlen(string); 507 508 /* 509 * Find out if this is a string we want to print. 510 * String format: file1.c,function_name1,file2.c,file3.c 511 */ 512 513 while (tmp_str != NULL) { 514 if (strncmp(tmp_str, string, len) == 0 && 515 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 516 return (1); 517 tmp_str = strchr(tmp_str, ','); 518 if (tmp_str != NULL) 519 tmp_str++; /* Get rid of , */ 520 } 521 return (0); 522 } 523 524 void 525 dprintf_setup(int *argc, char **argv) 526 { 527 int i, j; 528 529 /* 530 * Debugging can be specified two ways: by setting the 531 * environment variable ZFS_DEBUG, or by including a 532 * "debug=..." argument on the command line. The command 533 * line setting overrides the environment variable. 534 */ 535 536 for (i = 1; i < *argc; i++) { 537 int len = strlen("debug="); 538 /* First look for a command line argument */ 539 if (strncmp("debug=", argv[i], len) == 0) { 540 dprintf_string = argv[i] + len; 541 /* Remove from args */ 542 for (j = i; j < *argc; j++) 543 argv[j] = argv[j+1]; 544 argv[j] = NULL; 545 (*argc)--; 546 } 547 } 548 549 if (dprintf_string == NULL) { 550 /* Look for ZFS_DEBUG environment variable */ 551 dprintf_string = getenv("ZFS_DEBUG"); 552 } 553 554 /* 555 * Are we just turning on all debugging? 556 */ 557 if (dprintf_find_string("on")) 558 dprintf_print_all = 1; 559 560 if (dprintf_string != NULL) 561 zfs_flags |= ZFS_DEBUG_DPRINTF; 562 } 563 564 /* 565 * ========================================================================= 566 * debug printfs 567 * ========================================================================= 568 */ 569 void 570 __dprintf(boolean_t dprint, const char *file, const char *func, 571 int line, const char *fmt, ...) 572 { 573 const char *newfile; 574 va_list adx; 575 576 /* 577 * Get rid of annoying "../common/" prefix to filename. 578 */ 579 newfile = strrchr(file, '/'); 580 if (newfile != NULL) { 581 newfile = newfile + 1; /* Get rid of leading / */ 582 } else { 583 newfile = file; 584 } 585 586 if (dprint) { 587 /* dprintf messages are printed immediately */ 588 589 if (!dprintf_print_all && 590 !dprintf_find_string(newfile) && 591 !dprintf_find_string(func)) 592 return; 593 594 /* Print out just the function name if requested */ 595 flockfile(stdout); 596 if (dprintf_find_string("pid")) 597 (void) printf("%d ", getpid()); 598 if (dprintf_find_string("tid")) 599 (void) printf("%ju ", 600 (uintmax_t)(uintptr_t)pthread_self()); 601 if (dprintf_find_string("cpu")) 602 (void) printf("%u ", getcpuid()); 603 if (dprintf_find_string("time")) 604 (void) printf("%llu ", gethrtime()); 605 if (dprintf_find_string("long")) 606 (void) printf("%s, line %d: ", newfile, line); 607 (void) printf("dprintf: %s: ", func); 608 va_start(adx, fmt); 609 (void) vprintf(fmt, adx); 610 va_end(adx); 611 funlockfile(stdout); 612 } else { 613 /* zfs_dbgmsg is logged for dumping later */ 614 size_t size; 615 char *buf; 616 int i; 617 618 size = 1024; 619 buf = umem_alloc(size, UMEM_NOFAIL); 620 i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func); 621 622 if (i < size) { 623 va_start(adx, fmt); 624 (void) vsnprintf(buf + i, size - i, fmt, adx); 625 va_end(adx); 626 } 627 628 __zfs_dbgmsg(buf); 629 630 umem_free(buf, size); 631 } 632 } 633 634 /* 635 * ========================================================================= 636 * cmn_err() and panic() 637 * ========================================================================= 638 */ 639 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 640 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 641 642 void 643 vpanic(const char *fmt, va_list adx) 644 { 645 (void) fprintf(stderr, "error: "); 646 (void) vfprintf(stderr, fmt, adx); 647 (void) fprintf(stderr, "\n"); 648 649 abort(); /* think of it as a "user-level crash dump" */ 650 } 651 652 void 653 panic(const char *fmt, ...) 654 { 655 va_list adx; 656 657 va_start(adx, fmt); 658 vpanic(fmt, adx); 659 va_end(adx); 660 } 661 662 void 663 vcmn_err(int ce, const char *fmt, va_list adx) 664 { 665 if (ce == CE_PANIC) 666 vpanic(fmt, adx); 667 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 668 (void) fprintf(stderr, "%s", ce_prefix[ce]); 669 (void) vfprintf(stderr, fmt, adx); 670 (void) fprintf(stderr, "%s", ce_suffix[ce]); 671 } 672 } 673 674 /*PRINTFLIKE2*/ 675 void 676 cmn_err(int ce, const char *fmt, ...) 677 { 678 va_list adx; 679 680 va_start(adx, fmt); 681 vcmn_err(ce, fmt, adx); 682 va_end(adx); 683 } 684 685 /* 686 * ========================================================================= 687 * misc routines 688 * ========================================================================= 689 */ 690 691 void 692 delay(clock_t ticks) 693 { 694 (void) poll(0, 0, ticks * (1000 / hz)); 695 } 696 697 /* 698 * Find highest one bit set. 699 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 700 * The __builtin_clzll() function is supported by both GCC and Clang. 701 */ 702 int 703 highbit64(uint64_t i) 704 { 705 if (i == 0) 706 return (0); 707 708 return (NBBY * sizeof (uint64_t) - __builtin_clzll(i)); 709 } 710 711 /* 712 * Find lowest one bit set. 713 * Returns bit number + 1 of lowest bit that is set, otherwise returns 0. 714 * The __builtin_ffsll() function is supported by both GCC and Clang. 715 */ 716 int 717 lowbit64(uint64_t i) 718 { 719 if (i == 0) 720 return (0); 721 722 return (__builtin_ffsll(i)); 723 } 724 725 char *random_path = "/dev/random"; 726 char *urandom_path = "/dev/urandom"; 727 static int random_fd = -1, urandom_fd = -1; 728 729 void 730 random_init(void) 731 { 732 VERIFY((random_fd = open(random_path, O_RDONLY)) != -1); 733 VERIFY((urandom_fd = open(urandom_path, O_RDONLY)) != -1); 734 } 735 736 void 737 random_fini(void) 738 { 739 close(random_fd); 740 close(urandom_fd); 741 742 random_fd = -1; 743 urandom_fd = -1; 744 } 745 746 static int 747 random_get_bytes_common(uint8_t *ptr, size_t len, int fd) 748 { 749 size_t resid = len; 750 ssize_t bytes; 751 752 ASSERT(fd != -1); 753 754 while (resid != 0) { 755 bytes = read(fd, ptr, resid); 756 ASSERT3S(bytes, >=, 0); 757 ptr += bytes; 758 resid -= bytes; 759 } 760 761 return (0); 762 } 763 764 int 765 random_get_bytes(uint8_t *ptr, size_t len) 766 { 767 return (random_get_bytes_common(ptr, len, random_fd)); 768 } 769 770 int 771 random_get_pseudo_bytes(uint8_t *ptr, size_t len) 772 { 773 return (random_get_bytes_common(ptr, len, urandom_fd)); 774 } 775 776 int 777 ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) 778 { 779 char *end; 780 781 *result = strtoul(hw_serial, &end, base); 782 if (*result == 0) 783 return (errno); 784 return (0); 785 } 786 787 int 788 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result) 789 { 790 char *end; 791 792 *result = strtoull(str, &end, base); 793 if (*result == 0) 794 return (errno); 795 return (0); 796 } 797 798 utsname_t * 799 utsname(void) 800 { 801 return (&hw_utsname); 802 } 803 804 /* 805 * ========================================================================= 806 * kernel emulation setup & teardown 807 * ========================================================================= 808 */ 809 static int 810 umem_out_of_memory(void) 811 { 812 char errmsg[] = "out of memory -- generating core dump\n"; 813 814 (void) fprintf(stderr, "%s", errmsg); 815 abort(); 816 return (0); 817 } 818 819 void 820 kernel_init(int mode) 821 { 822 extern uint_t rrw_tsd_key; 823 824 umem_nofail_callback(umem_out_of_memory); 825 826 physmem = sysconf(_SC_PHYS_PAGES); 827 828 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 829 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 830 831 (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", 832 (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0); 833 834 random_init(); 835 836 VERIFY0(uname(&hw_utsname)); 837 838 system_taskq_init(); 839 icp_init(); 840 841 zstd_init(); 842 843 spa_init((spa_mode_t)mode); 844 845 fletcher_4_init(); 846 847 tsd_create(&rrw_tsd_key, rrw_tsd_destroy); 848 } 849 850 void 851 kernel_fini(void) 852 { 853 fletcher_4_fini(); 854 spa_fini(); 855 856 zstd_fini(); 857 858 icp_fini(); 859 system_taskq_fini(); 860 861 random_fini(); 862 } 863 864 uid_t 865 crgetuid(cred_t *cr) 866 { 867 return (0); 868 } 869 870 uid_t 871 crgetruid(cred_t *cr) 872 { 873 return (0); 874 } 875 876 gid_t 877 crgetgid(cred_t *cr) 878 { 879 return (0); 880 } 881 882 int 883 crgetngroups(cred_t *cr) 884 { 885 return (0); 886 } 887 888 gid_t * 889 crgetgroups(cred_t *cr) 890 { 891 return (NULL); 892 } 893 894 int 895 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) 896 { 897 return (0); 898 } 899 900 int 901 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) 902 { 903 return (0); 904 } 905 906 int 907 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) 908 { 909 return (0); 910 } 911 912 int 913 secpolicy_zfs(const cred_t *cr) 914 { 915 return (0); 916 } 917 918 int 919 secpolicy_zfs_proc(const cred_t *cr, proc_t *proc) 920 { 921 return (0); 922 } 923 924 ksiddomain_t * 925 ksid_lookupdomain(const char *dom) 926 { 927 ksiddomain_t *kd; 928 929 kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL); 930 kd->kd_name = spa_strdup(dom); 931 return (kd); 932 } 933 934 void 935 ksiddomain_rele(ksiddomain_t *ksid) 936 { 937 spa_strfree(ksid->kd_name); 938 umem_free(ksid, sizeof (ksiddomain_t)); 939 } 940 941 char * 942 kmem_vasprintf(const char *fmt, va_list adx) 943 { 944 char *buf = NULL; 945 va_list adx_copy; 946 947 va_copy(adx_copy, adx); 948 VERIFY(vasprintf(&buf, fmt, adx_copy) != -1); 949 va_end(adx_copy); 950 951 return (buf); 952 } 953 954 char * 955 kmem_asprintf(const char *fmt, ...) 956 { 957 char *buf = NULL; 958 va_list adx; 959 960 va_start(adx, fmt); 961 VERIFY(vasprintf(&buf, fmt, adx) != -1); 962 va_end(adx); 963 964 return (buf); 965 } 966 967 /* ARGSUSED */ 968 int 969 zfs_onexit_fd_hold(int fd, minor_t *minorp) 970 { 971 *minorp = 0; 972 return (0); 973 } 974 975 /* ARGSUSED */ 976 void 977 zfs_onexit_fd_rele(int fd) 978 { 979 } 980 981 /* ARGSUSED */ 982 int 983 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, 984 uint64_t *action_handle) 985 { 986 return (0); 987 } 988 989 fstrans_cookie_t 990 spl_fstrans_mark(void) 991 { 992 return ((fstrans_cookie_t)0); 993 } 994 995 void 996 spl_fstrans_unmark(fstrans_cookie_t cookie) 997 { 998 } 999 1000 int 1001 __spl_pf_fstrans_check(void) 1002 { 1003 return (0); 1004 } 1005 1006 int 1007 kmem_cache_reap_active(void) 1008 { 1009 return (0); 1010 } 1011 1012 void *zvol_tag = "zvol_tag"; 1013 1014 void 1015 zvol_create_minor(const char *name) 1016 { 1017 } 1018 1019 void 1020 zvol_create_minors_recursive(const char *name) 1021 { 1022 } 1023 1024 void 1025 zvol_remove_minors(spa_t *spa, const char *name, boolean_t async) 1026 { 1027 } 1028 1029 void 1030 zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname, 1031 boolean_t async) 1032 { 1033 } 1034 1035 /* 1036 * Open file 1037 * 1038 * path - fully qualified path to file 1039 * flags - file attributes O_READ / O_WRITE / O_EXCL 1040 * fpp - pointer to return file pointer 1041 * 1042 * Returns 0 on success underlying error on failure. 1043 */ 1044 int 1045 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp) 1046 { 1047 int fd = -1; 1048 int dump_fd = -1; 1049 int err; 1050 int old_umask = 0; 1051 zfs_file_t *fp; 1052 struct stat64 st; 1053 1054 if (!(flags & O_CREAT) && stat64(path, &st) == -1) 1055 return (errno); 1056 1057 if (!(flags & O_CREAT) && S_ISBLK(st.st_mode)) 1058 flags |= O_DIRECT; 1059 1060 if (flags & O_CREAT) 1061 old_umask = umask(0); 1062 1063 fd = open64(path, flags, mode); 1064 if (fd == -1) 1065 return (errno); 1066 1067 if (flags & O_CREAT) 1068 (void) umask(old_umask); 1069 1070 if (vn_dumpdir != NULL) { 1071 char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL); 1072 char *inpath = basename((char *)(uintptr_t)path); 1073 1074 (void) snprintf(dumppath, MAXPATHLEN, 1075 "%s/%s", vn_dumpdir, inpath); 1076 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666); 1077 umem_free(dumppath, MAXPATHLEN); 1078 if (dump_fd == -1) { 1079 err = errno; 1080 close(fd); 1081 return (err); 1082 } 1083 } else { 1084 dump_fd = -1; 1085 } 1086 1087 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 1088 1089 fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL); 1090 fp->f_fd = fd; 1091 fp->f_dump_fd = dump_fd; 1092 *fpp = fp; 1093 1094 return (0); 1095 } 1096 1097 void 1098 zfs_file_close(zfs_file_t *fp) 1099 { 1100 close(fp->f_fd); 1101 if (fp->f_dump_fd != -1) 1102 close(fp->f_dump_fd); 1103 1104 umem_free(fp, sizeof (zfs_file_t)); 1105 } 1106 1107 /* 1108 * Stateful write - use os internal file pointer to determine where to 1109 * write and update on successful completion. 1110 * 1111 * fp - pointer to file (pipe, socket, etc) to write to 1112 * buf - buffer to write 1113 * count - # of bytes to write 1114 * resid - pointer to count of unwritten bytes (if short write) 1115 * 1116 * Returns 0 on success errno on failure. 1117 */ 1118 int 1119 zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid) 1120 { 1121 ssize_t rc; 1122 1123 rc = write(fp->f_fd, buf, count); 1124 if (rc < 0) 1125 return (errno); 1126 1127 if (resid) { 1128 *resid = count - rc; 1129 } else if (rc != count) { 1130 return (EIO); 1131 } 1132 1133 return (0); 1134 } 1135 1136 /* 1137 * Stateless write - os internal file pointer is not updated. 1138 * 1139 * fp - pointer to file (pipe, socket, etc) to write to 1140 * buf - buffer to write 1141 * count - # of bytes to write 1142 * off - file offset to write to (only valid for seekable types) 1143 * resid - pointer to count of unwritten bytes 1144 * 1145 * Returns 0 on success errno on failure. 1146 */ 1147 int 1148 zfs_file_pwrite(zfs_file_t *fp, const void *buf, 1149 size_t count, loff_t pos, ssize_t *resid) 1150 { 1151 ssize_t rc, split, done; 1152 int sectors; 1153 1154 /* 1155 * To simulate partial disk writes, we split writes into two 1156 * system calls so that the process can be killed in between. 1157 * This is used by ztest to simulate realistic failure modes. 1158 */ 1159 sectors = count >> SPA_MINBLOCKSHIFT; 1160 split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT; 1161 rc = pwrite64(fp->f_fd, buf, split, pos); 1162 if (rc != -1) { 1163 done = rc; 1164 rc = pwrite64(fp->f_fd, (char *)buf + split, 1165 count - split, pos + split); 1166 } 1167 #ifdef __linux__ 1168 if (rc == -1 && errno == EINVAL) { 1169 /* 1170 * Under Linux, this most likely means an alignment issue 1171 * (memory or disk) due to O_DIRECT, so we abort() in order 1172 * to catch the offender. 1173 */ 1174 abort(); 1175 } 1176 #endif 1177 1178 if (rc < 0) 1179 return (errno); 1180 1181 done += rc; 1182 1183 if (resid) { 1184 *resid = count - done; 1185 } else if (done != count) { 1186 return (EIO); 1187 } 1188 1189 return (0); 1190 } 1191 1192 /* 1193 * Stateful read - use os internal file pointer to determine where to 1194 * read and update on successful completion. 1195 * 1196 * fp - pointer to file (pipe, socket, etc) to read from 1197 * buf - buffer to write 1198 * count - # of bytes to read 1199 * resid - pointer to count of unread bytes (if short read) 1200 * 1201 * Returns 0 on success errno on failure. 1202 */ 1203 int 1204 zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid) 1205 { 1206 int rc; 1207 1208 rc = read(fp->f_fd, buf, count); 1209 if (rc < 0) 1210 return (errno); 1211 1212 if (resid) { 1213 *resid = count - rc; 1214 } else if (rc != count) { 1215 return (EIO); 1216 } 1217 1218 return (0); 1219 } 1220 1221 /* 1222 * Stateless read - os internal file pointer is not updated. 1223 * 1224 * fp - pointer to file (pipe, socket, etc) to read from 1225 * buf - buffer to write 1226 * count - # of bytes to write 1227 * off - file offset to read from (only valid for seekable types) 1228 * resid - pointer to count of unwritten bytes (if short write) 1229 * 1230 * Returns 0 on success errno on failure. 1231 */ 1232 int 1233 zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off, 1234 ssize_t *resid) 1235 { 1236 ssize_t rc; 1237 1238 rc = pread64(fp->f_fd, buf, count, off); 1239 if (rc < 0) { 1240 #ifdef __linux__ 1241 /* 1242 * Under Linux, this most likely means an alignment issue 1243 * (memory or disk) due to O_DIRECT, so we abort() in order to 1244 * catch the offender. 1245 */ 1246 if (errno == EINVAL) 1247 abort(); 1248 #endif 1249 return (errno); 1250 } 1251 1252 if (fp->f_dump_fd != -1) { 1253 int status; 1254 1255 status = pwrite64(fp->f_dump_fd, buf, rc, off); 1256 ASSERT(status != -1); 1257 } 1258 1259 if (resid) { 1260 *resid = count - rc; 1261 } else if (rc != count) { 1262 return (EIO); 1263 } 1264 1265 return (0); 1266 } 1267 1268 /* 1269 * lseek - set / get file pointer 1270 * 1271 * fp - pointer to file (pipe, socket, etc) to read from 1272 * offp - value to seek to, returns current value plus passed offset 1273 * whence - see man pages for standard lseek whence values 1274 * 1275 * Returns 0 on success errno on failure (ESPIPE for non seekable types) 1276 */ 1277 int 1278 zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence) 1279 { 1280 loff_t rc; 1281 1282 rc = lseek(fp->f_fd, *offp, whence); 1283 if (rc < 0) 1284 return (errno); 1285 1286 *offp = rc; 1287 1288 return (0); 1289 } 1290 1291 /* 1292 * Get file attributes 1293 * 1294 * filp - file pointer 1295 * zfattr - pointer to file attr structure 1296 * 1297 * Currently only used for fetching size and file mode 1298 * 1299 * Returns 0 on success or error code of underlying getattr call on failure. 1300 */ 1301 int 1302 zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr) 1303 { 1304 struct stat64 st; 1305 1306 if (fstat64_blk(fp->f_fd, &st) == -1) 1307 return (errno); 1308 1309 zfattr->zfa_size = st.st_size; 1310 zfattr->zfa_mode = st.st_mode; 1311 1312 return (0); 1313 } 1314 1315 /* 1316 * Sync file to disk 1317 * 1318 * filp - file pointer 1319 * flags - O_SYNC and or O_DSYNC 1320 * 1321 * Returns 0 on success or error code of underlying sync call on failure. 1322 */ 1323 int 1324 zfs_file_fsync(zfs_file_t *fp, int flags) 1325 { 1326 int rc; 1327 1328 rc = fsync(fp->f_fd); 1329 if (rc < 0) 1330 return (errno); 1331 1332 return (0); 1333 } 1334 1335 /* 1336 * fallocate - allocate or free space on disk 1337 * 1338 * fp - file pointer 1339 * mode (non-standard options for hole punching etc) 1340 * offset - offset to start allocating or freeing from 1341 * len - length to free / allocate 1342 * 1343 * OPTIONAL 1344 */ 1345 int 1346 zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len) 1347 { 1348 #ifdef __linux__ 1349 return (fallocate(fp->f_fd, mode, offset, len)); 1350 #else 1351 return (EOPNOTSUPP); 1352 #endif 1353 } 1354 1355 /* 1356 * Request current file pointer offset 1357 * 1358 * fp - pointer to file 1359 * 1360 * Returns current file offset. 1361 */ 1362 loff_t 1363 zfs_file_off(zfs_file_t *fp) 1364 { 1365 return (lseek(fp->f_fd, SEEK_CUR, 0)); 1366 } 1367 1368 /* 1369 * unlink file 1370 * 1371 * path - fully qualified file path 1372 * 1373 * Returns 0 on success. 1374 * 1375 * OPTIONAL 1376 */ 1377 int 1378 zfs_file_unlink(const char *path) 1379 { 1380 return (remove(path)); 1381 } 1382 1383 /* 1384 * Get reference to file pointer 1385 * 1386 * fd - input file descriptor 1387 * fpp - pointer to file pointer 1388 * 1389 * Returns 0 on success EBADF on failure. 1390 * Unsupported in user space. 1391 */ 1392 int 1393 zfs_file_get(int fd, zfs_file_t **fpp) 1394 { 1395 abort(); 1396 1397 return (EOPNOTSUPP); 1398 } 1399 1400 /* 1401 * Drop reference to file pointer 1402 * 1403 * fd - input file descriptor 1404 * 1405 * Unsupported in user space. 1406 */ 1407 void 1408 zfs_file_put(int fd) 1409 { 1410 abort(); 1411 } 1412 1413 void 1414 zfsvfs_update_fromname(const char *oldname, const char *newname) 1415 { 1416 } 1417