1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <assert.h> 29 #include <fcntl.h> 30 #include <poll.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <zlib.h> 35 #include <sys/spa.h> 36 #include <sys/stat.h> 37 #include <sys/processor.h> 38 #include <sys/zfs_context.h> 39 #include <sys/zmod.h> 40 #include <sys/utsname.h> 41 42 /* 43 * Emulation of kernel services in userland. 44 */ 45 46 uint64_t physmem; 47 vnode_t *rootdir = (vnode_t *)0xabcd1234; 48 char hw_serial[11]; 49 50 struct utsname utsname = { 51 "userland", "libzpool", "1", "1", "na" 52 }; 53 54 /* 55 * ========================================================================= 56 * threads 57 * ========================================================================= 58 */ 59 /*ARGSUSED*/ 60 kthread_t * 61 zk_thread_create(void (*func)(), void *arg) 62 { 63 thread_t tid; 64 65 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 66 &tid) == 0); 67 68 return ((void *)(uintptr_t)tid); 69 } 70 71 /* 72 * ========================================================================= 73 * kstats 74 * ========================================================================= 75 */ 76 /*ARGSUSED*/ 77 kstat_t * 78 kstat_create(char *module, int instance, char *name, char *class, 79 uchar_t type, ulong_t ndata, uchar_t ks_flag) 80 { 81 return (NULL); 82 } 83 84 /*ARGSUSED*/ 85 void 86 kstat_install(kstat_t *ksp) 87 {} 88 89 /*ARGSUSED*/ 90 void 91 kstat_delete(kstat_t *ksp) 92 {} 93 94 /* 95 * ========================================================================= 96 * mutexes 97 * ========================================================================= 98 */ 99 void 100 zmutex_init(kmutex_t *mp) 101 { 102 mp->m_owner = NULL; 103 mp->initialized = B_TRUE; 104 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 105 } 106 107 void 108 zmutex_destroy(kmutex_t *mp) 109 { 110 ASSERT(mp->initialized == B_TRUE); 111 ASSERT(mp->m_owner == NULL); 112 (void) _mutex_destroy(&(mp)->m_lock); 113 mp->m_owner = (void *)-1UL; 114 mp->initialized = B_FALSE; 115 } 116 117 void 118 mutex_enter(kmutex_t *mp) 119 { 120 ASSERT(mp->initialized == B_TRUE); 121 ASSERT(mp->m_owner != (void *)-1UL); 122 ASSERT(mp->m_owner != curthread); 123 VERIFY(mutex_lock(&mp->m_lock) == 0); 124 ASSERT(mp->m_owner == NULL); 125 mp->m_owner = curthread; 126 } 127 128 int 129 mutex_tryenter(kmutex_t *mp) 130 { 131 ASSERT(mp->initialized == B_TRUE); 132 ASSERT(mp->m_owner != (void *)-1UL); 133 if (0 == mutex_trylock(&mp->m_lock)) { 134 ASSERT(mp->m_owner == NULL); 135 mp->m_owner = curthread; 136 return (1); 137 } else { 138 return (0); 139 } 140 } 141 142 void 143 mutex_exit(kmutex_t *mp) 144 { 145 ASSERT(mp->initialized == B_TRUE); 146 ASSERT(mutex_owner(mp) == curthread); 147 mp->m_owner = NULL; 148 VERIFY(mutex_unlock(&mp->m_lock) == 0); 149 } 150 151 void * 152 mutex_owner(kmutex_t *mp) 153 { 154 ASSERT(mp->initialized == B_TRUE); 155 return (mp->m_owner); 156 } 157 158 /* 159 * ========================================================================= 160 * rwlocks 161 * ========================================================================= 162 */ 163 /*ARGSUSED*/ 164 void 165 rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 166 { 167 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 168 rwlp->rw_owner = NULL; 169 rwlp->initialized = B_TRUE; 170 } 171 172 void 173 rw_destroy(krwlock_t *rwlp) 174 { 175 rwlock_destroy(&rwlp->rw_lock); 176 rwlp->rw_owner = (void *)-1UL; 177 rwlp->initialized = B_FALSE; 178 } 179 180 void 181 rw_enter(krwlock_t *rwlp, krw_t rw) 182 { 183 ASSERT(!RW_LOCK_HELD(rwlp)); 184 ASSERT(rwlp->initialized == B_TRUE); 185 ASSERT(rwlp->rw_owner != (void *)-1UL); 186 ASSERT(rwlp->rw_owner != curthread); 187 188 if (rw == RW_READER) 189 (void) rw_rdlock(&rwlp->rw_lock); 190 else 191 (void) rw_wrlock(&rwlp->rw_lock); 192 193 rwlp->rw_owner = curthread; 194 } 195 196 void 197 rw_exit(krwlock_t *rwlp) 198 { 199 ASSERT(rwlp->initialized == B_TRUE); 200 ASSERT(rwlp->rw_owner != (void *)-1UL); 201 202 rwlp->rw_owner = NULL; 203 (void) rw_unlock(&rwlp->rw_lock); 204 } 205 206 int 207 rw_tryenter(krwlock_t *rwlp, krw_t rw) 208 { 209 int rv; 210 211 ASSERT(rwlp->initialized == B_TRUE); 212 ASSERT(rwlp->rw_owner != (void *)-1UL); 213 214 if (rw == RW_READER) 215 rv = rw_tryrdlock(&rwlp->rw_lock); 216 else 217 rv = rw_trywrlock(&rwlp->rw_lock); 218 219 if (rv == 0) { 220 rwlp->rw_owner = curthread; 221 return (1); 222 } 223 224 return (0); 225 } 226 227 /*ARGSUSED*/ 228 int 229 rw_tryupgrade(krwlock_t *rwlp) 230 { 231 ASSERT(rwlp->initialized == B_TRUE); 232 ASSERT(rwlp->rw_owner != (void *)-1UL); 233 234 return (0); 235 } 236 237 /* 238 * ========================================================================= 239 * condition variables 240 * ========================================================================= 241 */ 242 /*ARGSUSED*/ 243 void 244 cv_init(kcondvar_t *cv, char *name, int type, void *arg) 245 { 246 VERIFY(cond_init(cv, type, NULL) == 0); 247 } 248 249 void 250 cv_destroy(kcondvar_t *cv) 251 { 252 VERIFY(cond_destroy(cv) == 0); 253 } 254 255 void 256 cv_wait(kcondvar_t *cv, kmutex_t *mp) 257 { 258 ASSERT(mutex_owner(mp) == curthread); 259 mp->m_owner = NULL; 260 int ret = cond_wait(cv, &mp->m_lock); 261 VERIFY(ret == 0 || ret == EINTR); 262 mp->m_owner = curthread; 263 } 264 265 clock_t 266 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 267 { 268 int error; 269 timestruc_t ts; 270 clock_t delta; 271 272 top: 273 delta = abstime - lbolt; 274 if (delta <= 0) 275 return (-1); 276 277 ts.tv_sec = delta / hz; 278 ts.tv_nsec = (delta % hz) * (NANOSEC / hz); 279 280 ASSERT(mutex_owner(mp) == curthread); 281 mp->m_owner = NULL; 282 error = cond_reltimedwait(cv, &mp->m_lock, &ts); 283 mp->m_owner = curthread; 284 285 if (error == ETIME) 286 return (-1); 287 288 if (error == EINTR) 289 goto top; 290 291 ASSERT(error == 0); 292 293 return (1); 294 } 295 296 void 297 cv_signal(kcondvar_t *cv) 298 { 299 VERIFY(cond_signal(cv) == 0); 300 } 301 302 void 303 cv_broadcast(kcondvar_t *cv) 304 { 305 VERIFY(cond_broadcast(cv) == 0); 306 } 307 308 /* 309 * ========================================================================= 310 * vnode operations 311 * ========================================================================= 312 */ 313 /* 314 * Note: for the xxxat() versions of these functions, we assume that the 315 * starting vp is always rootdir (which is true for spa_directory.c, the only 316 * ZFS consumer of these interfaces). We assert this is true, and then emulate 317 * them by adding '/' in front of the path. 318 */ 319 320 /*ARGSUSED*/ 321 int 322 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 323 { 324 int fd; 325 vnode_t *vp; 326 int old_umask; 327 char realpath[MAXPATHLEN]; 328 struct stat64 st; 329 330 /* 331 * If we're accessing a real disk from userland, we need to use 332 * the character interface to avoid caching. This is particularly 333 * important if we're trying to look at a real in-kernel storage 334 * pool from userland, e.g. via zdb, because otherwise we won't 335 * see the changes occurring under the segmap cache. 336 * On the other hand, the stupid character device returns zero 337 * for its size. So -- gag -- we open the block device to get 338 * its size, and remember it for subsequent VOP_GETATTR(). 339 */ 340 if (strncmp(path, "/dev/", 5) == 0) { 341 char *dsk; 342 fd = open64(path, O_RDONLY); 343 if (fd == -1) 344 return (errno); 345 if (fstat64(fd, &st) == -1) { 346 close(fd); 347 return (errno); 348 } 349 close(fd); 350 (void) sprintf(realpath, "%s", path); 351 dsk = strstr(path, "/dsk/"); 352 if (dsk != NULL) 353 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 354 dsk + 1); 355 } else { 356 (void) sprintf(realpath, "%s", path); 357 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 358 return (errno); 359 } 360 361 if (flags & FCREAT) 362 old_umask = umask(0); 363 364 /* 365 * The construct 'flags - FREAD' conveniently maps combinations of 366 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 367 */ 368 fd = open64(realpath, flags - FREAD, mode); 369 370 if (flags & FCREAT) 371 (void) umask(old_umask); 372 373 if (fd == -1) 374 return (errno); 375 376 if (fstat64(fd, &st) == -1) { 377 close(fd); 378 return (errno); 379 } 380 381 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 382 383 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 384 385 vp->v_fd = fd; 386 vp->v_size = st.st_size; 387 vp->v_path = spa_strdup(path); 388 389 return (0); 390 } 391 392 int 393 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 394 int x3, vnode_t *startvp) 395 { 396 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 397 int ret; 398 399 ASSERT(startvp == rootdir); 400 (void) sprintf(realpath, "/%s", path); 401 402 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 403 404 umem_free(realpath, strlen(path) + 2); 405 406 return (ret); 407 } 408 409 /*ARGSUSED*/ 410 int 411 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 412 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 413 { 414 ssize_t iolen, split; 415 416 if (uio == UIO_READ) { 417 iolen = pread64(vp->v_fd, addr, len, offset); 418 } else { 419 /* 420 * To simulate partial disk writes, we split writes into two 421 * system calls so that the process can be killed in between. 422 */ 423 split = (len > 0 ? rand() % len : 0); 424 iolen = pwrite64(vp->v_fd, addr, split, offset); 425 iolen += pwrite64(vp->v_fd, (char *)addr + split, 426 len - split, offset + split); 427 } 428 429 if (iolen == -1) 430 return (errno); 431 if (residp) 432 *residp = len - iolen; 433 else if (iolen != len) 434 return (EIO); 435 return (0); 436 } 437 438 void 439 vn_close(vnode_t *vp) 440 { 441 close(vp->v_fd); 442 spa_strfree(vp->v_path); 443 umem_free(vp, sizeof (vnode_t)); 444 } 445 446 #ifdef ZFS_DEBUG 447 448 /* 449 * ========================================================================= 450 * Figure out which debugging statements to print 451 * ========================================================================= 452 */ 453 454 static char *dprintf_string; 455 static int dprintf_print_all; 456 457 int 458 dprintf_find_string(const char *string) 459 { 460 char *tmp_str = dprintf_string; 461 int len = strlen(string); 462 463 /* 464 * Find out if this is a string we want to print. 465 * String format: file1.c,function_name1,file2.c,file3.c 466 */ 467 468 while (tmp_str != NULL) { 469 if (strncmp(tmp_str, string, len) == 0 && 470 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 471 return (1); 472 tmp_str = strchr(tmp_str, ','); 473 if (tmp_str != NULL) 474 tmp_str++; /* Get rid of , */ 475 } 476 return (0); 477 } 478 479 void 480 dprintf_setup(int *argc, char **argv) 481 { 482 int i, j; 483 484 /* 485 * Debugging can be specified two ways: by setting the 486 * environment variable ZFS_DEBUG, or by including a 487 * "debug=..." argument on the command line. The command 488 * line setting overrides the environment variable. 489 */ 490 491 for (i = 1; i < *argc; i++) { 492 int len = strlen("debug="); 493 /* First look for a command line argument */ 494 if (strncmp("debug=", argv[i], len) == 0) { 495 dprintf_string = argv[i] + len; 496 /* Remove from args */ 497 for (j = i; j < *argc; j++) 498 argv[j] = argv[j+1]; 499 argv[j] = NULL; 500 (*argc)--; 501 } 502 } 503 504 if (dprintf_string == NULL) { 505 /* Look for ZFS_DEBUG environment variable */ 506 dprintf_string = getenv("ZFS_DEBUG"); 507 } 508 509 /* 510 * Are we just turning on all debugging? 511 */ 512 if (dprintf_find_string("on")) 513 dprintf_print_all = 1; 514 } 515 516 /* 517 * ========================================================================= 518 * debug printfs 519 * ========================================================================= 520 */ 521 void 522 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 523 { 524 const char *newfile; 525 va_list adx; 526 527 /* 528 * Get rid of annoying "../common/" prefix to filename. 529 */ 530 newfile = strrchr(file, '/'); 531 if (newfile != NULL) { 532 newfile = newfile + 1; /* Get rid of leading / */ 533 } else { 534 newfile = file; 535 } 536 537 if (dprintf_print_all || 538 dprintf_find_string(newfile) || 539 dprintf_find_string(func)) { 540 /* Print out just the function name if requested */ 541 flockfile(stdout); 542 if (dprintf_find_string("pid")) 543 (void) printf("%d ", getpid()); 544 if (dprintf_find_string("tid")) 545 (void) printf("%u ", thr_self()); 546 if (dprintf_find_string("cpu")) 547 (void) printf("%u ", getcpuid()); 548 if (dprintf_find_string("time")) 549 (void) printf("%llu ", gethrtime()); 550 if (dprintf_find_string("long")) 551 (void) printf("%s, line %d: ", newfile, line); 552 (void) printf("%s: ", func); 553 va_start(adx, fmt); 554 (void) vprintf(fmt, adx); 555 va_end(adx); 556 funlockfile(stdout); 557 } 558 } 559 560 #endif /* ZFS_DEBUG */ 561 562 /* 563 * ========================================================================= 564 * cmn_err() and panic() 565 * ========================================================================= 566 */ 567 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 568 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 569 570 void 571 vpanic(const char *fmt, va_list adx) 572 { 573 (void) fprintf(stderr, "error: "); 574 (void) vfprintf(stderr, fmt, adx); 575 (void) fprintf(stderr, "\n"); 576 577 abort(); /* think of it as a "user-level crash dump" */ 578 } 579 580 void 581 panic(const char *fmt, ...) 582 { 583 va_list adx; 584 585 va_start(adx, fmt); 586 vpanic(fmt, adx); 587 va_end(adx); 588 } 589 590 void 591 vcmn_err(int ce, const char *fmt, va_list adx) 592 { 593 if (ce == CE_PANIC) 594 vpanic(fmt, adx); 595 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 596 (void) fprintf(stderr, "%s", ce_prefix[ce]); 597 (void) vfprintf(stderr, fmt, adx); 598 (void) fprintf(stderr, "%s", ce_suffix[ce]); 599 } 600 } 601 602 /*PRINTFLIKE2*/ 603 void 604 cmn_err(int ce, const char *fmt, ...) 605 { 606 va_list adx; 607 608 va_start(adx, fmt); 609 vcmn_err(ce, fmt, adx); 610 va_end(adx); 611 } 612 613 /* 614 * ========================================================================= 615 * kobj interfaces 616 * ========================================================================= 617 */ 618 struct _buf * 619 kobj_open_file(char *name) 620 { 621 struct _buf *file; 622 vnode_t *vp; 623 624 /* set vp as the _fd field of the file */ 625 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0) 626 return ((void *)-1UL); 627 628 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 629 file->_fd = (intptr_t)vp; 630 return (file); 631 } 632 633 int 634 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 635 { 636 ssize_t resid; 637 638 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 639 UIO_SYSSPACE, 0, 0, 0, &resid); 640 641 return (size - resid); 642 } 643 644 void 645 kobj_close_file(struct _buf *file) 646 { 647 vn_close((vnode_t *)file->_fd); 648 umem_free(file, sizeof (struct _buf)); 649 } 650 651 int 652 kobj_get_filesize(struct _buf *file, uint64_t *size) 653 { 654 struct stat64 st; 655 vnode_t *vp = (vnode_t *)file->_fd; 656 657 if (fstat64(vp->v_fd, &st) == -1) { 658 vn_close(vp); 659 return (errno); 660 } 661 *size = st.st_size; 662 return (0); 663 } 664 665 /* 666 * ========================================================================= 667 * misc routines 668 * ========================================================================= 669 */ 670 671 void 672 delay(clock_t ticks) 673 { 674 poll(0, 0, ticks * (1000 / hz)); 675 } 676 677 /* 678 * Find highest one bit set. 679 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 680 * High order bit is 31 (or 63 in _LP64 kernel). 681 */ 682 int 683 highbit(ulong_t i) 684 { 685 register int h = 1; 686 687 if (i == 0) 688 return (0); 689 #ifdef _LP64 690 if (i & 0xffffffff00000000ul) { 691 h += 32; i >>= 32; 692 } 693 #endif 694 if (i & 0xffff0000) { 695 h += 16; i >>= 16; 696 } 697 if (i & 0xff00) { 698 h += 8; i >>= 8; 699 } 700 if (i & 0xf0) { 701 h += 4; i >>= 4; 702 } 703 if (i & 0xc) { 704 h += 2; i >>= 2; 705 } 706 if (i & 0x2) { 707 h += 1; 708 } 709 return (h); 710 } 711 712 static int 713 random_get_bytes_common(uint8_t *ptr, size_t len, char *devname) 714 { 715 int fd = open(devname, O_RDONLY); 716 size_t resid = len; 717 ssize_t bytes; 718 719 ASSERT(fd != -1); 720 721 while (resid != 0) { 722 bytes = read(fd, ptr, resid); 723 ASSERT(bytes >= 0); 724 ptr += bytes; 725 resid -= bytes; 726 } 727 728 close(fd); 729 730 return (0); 731 } 732 733 int 734 random_get_bytes(uint8_t *ptr, size_t len) 735 { 736 return (random_get_bytes_common(ptr, len, "/dev/random")); 737 } 738 739 int 740 random_get_pseudo_bytes(uint8_t *ptr, size_t len) 741 { 742 return (random_get_bytes_common(ptr, len, "/dev/urandom")); 743 } 744 745 int 746 ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) 747 { 748 char *end; 749 750 *result = strtoul(hw_serial, &end, base); 751 if (*result == 0) 752 return (errno); 753 return (0); 754 } 755 756 /* 757 * ========================================================================= 758 * kernel emulation setup & teardown 759 * ========================================================================= 760 */ 761 static int 762 umem_out_of_memory(void) 763 { 764 char errmsg[] = "out of memory -- generating core dump\n"; 765 766 write(fileno(stderr), errmsg, sizeof (errmsg)); 767 abort(); 768 return (0); 769 } 770 771 void 772 kernel_init(int mode) 773 { 774 umem_nofail_callback(umem_out_of_memory); 775 776 physmem = sysconf(_SC_PHYS_PAGES); 777 778 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 779 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 780 781 snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid()); 782 783 spa_init(mode); 784 } 785 786 void 787 kernel_fini(void) 788 { 789 spa_fini(); 790 } 791 792 int 793 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) 794 { 795 int ret; 796 uLongf len = *dstlen; 797 798 if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK) 799 *dstlen = (size_t)len; 800 801 return (ret); 802 } 803 804 int 805 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, 806 int level) 807 { 808 int ret; 809 uLongf len = *dstlen; 810 811 if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK) 812 *dstlen = (size_t)len; 813 814 return (ret); 815 } 816 817 uid_t 818 crgetuid(cred_t *cr) 819 { 820 return (0); 821 } 822 823 gid_t 824 crgetgid(cred_t *cr) 825 { 826 return (0); 827 } 828 829 int 830 crgetngroups(cred_t *cr) 831 { 832 return (0); 833 } 834 835 gid_t * 836 crgetgroups(cred_t *cr) 837 { 838 return (NULL); 839 } 840 841 int 842 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) 843 { 844 return (0); 845 } 846 847 int 848 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) 849 { 850 return (0); 851 } 852 853 int 854 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) 855 { 856 return (0); 857 } 858