1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <assert.h> 29 #include <fcntl.h> 30 #include <poll.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <zlib.h> 35 #include <sys/spa.h> 36 #include <sys/stat.h> 37 #include <sys/processor.h> 38 #include <sys/zfs_context.h> 39 #include <sys/zmod.h> 40 #include <sys/utsname.h> 41 42 /* 43 * Emulation of kernel services in userland. 44 */ 45 46 uint64_t physmem; 47 vnode_t *rootdir = (vnode_t *)0xabcd1234; 48 char hw_serial[11]; 49 50 struct utsname utsname = { 51 "userland", "libzpool", "1", "1", "na" 52 }; 53 54 /* 55 * ========================================================================= 56 * threads 57 * ========================================================================= 58 */ 59 /*ARGSUSED*/ 60 kthread_t * 61 zk_thread_create(void (*func)(), void *arg) 62 { 63 thread_t tid; 64 65 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 66 &tid) == 0); 67 68 return ((void *)(uintptr_t)tid); 69 } 70 71 /* 72 * ========================================================================= 73 * kstats 74 * ========================================================================= 75 */ 76 /*ARGSUSED*/ 77 kstat_t * 78 kstat_create(char *module, int instance, char *name, char *class, 79 uchar_t type, ulong_t ndata, uchar_t ks_flag) 80 { 81 return (NULL); 82 } 83 84 /*ARGSUSED*/ 85 void 86 kstat_install(kstat_t *ksp) 87 {} 88 89 /*ARGSUSED*/ 90 void 91 kstat_delete(kstat_t *ksp) 92 {} 93 94 /* 95 * ========================================================================= 96 * mutexes 97 * ========================================================================= 98 */ 99 void 100 zmutex_init(kmutex_t *mp) 101 { 102 mp->m_owner = NULL; 103 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 104 } 105 106 void 107 zmutex_destroy(kmutex_t *mp) 108 { 109 ASSERT(mp->m_owner == NULL); 110 (void) _mutex_destroy(&(mp)->m_lock); 111 mp->m_owner = (void *)-1UL; 112 } 113 114 void 115 mutex_enter(kmutex_t *mp) 116 { 117 ASSERT(mp->m_owner != (void *)-1UL); 118 ASSERT(mp->m_owner != curthread); 119 VERIFY(mutex_lock(&mp->m_lock) == 0); 120 ASSERT(mp->m_owner == NULL); 121 mp->m_owner = curthread; 122 } 123 124 int 125 mutex_tryenter(kmutex_t *mp) 126 { 127 ASSERT(mp->m_owner != (void *)-1UL); 128 if (0 == mutex_trylock(&mp->m_lock)) { 129 ASSERT(mp->m_owner == NULL); 130 mp->m_owner = curthread; 131 return (1); 132 } else { 133 return (0); 134 } 135 } 136 137 void 138 mutex_exit(kmutex_t *mp) 139 { 140 ASSERT(mutex_owner(mp) == curthread); 141 mp->m_owner = NULL; 142 VERIFY(mutex_unlock(&mp->m_lock) == 0); 143 } 144 145 void * 146 mutex_owner(kmutex_t *mp) 147 { 148 return (mp->m_owner); 149 } 150 151 /* 152 * ========================================================================= 153 * rwlocks 154 * ========================================================================= 155 */ 156 /*ARGSUSED*/ 157 void 158 rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 159 { 160 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 161 rwlp->rw_owner = NULL; 162 } 163 164 void 165 rw_destroy(krwlock_t *rwlp) 166 { 167 rwlock_destroy(&rwlp->rw_lock); 168 rwlp->rw_owner = (void *)-1UL; 169 } 170 171 void 172 rw_enter(krwlock_t *rwlp, krw_t rw) 173 { 174 ASSERT(!RW_LOCK_HELD(rwlp)); 175 ASSERT(rwlp->rw_owner != (void *)-1UL); 176 ASSERT(rwlp->rw_owner != curthread); 177 178 if (rw == RW_READER) 179 (void) rw_rdlock(&rwlp->rw_lock); 180 else 181 (void) rw_wrlock(&rwlp->rw_lock); 182 183 rwlp->rw_owner = curthread; 184 } 185 186 void 187 rw_exit(krwlock_t *rwlp) 188 { 189 ASSERT(rwlp->rw_owner != (void *)-1UL); 190 191 rwlp->rw_owner = NULL; 192 (void) rw_unlock(&rwlp->rw_lock); 193 } 194 195 int 196 rw_tryenter(krwlock_t *rwlp, krw_t rw) 197 { 198 int rv; 199 200 ASSERT(rwlp->rw_owner != (void *)-1UL); 201 202 if (rw == RW_READER) 203 rv = rw_tryrdlock(&rwlp->rw_lock); 204 else 205 rv = rw_trywrlock(&rwlp->rw_lock); 206 207 if (rv == 0) { 208 rwlp->rw_owner = curthread; 209 return (1); 210 } 211 212 return (0); 213 } 214 215 /*ARGSUSED*/ 216 int 217 rw_tryupgrade(krwlock_t *rwlp) 218 { 219 ASSERT(rwlp->rw_owner != (void *)-1UL); 220 221 return (0); 222 } 223 224 /* 225 * ========================================================================= 226 * condition variables 227 * ========================================================================= 228 */ 229 /*ARGSUSED*/ 230 void 231 cv_init(kcondvar_t *cv, char *name, int type, void *arg) 232 { 233 VERIFY(cond_init(cv, type, NULL) == 0); 234 } 235 236 void 237 cv_destroy(kcondvar_t *cv) 238 { 239 VERIFY(cond_destroy(cv) == 0); 240 } 241 242 void 243 cv_wait(kcondvar_t *cv, kmutex_t *mp) 244 { 245 ASSERT(mutex_owner(mp) == curthread); 246 mp->m_owner = NULL; 247 int ret = cond_wait(cv, &mp->m_lock); 248 VERIFY(ret == 0 || ret == EINTR); 249 mp->m_owner = curthread; 250 } 251 252 clock_t 253 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 254 { 255 int error; 256 timestruc_t ts; 257 clock_t delta; 258 259 top: 260 delta = abstime - lbolt; 261 if (delta <= 0) 262 return (-1); 263 264 ts.tv_sec = delta / hz; 265 ts.tv_nsec = (delta % hz) * (NANOSEC / hz); 266 267 ASSERT(mutex_owner(mp) == curthread); 268 mp->m_owner = NULL; 269 error = cond_reltimedwait(cv, &mp->m_lock, &ts); 270 mp->m_owner = curthread; 271 272 if (error == ETIME) 273 return (-1); 274 275 if (error == EINTR) 276 goto top; 277 278 ASSERT(error == 0); 279 280 return (1); 281 } 282 283 void 284 cv_signal(kcondvar_t *cv) 285 { 286 VERIFY(cond_signal(cv) == 0); 287 } 288 289 void 290 cv_broadcast(kcondvar_t *cv) 291 { 292 VERIFY(cond_broadcast(cv) == 0); 293 } 294 295 /* 296 * ========================================================================= 297 * vnode operations 298 * ========================================================================= 299 */ 300 /* 301 * Note: for the xxxat() versions of these functions, we assume that the 302 * starting vp is always rootdir (which is true for spa_directory.c, the only 303 * ZFS consumer of these interfaces). We assert this is true, and then emulate 304 * them by adding '/' in front of the path. 305 */ 306 307 /*ARGSUSED*/ 308 int 309 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 310 { 311 int fd; 312 vnode_t *vp; 313 int old_umask; 314 char realpath[MAXPATHLEN]; 315 struct stat64 st; 316 317 /* 318 * If we're accessing a real disk from userland, we need to use 319 * the character interface to avoid caching. This is particularly 320 * important if we're trying to look at a real in-kernel storage 321 * pool from userland, e.g. via zdb, because otherwise we won't 322 * see the changes occurring under the segmap cache. 323 * On the other hand, the stupid character device returns zero 324 * for its size. So -- gag -- we open the block device to get 325 * its size, and remember it for subsequent VOP_GETATTR(). 326 */ 327 if (strncmp(path, "/dev/", 5) == 0) { 328 char *dsk; 329 fd = open64(path, O_RDONLY); 330 if (fd == -1) 331 return (errno); 332 if (fstat64(fd, &st) == -1) { 333 close(fd); 334 return (errno); 335 } 336 close(fd); 337 (void) sprintf(realpath, "%s", path); 338 dsk = strstr(path, "/dsk/"); 339 if (dsk != NULL) 340 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 341 dsk + 1); 342 } else { 343 (void) sprintf(realpath, "%s", path); 344 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 345 return (errno); 346 } 347 348 if (flags & FCREAT) 349 old_umask = umask(0); 350 351 /* 352 * The construct 'flags - FREAD' conveniently maps combinations of 353 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 354 */ 355 fd = open64(realpath, flags - FREAD, mode); 356 357 if (flags & FCREAT) 358 (void) umask(old_umask); 359 360 if (fd == -1) 361 return (errno); 362 363 if (fstat64(fd, &st) == -1) { 364 close(fd); 365 return (errno); 366 } 367 368 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 369 370 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 371 372 vp->v_fd = fd; 373 vp->v_size = st.st_size; 374 vp->v_path = spa_strdup(path); 375 376 return (0); 377 } 378 379 int 380 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 381 int x3, vnode_t *startvp) 382 { 383 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 384 int ret; 385 386 ASSERT(startvp == rootdir); 387 (void) sprintf(realpath, "/%s", path); 388 389 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 390 391 umem_free(realpath, strlen(path) + 2); 392 393 return (ret); 394 } 395 396 /*ARGSUSED*/ 397 int 398 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 399 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 400 { 401 ssize_t iolen, split; 402 403 if (uio == UIO_READ) { 404 iolen = pread64(vp->v_fd, addr, len, offset); 405 } else { 406 /* 407 * To simulate partial disk writes, we split writes into two 408 * system calls so that the process can be killed in between. 409 */ 410 split = (len > 0 ? rand() % len : 0); 411 iolen = pwrite64(vp->v_fd, addr, split, offset); 412 iolen += pwrite64(vp->v_fd, (char *)addr + split, 413 len - split, offset + split); 414 } 415 416 if (iolen == -1) 417 return (errno); 418 if (residp) 419 *residp = len - iolen; 420 else if (iolen != len) 421 return (EIO); 422 return (0); 423 } 424 425 void 426 vn_close(vnode_t *vp) 427 { 428 close(vp->v_fd); 429 spa_strfree(vp->v_path); 430 umem_free(vp, sizeof (vnode_t)); 431 } 432 433 #ifdef ZFS_DEBUG 434 435 /* 436 * ========================================================================= 437 * Figure out which debugging statements to print 438 * ========================================================================= 439 */ 440 441 static char *dprintf_string; 442 static int dprintf_print_all; 443 444 int 445 dprintf_find_string(const char *string) 446 { 447 char *tmp_str = dprintf_string; 448 int len = strlen(string); 449 450 /* 451 * Find out if this is a string we want to print. 452 * String format: file1.c,function_name1,file2.c,file3.c 453 */ 454 455 while (tmp_str != NULL) { 456 if (strncmp(tmp_str, string, len) == 0 && 457 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 458 return (1); 459 tmp_str = strchr(tmp_str, ','); 460 if (tmp_str != NULL) 461 tmp_str++; /* Get rid of , */ 462 } 463 return (0); 464 } 465 466 void 467 dprintf_setup(int *argc, char **argv) 468 { 469 int i, j; 470 471 /* 472 * Debugging can be specified two ways: by setting the 473 * environment variable ZFS_DEBUG, or by including a 474 * "debug=..." argument on the command line. The command 475 * line setting overrides the environment variable. 476 */ 477 478 for (i = 1; i < *argc; i++) { 479 int len = strlen("debug="); 480 /* First look for a command line argument */ 481 if (strncmp("debug=", argv[i], len) == 0) { 482 dprintf_string = argv[i] + len; 483 /* Remove from args */ 484 for (j = i; j < *argc; j++) 485 argv[j] = argv[j+1]; 486 argv[j] = NULL; 487 (*argc)--; 488 } 489 } 490 491 if (dprintf_string == NULL) { 492 /* Look for ZFS_DEBUG environment variable */ 493 dprintf_string = getenv("ZFS_DEBUG"); 494 } 495 496 /* 497 * Are we just turning on all debugging? 498 */ 499 if (dprintf_find_string("on")) 500 dprintf_print_all = 1; 501 } 502 503 /* 504 * ========================================================================= 505 * debug printfs 506 * ========================================================================= 507 */ 508 void 509 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 510 { 511 const char *newfile; 512 va_list adx; 513 514 /* 515 * Get rid of annoying "../common/" prefix to filename. 516 */ 517 newfile = strrchr(file, '/'); 518 if (newfile != NULL) { 519 newfile = newfile + 1; /* Get rid of leading / */ 520 } else { 521 newfile = file; 522 } 523 524 if (dprintf_print_all || 525 dprintf_find_string(newfile) || 526 dprintf_find_string(func)) { 527 /* Print out just the function name if requested */ 528 flockfile(stdout); 529 if (dprintf_find_string("pid")) 530 (void) printf("%d ", getpid()); 531 if (dprintf_find_string("tid")) 532 (void) printf("%u ", thr_self()); 533 if (dprintf_find_string("cpu")) 534 (void) printf("%u ", getcpuid()); 535 if (dprintf_find_string("time")) 536 (void) printf("%llu ", gethrtime()); 537 if (dprintf_find_string("long")) 538 (void) printf("%s, line %d: ", newfile, line); 539 (void) printf("%s: ", func); 540 va_start(adx, fmt); 541 (void) vprintf(fmt, adx); 542 va_end(adx); 543 funlockfile(stdout); 544 } 545 } 546 547 #endif /* ZFS_DEBUG */ 548 549 /* 550 * ========================================================================= 551 * cmn_err() and panic() 552 * ========================================================================= 553 */ 554 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 555 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 556 557 void 558 vpanic(const char *fmt, va_list adx) 559 { 560 (void) fprintf(stderr, "error: "); 561 (void) vfprintf(stderr, fmt, adx); 562 (void) fprintf(stderr, "\n"); 563 564 abort(); /* think of it as a "user-level crash dump" */ 565 } 566 567 void 568 panic(const char *fmt, ...) 569 { 570 va_list adx; 571 572 va_start(adx, fmt); 573 vpanic(fmt, adx); 574 va_end(adx); 575 } 576 577 void 578 vcmn_err(int ce, const char *fmt, va_list adx) 579 { 580 if (ce == CE_PANIC) 581 vpanic(fmt, adx); 582 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 583 (void) fprintf(stderr, "%s", ce_prefix[ce]); 584 (void) vfprintf(stderr, fmt, adx); 585 (void) fprintf(stderr, "%s", ce_suffix[ce]); 586 } 587 } 588 589 /*PRINTFLIKE2*/ 590 void 591 cmn_err(int ce, const char *fmt, ...) 592 { 593 va_list adx; 594 595 va_start(adx, fmt); 596 vcmn_err(ce, fmt, adx); 597 va_end(adx); 598 } 599 600 /* 601 * ========================================================================= 602 * kobj interfaces 603 * ========================================================================= 604 */ 605 struct _buf * 606 kobj_open_file(char *name) 607 { 608 struct _buf *file; 609 vnode_t *vp; 610 611 /* set vp as the _fd field of the file */ 612 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0) 613 return ((void *)-1UL); 614 615 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 616 file->_fd = (intptr_t)vp; 617 return (file); 618 } 619 620 int 621 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 622 { 623 ssize_t resid; 624 625 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 626 UIO_SYSSPACE, 0, 0, 0, &resid); 627 628 return (size - resid); 629 } 630 631 void 632 kobj_close_file(struct _buf *file) 633 { 634 vn_close((vnode_t *)file->_fd); 635 umem_free(file, sizeof (struct _buf)); 636 } 637 638 int 639 kobj_get_filesize(struct _buf *file, uint64_t *size) 640 { 641 struct stat64 st; 642 vnode_t *vp = (vnode_t *)file->_fd; 643 644 if (fstat64(vp->v_fd, &st) == -1) { 645 vn_close(vp); 646 return (errno); 647 } 648 *size = st.st_size; 649 return (0); 650 } 651 652 /* 653 * ========================================================================= 654 * misc routines 655 * ========================================================================= 656 */ 657 658 void 659 delay(clock_t ticks) 660 { 661 poll(0, 0, ticks * (1000 / hz)); 662 } 663 664 /* 665 * Find highest one bit set. 666 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 667 * High order bit is 31 (or 63 in _LP64 kernel). 668 */ 669 int 670 highbit(ulong_t i) 671 { 672 register int h = 1; 673 674 if (i == 0) 675 return (0); 676 #ifdef _LP64 677 if (i & 0xffffffff00000000ul) { 678 h += 32; i >>= 32; 679 } 680 #endif 681 if (i & 0xffff0000) { 682 h += 16; i >>= 16; 683 } 684 if (i & 0xff00) { 685 h += 8; i >>= 8; 686 } 687 if (i & 0xf0) { 688 h += 4; i >>= 4; 689 } 690 if (i & 0xc) { 691 h += 2; i >>= 2; 692 } 693 if (i & 0x2) { 694 h += 1; 695 } 696 return (h); 697 } 698 699 static int 700 random_get_bytes_common(uint8_t *ptr, size_t len, char *devname) 701 { 702 int fd = open(devname, O_RDONLY); 703 size_t resid = len; 704 ssize_t bytes; 705 706 ASSERT(fd != -1); 707 708 while (resid != 0) { 709 bytes = read(fd, ptr, resid); 710 ASSERT(bytes >= 0); 711 ptr += bytes; 712 resid -= bytes; 713 } 714 715 close(fd); 716 717 return (0); 718 } 719 720 int 721 random_get_bytes(uint8_t *ptr, size_t len) 722 { 723 return (random_get_bytes_common(ptr, len, "/dev/random")); 724 } 725 726 int 727 random_get_pseudo_bytes(uint8_t *ptr, size_t len) 728 { 729 return (random_get_bytes_common(ptr, len, "/dev/urandom")); 730 } 731 732 int 733 ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) 734 { 735 char *end; 736 737 *result = strtoul(hw_serial, &end, base); 738 if (*result == 0) 739 return (errno); 740 return (0); 741 } 742 743 /* 744 * ========================================================================= 745 * kernel emulation setup & teardown 746 * ========================================================================= 747 */ 748 static int 749 umem_out_of_memory(void) 750 { 751 char errmsg[] = "out of memory -- generating core dump\n"; 752 753 write(fileno(stderr), errmsg, sizeof (errmsg)); 754 abort(); 755 return (0); 756 } 757 758 void 759 kernel_init(int mode) 760 { 761 umem_nofail_callback(umem_out_of_memory); 762 763 physmem = sysconf(_SC_PHYS_PAGES); 764 765 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 766 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 767 768 snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid()); 769 770 spa_init(mode); 771 } 772 773 void 774 kernel_fini(void) 775 { 776 spa_fini(); 777 } 778 779 int 780 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen) 781 { 782 int ret; 783 uLongf len = *dstlen; 784 785 if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK) 786 *dstlen = (size_t)len; 787 788 return (ret); 789 } 790 791 int 792 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen, 793 int level) 794 { 795 int ret; 796 uLongf len = *dstlen; 797 798 if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK) 799 *dstlen = (size_t)len; 800 801 return (ret); 802 } 803