1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <assert.h> 29 #include <sys/zfs_context.h> 30 #include <poll.h> 31 #include <string.h> 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <fcntl.h> 35 #include <sys/stat.h> 36 #include <sys/spa.h> 37 #include <sys/processor.h> 38 39 40 /* 41 * Emulation of kernel services in userland. 42 */ 43 44 uint64_t physmem; 45 vnode_t *rootdir = (vnode_t *)0xabcd1234; 46 47 /* 48 * ========================================================================= 49 * threads 50 * ========================================================================= 51 */ 52 /*ARGSUSED*/ 53 kthread_t * 54 zk_thread_create(void (*func)(), void *arg) 55 { 56 thread_t tid; 57 58 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 59 &tid) == 0); 60 61 return ((void *)(uintptr_t)tid); 62 } 63 64 /* 65 * ========================================================================= 66 * kstats 67 * ========================================================================= 68 */ 69 /*ARGSUSED*/ 70 kstat_t * 71 kstat_create(char *module, int instance, char *name, char *class, 72 uchar_t type, ulong_t ndata, uchar_t ks_flag) 73 { 74 return (NULL); 75 } 76 77 /*ARGSUSED*/ 78 void 79 kstat_install(kstat_t *ksp) 80 {} 81 82 /*ARGSUSED*/ 83 void 84 kstat_delete(kstat_t *ksp) 85 {} 86 87 /* 88 * ========================================================================= 89 * mutexes 90 * ========================================================================= 91 */ 92 void 93 zmutex_init(kmutex_t *mp) 94 { 95 mp->m_owner = NULL; 96 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 97 } 98 99 void 100 zmutex_destroy(kmutex_t *mp) 101 { 102 ASSERT(mp->m_owner == NULL); 103 (void) _mutex_destroy(&(mp)->m_lock); 104 mp->m_owner = (void *)-1UL; 105 } 106 107 void 108 mutex_enter(kmutex_t *mp) 109 { 110 ASSERT(mp->m_owner != (void *)-1UL); 111 ASSERT(mp->m_owner != curthread); 112 VERIFY(mutex_lock(&mp->m_lock) == 0); 113 ASSERT(mp->m_owner == NULL); 114 mp->m_owner = curthread; 115 } 116 117 int 118 mutex_tryenter(kmutex_t *mp) 119 { 120 ASSERT(mp->m_owner != (void *)-1UL); 121 if (0 == mutex_trylock(&mp->m_lock)) { 122 ASSERT(mp->m_owner == NULL); 123 mp->m_owner = curthread; 124 return (1); 125 } else { 126 return (0); 127 } 128 } 129 130 void 131 mutex_exit(kmutex_t *mp) 132 { 133 ASSERT(mutex_owner(mp) == curthread); 134 mp->m_owner = NULL; 135 VERIFY(mutex_unlock(&mp->m_lock) == 0); 136 } 137 138 void * 139 mutex_owner(kmutex_t *mp) 140 { 141 return (mp->m_owner); 142 } 143 144 /* 145 * ========================================================================= 146 * rwlocks 147 * ========================================================================= 148 */ 149 /*ARGSUSED*/ 150 void 151 rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 152 { 153 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 154 rwlp->rw_owner = NULL; 155 } 156 157 void 158 rw_destroy(krwlock_t *rwlp) 159 { 160 rwlock_destroy(&rwlp->rw_lock); 161 rwlp->rw_owner = (void *)-1UL; 162 } 163 164 void 165 rw_enter(krwlock_t *rwlp, krw_t rw) 166 { 167 ASSERT(!RW_LOCK_HELD(rwlp)); 168 ASSERT(rwlp->rw_owner != (void *)-1UL); 169 ASSERT(rwlp->rw_owner != curthread); 170 171 if (rw == RW_READER) 172 (void) rw_rdlock(&rwlp->rw_lock); 173 else 174 (void) rw_wrlock(&rwlp->rw_lock); 175 176 rwlp->rw_owner = curthread; 177 } 178 179 void 180 rw_exit(krwlock_t *rwlp) 181 { 182 ASSERT(rwlp->rw_owner != (void *)-1UL); 183 184 rwlp->rw_owner = NULL; 185 (void) rw_unlock(&rwlp->rw_lock); 186 } 187 188 int 189 rw_tryenter(krwlock_t *rwlp, krw_t rw) 190 { 191 int rv; 192 193 ASSERT(rwlp->rw_owner != (void *)-1UL); 194 195 if (rw == RW_READER) 196 rv = rw_tryrdlock(&rwlp->rw_lock); 197 else 198 rv = rw_trywrlock(&rwlp->rw_lock); 199 200 if (rv == 0) { 201 rwlp->rw_owner = curthread; 202 return (1); 203 } 204 205 return (0); 206 } 207 208 /*ARGSUSED*/ 209 int 210 rw_tryupgrade(krwlock_t *rwlp) 211 { 212 ASSERT(rwlp->rw_owner != (void *)-1UL); 213 214 return (0); 215 } 216 217 /* 218 * ========================================================================= 219 * condition variables 220 * ========================================================================= 221 */ 222 /*ARGSUSED*/ 223 void 224 cv_init(kcondvar_t *cv, char *name, int type, void *arg) 225 { 226 VERIFY(cond_init(cv, type, NULL) == 0); 227 } 228 229 void 230 cv_destroy(kcondvar_t *cv) 231 { 232 VERIFY(cond_destroy(cv) == 0); 233 } 234 235 void 236 cv_wait(kcondvar_t *cv, kmutex_t *mp) 237 { 238 ASSERT(mutex_owner(mp) == curthread); 239 mp->m_owner = NULL; 240 int ret = cond_wait(cv, &mp->m_lock); 241 VERIFY(ret == 0 || ret == EINTR); 242 mp->m_owner = curthread; 243 } 244 245 clock_t 246 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 247 { 248 int error; 249 timestruc_t ts; 250 clock_t delta; 251 252 top: 253 delta = abstime - lbolt; 254 if (delta <= 0) 255 return (-1); 256 257 ts.tv_sec = delta / hz; 258 ts.tv_nsec = (delta % hz) * (NANOSEC / hz); 259 260 ASSERT(mutex_owner(mp) == curthread); 261 mp->m_owner = NULL; 262 error = cond_reltimedwait(cv, &mp->m_lock, &ts); 263 mp->m_owner = curthread; 264 265 if (error == ETIME) 266 return (-1); 267 268 if (error == EINTR) 269 goto top; 270 271 ASSERT(error == 0); 272 273 return (1); 274 } 275 276 void 277 cv_signal(kcondvar_t *cv) 278 { 279 VERIFY(cond_signal(cv) == 0); 280 } 281 282 void 283 cv_broadcast(kcondvar_t *cv) 284 { 285 VERIFY(cond_broadcast(cv) == 0); 286 } 287 288 /* 289 * ========================================================================= 290 * vnode operations 291 * ========================================================================= 292 */ 293 /* 294 * Note: for the xxxat() versions of these functions, we assume that the 295 * starting vp is always rootdir (which is true for spa_directory.c, the only 296 * ZFS consumer of these interfaces). We assert this is true, and then emulate 297 * them by adding '/' in front of the path. 298 */ 299 300 /*ARGSUSED*/ 301 int 302 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 303 { 304 int fd; 305 vnode_t *vp; 306 int old_umask; 307 char realpath[MAXPATHLEN]; 308 struct stat64 st; 309 310 /* 311 * If we're accessing a real disk from userland, we need to use 312 * the character interface to avoid caching. This is particularly 313 * important if we're trying to look at a real in-kernel storage 314 * pool from userland, e.g. via zdb, because otherwise we won't 315 * see the changes occurring under the segmap cache. 316 * On the other hand, the stupid character device returns zero 317 * for its size. So -- gag -- we open the block device to get 318 * its size, and remember it for subsequent VOP_GETATTR(). 319 */ 320 if (strncmp(path, "/dev/", 5) == 0) { 321 char *dsk; 322 fd = open64(path, O_RDONLY); 323 if (fd == -1) 324 return (errno); 325 if (fstat64(fd, &st) == -1) { 326 close(fd); 327 return (errno); 328 } 329 close(fd); 330 (void) sprintf(realpath, "%s", path); 331 dsk = strstr(path, "/dsk/"); 332 if (dsk != NULL) 333 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 334 dsk + 1); 335 } else { 336 (void) sprintf(realpath, "%s", path); 337 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 338 return (errno); 339 } 340 341 if (flags & FCREAT) 342 old_umask = umask(0); 343 344 /* 345 * The construct 'flags - FREAD' conveniently maps combinations of 346 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 347 */ 348 fd = open64(realpath, flags - FREAD, mode); 349 350 if (flags & FCREAT) 351 (void) umask(old_umask); 352 353 if (fd == -1) 354 return (errno); 355 356 if (fstat64(fd, &st) == -1) { 357 close(fd); 358 return (errno); 359 } 360 361 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 362 363 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 364 365 vp->v_fd = fd; 366 vp->v_size = st.st_size; 367 vp->v_path = spa_strdup(path); 368 369 return (0); 370 } 371 372 int 373 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 374 int x3, vnode_t *startvp) 375 { 376 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 377 int ret; 378 379 ASSERT(startvp == rootdir); 380 (void) sprintf(realpath, "/%s", path); 381 382 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 383 384 umem_free(realpath, strlen(path) + 2); 385 386 return (ret); 387 } 388 389 /*ARGSUSED*/ 390 int 391 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 392 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 393 { 394 ssize_t iolen, split; 395 396 if (uio == UIO_READ) { 397 iolen = pread64(vp->v_fd, addr, len, offset); 398 } else { 399 /* 400 * To simulate partial disk writes, we split writes into two 401 * system calls so that the process can be killed in between. 402 */ 403 split = (len > 0 ? rand() % len : 0); 404 iolen = pwrite64(vp->v_fd, addr, split, offset); 405 iolen += pwrite64(vp->v_fd, (char *)addr + split, 406 len - split, offset + split); 407 } 408 409 if (iolen == -1) 410 return (errno); 411 if (residp) 412 *residp = len - iolen; 413 else if (iolen != len) 414 return (EIO); 415 return (0); 416 } 417 418 void 419 vn_close(vnode_t *vp) 420 { 421 close(vp->v_fd); 422 spa_strfree(vp->v_path); 423 umem_free(vp, sizeof (vnode_t)); 424 } 425 426 #ifdef ZFS_DEBUG 427 428 /* 429 * ========================================================================= 430 * Figure out which debugging statements to print 431 * ========================================================================= 432 */ 433 434 static char *dprintf_string; 435 static int dprintf_print_all; 436 437 int 438 dprintf_find_string(const char *string) 439 { 440 char *tmp_str = dprintf_string; 441 int len = strlen(string); 442 443 /* 444 * Find out if this is a string we want to print. 445 * String format: file1.c,function_name1,file2.c,file3.c 446 */ 447 448 while (tmp_str != NULL) { 449 if (strncmp(tmp_str, string, len) == 0 && 450 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 451 return (1); 452 tmp_str = strchr(tmp_str, ','); 453 if (tmp_str != NULL) 454 tmp_str++; /* Get rid of , */ 455 } 456 return (0); 457 } 458 459 void 460 dprintf_setup(int *argc, char **argv) 461 { 462 int i, j; 463 464 /* 465 * Debugging can be specified two ways: by setting the 466 * environment variable ZFS_DEBUG, or by including a 467 * "debug=..." argument on the command line. The command 468 * line setting overrides the environment variable. 469 */ 470 471 for (i = 1; i < *argc; i++) { 472 int len = strlen("debug="); 473 /* First look for a command line argument */ 474 if (strncmp("debug=", argv[i], len) == 0) { 475 dprintf_string = argv[i] + len; 476 /* Remove from args */ 477 for (j = i; j < *argc; j++) 478 argv[j] = argv[j+1]; 479 argv[j] = NULL; 480 (*argc)--; 481 } 482 } 483 484 if (dprintf_string == NULL) { 485 /* Look for ZFS_DEBUG environment variable */ 486 dprintf_string = getenv("ZFS_DEBUG"); 487 } 488 489 /* 490 * Are we just turning on all debugging? 491 */ 492 if (dprintf_find_string("on")) 493 dprintf_print_all = 1; 494 } 495 496 /* 497 * ========================================================================= 498 * debug printfs 499 * ========================================================================= 500 */ 501 void 502 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 503 { 504 const char *newfile; 505 va_list adx; 506 507 /* 508 * Get rid of annoying "../common/" prefix to filename. 509 */ 510 newfile = strrchr(file, '/'); 511 if (newfile != NULL) { 512 newfile = newfile + 1; /* Get rid of leading / */ 513 } else { 514 newfile = file; 515 } 516 517 if (dprintf_print_all || 518 dprintf_find_string(newfile) || 519 dprintf_find_string(func)) { 520 /* Print out just the function name if requested */ 521 flockfile(stdout); 522 if (dprintf_find_string("pid")) 523 (void) printf("%d ", getpid()); 524 if (dprintf_find_string("tid")) 525 (void) printf("%u ", thr_self()); 526 if (dprintf_find_string("cpu")) 527 (void) printf("%u ", getcpuid()); 528 if (dprintf_find_string("time")) 529 (void) printf("%llu ", gethrtime()); 530 if (dprintf_find_string("long")) 531 (void) printf("%s, line %d: ", newfile, line); 532 (void) printf("%s: ", func); 533 va_start(adx, fmt); 534 (void) vprintf(fmt, adx); 535 va_end(adx); 536 funlockfile(stdout); 537 } 538 } 539 540 #endif /* ZFS_DEBUG */ 541 542 /* 543 * ========================================================================= 544 * cmn_err() and panic() 545 * ========================================================================= 546 */ 547 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 548 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 549 550 void 551 vpanic(const char *fmt, va_list adx) 552 { 553 (void) fprintf(stderr, "error: "); 554 (void) vfprintf(stderr, fmt, adx); 555 (void) fprintf(stderr, "\n"); 556 557 abort(); /* think of it as a "user-level crash dump" */ 558 } 559 560 void 561 panic(const char *fmt, ...) 562 { 563 va_list adx; 564 565 va_start(adx, fmt); 566 vpanic(fmt, adx); 567 va_end(adx); 568 } 569 570 /*PRINTFLIKE2*/ 571 void 572 cmn_err(int ce, const char *fmt, ...) 573 { 574 va_list adx; 575 576 va_start(adx, fmt); 577 if (ce == CE_PANIC) 578 vpanic(fmt, adx); 579 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 580 (void) fprintf(stderr, "%s", ce_prefix[ce]); 581 (void) vfprintf(stderr, fmt, adx); 582 (void) fprintf(stderr, "%s", ce_suffix[ce]); 583 } 584 va_end(adx); 585 } 586 587 /* 588 * ========================================================================= 589 * kobj interfaces 590 * ========================================================================= 591 */ 592 struct _buf * 593 kobj_open_file(char *name) 594 { 595 struct _buf *file; 596 vnode_t *vp; 597 598 /* set vp as the _fd field of the file */ 599 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0) 600 return ((void *)-1UL); 601 602 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 603 file->_fd = (intptr_t)vp; 604 return (file); 605 } 606 607 int 608 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 609 { 610 ssize_t resid; 611 612 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 613 UIO_SYSSPACE, 0, 0, 0, &resid); 614 615 return (0); 616 } 617 618 void 619 kobj_close_file(struct _buf *file) 620 { 621 vn_close((vnode_t *)file->_fd); 622 umem_free(file, sizeof (struct _buf)); 623 } 624 625 int 626 kobj_fstat(intptr_t fd, struct bootstat *bst) 627 { 628 struct stat64 st; 629 vnode_t *vp = (vnode_t *)fd; 630 if (fstat64(vp->v_fd, &st) == -1) { 631 vn_close(vp); 632 return (errno); 633 } 634 bst->st_size = (uint64_t)st.st_size; 635 return (0); 636 } 637 638 /* 639 * ========================================================================= 640 * misc routines 641 * ========================================================================= 642 */ 643 644 void 645 delay(clock_t ticks) 646 { 647 poll(0, 0, ticks * (1000 / hz)); 648 } 649 650 /* 651 * Find highest one bit set. 652 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 653 * High order bit is 31 (or 63 in _LP64 kernel). 654 */ 655 int 656 highbit(ulong_t i) 657 { 658 register int h = 1; 659 660 if (i == 0) 661 return (0); 662 #ifdef _LP64 663 if (i & 0xffffffff00000000ul) { 664 h += 32; i >>= 32; 665 } 666 #endif 667 if (i & 0xffff0000) { 668 h += 16; i >>= 16; 669 } 670 if (i & 0xff00) { 671 h += 8; i >>= 8; 672 } 673 if (i & 0xf0) { 674 h += 4; i >>= 4; 675 } 676 if (i & 0xc) { 677 h += 2; i >>= 2; 678 } 679 if (i & 0x2) { 680 h += 1; 681 } 682 return (h); 683 } 684 685 static int 686 random_get_bytes_common(uint8_t *ptr, size_t len, char *devname) 687 { 688 int fd = open(devname, O_RDONLY); 689 size_t resid = len; 690 ssize_t bytes; 691 692 ASSERT(fd != -1); 693 694 while (resid != 0) { 695 bytes = read(fd, ptr, resid); 696 ASSERT(bytes >= 0); 697 ptr += bytes; 698 resid -= bytes; 699 } 700 701 close(fd); 702 703 return (0); 704 } 705 706 int 707 random_get_bytes(uint8_t *ptr, size_t len) 708 { 709 return (random_get_bytes_common(ptr, len, "/dev/random")); 710 } 711 712 int 713 random_get_pseudo_bytes(uint8_t *ptr, size_t len) 714 { 715 return (random_get_bytes_common(ptr, len, "/dev/urandom")); 716 } 717 718 /* 719 * ========================================================================= 720 * kernel emulation setup & teardown 721 * ========================================================================= 722 */ 723 static int 724 umem_out_of_memory(void) 725 { 726 char errmsg[] = "out of memory -- generating core dump\n"; 727 728 write(fileno(stderr), errmsg, sizeof (errmsg)); 729 abort(); 730 return (0); 731 } 732 733 void 734 kernel_init(int mode) 735 { 736 umem_nofail_callback(umem_out_of_memory); 737 738 physmem = sysconf(_SC_PHYS_PAGES); 739 740 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 741 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 742 743 spa_init(mode); 744 } 745 746 void 747 kernel_fini(void) 748 { 749 spa_fini(); 750 } 751