1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <assert.h> 29 #include <sys/zfs_context.h> 30 #include <poll.h> 31 #include <string.h> 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <fcntl.h> 35 #include <sys/stat.h> 36 #include <sys/spa.h> 37 #include <sys/processor.h> 38 39 40 /* 41 * Emulation of kernel services in userland. 42 */ 43 44 uint64_t physmem; 45 vnode_t *rootdir = (vnode_t *)0xabcd1234; 46 47 /* 48 * ========================================================================= 49 * threads 50 * ========================================================================= 51 */ 52 /*ARGSUSED*/ 53 kthread_t * 54 zk_thread_create(void (*func)(), void *arg) 55 { 56 thread_t tid; 57 58 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 59 &tid) == 0); 60 61 return ((void *)(uintptr_t)tid); 62 } 63 64 /* 65 * ========================================================================= 66 * kstats 67 * ========================================================================= 68 */ 69 /*ARGSUSED*/ 70 kstat_t * 71 kstat_create(char *module, int instance, char *name, char *class, 72 uchar_t type, ulong_t ndata, uchar_t ks_flag) 73 { 74 return (NULL); 75 } 76 77 /*ARGSUSED*/ 78 void 79 kstat_install(kstat_t *ksp) 80 {} 81 82 /*ARGSUSED*/ 83 void 84 kstat_delete(kstat_t *ksp) 85 {} 86 87 /* 88 * ========================================================================= 89 * mutexes 90 * ========================================================================= 91 */ 92 void 93 zmutex_init(kmutex_t *mp) 94 { 95 mp->m_owner = NULL; 96 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 97 } 98 99 void 100 zmutex_destroy(kmutex_t *mp) 101 { 102 ASSERT(mp->m_owner == NULL); 103 (void) _mutex_destroy(&(mp)->m_lock); 104 mp->m_owner = (void *)-1UL; 105 } 106 107 void 108 mutex_enter(kmutex_t *mp) 109 { 110 ASSERT(mp->m_owner != (void *)-1UL); 111 ASSERT(mp->m_owner != curthread); 112 VERIFY(mutex_lock(&mp->m_lock) == 0); 113 ASSERT(mp->m_owner == NULL); 114 mp->m_owner = curthread; 115 } 116 117 int 118 mutex_tryenter(kmutex_t *mp) 119 { 120 ASSERT(mp->m_owner != (void *)-1UL); 121 if (0 == mutex_trylock(&mp->m_lock)) { 122 ASSERT(mp->m_owner == NULL); 123 mp->m_owner = curthread; 124 return (1); 125 } else { 126 return (0); 127 } 128 } 129 130 void 131 mutex_exit(kmutex_t *mp) 132 { 133 ASSERT(mutex_owner(mp) == curthread); 134 mp->m_owner = NULL; 135 VERIFY(mutex_unlock(&mp->m_lock) == 0); 136 } 137 138 void * 139 mutex_owner(kmutex_t *mp) 140 { 141 return (mp->m_owner); 142 } 143 144 /* 145 * ========================================================================= 146 * rwlocks 147 * ========================================================================= 148 */ 149 /*ARGSUSED*/ 150 void 151 rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 152 { 153 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 154 rwlp->rw_owner = NULL; 155 } 156 157 void 158 rw_destroy(krwlock_t *rwlp) 159 { 160 rwlock_destroy(&rwlp->rw_lock); 161 rwlp->rw_owner = (void *)-1UL; 162 } 163 164 void 165 rw_enter(krwlock_t *rwlp, krw_t rw) 166 { 167 ASSERT(!RW_LOCK_HELD(rwlp)); 168 ASSERT(rwlp->rw_owner != (void *)-1UL); 169 ASSERT(rwlp->rw_owner != curthread); 170 171 if (rw == RW_READER) 172 (void) rw_rdlock(&rwlp->rw_lock); 173 else 174 (void) rw_wrlock(&rwlp->rw_lock); 175 176 rwlp->rw_owner = curthread; 177 } 178 179 void 180 rw_exit(krwlock_t *rwlp) 181 { 182 ASSERT(rwlp->rw_owner != (void *)-1UL); 183 184 rwlp->rw_owner = NULL; 185 (void) rw_unlock(&rwlp->rw_lock); 186 } 187 188 int 189 rw_tryenter(krwlock_t *rwlp, krw_t rw) 190 { 191 int rv; 192 193 ASSERT(rwlp->rw_owner != (void *)-1UL); 194 195 if (rw == RW_READER) 196 rv = rw_tryrdlock(&rwlp->rw_lock); 197 else 198 rv = rw_trywrlock(&rwlp->rw_lock); 199 200 if (rv == 0) { 201 rwlp->rw_owner = curthread; 202 return (1); 203 } 204 205 return (0); 206 } 207 208 /*ARGSUSED*/ 209 int 210 rw_tryupgrade(krwlock_t *rwlp) 211 { 212 ASSERT(rwlp->rw_owner != (void *)-1UL); 213 214 return (0); 215 } 216 217 /* 218 * ========================================================================= 219 * condition variables 220 * ========================================================================= 221 */ 222 /*ARGSUSED*/ 223 void 224 cv_init(kcondvar_t *cv, char *name, int type, void *arg) 225 { 226 VERIFY(cond_init(cv, type, NULL) == 0); 227 } 228 229 void 230 cv_destroy(kcondvar_t *cv) 231 { 232 VERIFY(cond_destroy(cv) == 0); 233 } 234 235 void 236 cv_wait(kcondvar_t *cv, kmutex_t *mp) 237 { 238 ASSERT(mutex_owner(mp) == curthread); 239 mp->m_owner = NULL; 240 int ret = cond_wait(cv, &mp->m_lock); 241 VERIFY(ret == 0 || ret == EINTR); 242 mp->m_owner = curthread; 243 } 244 245 clock_t 246 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 247 { 248 int error; 249 timestruc_t ts; 250 clock_t delta; 251 252 top: 253 delta = abstime - lbolt; 254 if (delta <= 0) 255 return (-1); 256 257 ts.tv_sec = delta / hz; 258 ts.tv_nsec = (delta % hz) * (NANOSEC / hz); 259 260 ASSERT(mutex_owner(mp) == curthread); 261 mp->m_owner = NULL; 262 error = cond_reltimedwait(cv, &mp->m_lock, &ts); 263 mp->m_owner = curthread; 264 265 if (error == ETIME) 266 return (-1); 267 268 if (error == EINTR) 269 goto top; 270 271 ASSERT(error == 0); 272 273 return (1); 274 } 275 276 void 277 cv_signal(kcondvar_t *cv) 278 { 279 VERIFY(cond_signal(cv) == 0); 280 } 281 282 void 283 cv_broadcast(kcondvar_t *cv) 284 { 285 VERIFY(cond_broadcast(cv) == 0); 286 } 287 288 /* 289 * ========================================================================= 290 * vnode operations 291 * ========================================================================= 292 */ 293 /* 294 * Note: for the xxxat() versions of these functions, we assume that the 295 * starting vp is always rootdir (which is true for spa_directory.c, the only 296 * ZFS consumer of these interfaces). We assert this is true, and then emulate 297 * them by adding '/' in front of the path. 298 */ 299 300 /*ARGSUSED*/ 301 int 302 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 303 { 304 int fd; 305 vnode_t *vp; 306 int old_umask; 307 char realpath[MAXPATHLEN]; 308 struct stat64 st; 309 310 /* 311 * If we're accessing a real disk from userland, we need to use 312 * the character interface to avoid caching. This is particularly 313 * important if we're trying to look at a real in-kernel storage 314 * pool from userland, e.g. via zdb, because otherwise we won't 315 * see the changes occurring under the segmap cache. 316 * On the other hand, the stupid character device returns zero 317 * for its size. So -- gag -- we open the block device to get 318 * its size, and remember it for subsequent VOP_GETATTR(). 319 */ 320 if (strncmp(path, "/dev/", 5) == 0) { 321 char *dsk; 322 fd = open64(path, O_RDONLY); 323 if (fd == -1) 324 return (errno); 325 if (fstat64(fd, &st) == -1) { 326 close(fd); 327 return (errno); 328 } 329 close(fd); 330 (void) sprintf(realpath, "%s", path); 331 dsk = strstr(path, "/dsk/"); 332 if (dsk != NULL) 333 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 334 dsk + 1); 335 } else { 336 (void) sprintf(realpath, "%s", path); 337 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 338 return (errno); 339 } 340 341 if (flags & FCREAT) 342 old_umask = umask(0); 343 344 /* 345 * The construct 'flags - FREAD' conveniently maps combinations of 346 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 347 */ 348 fd = open64(realpath, flags - FREAD, mode); 349 350 if (flags & FCREAT) 351 (void) umask(old_umask); 352 353 if (fd == -1) 354 return (errno); 355 356 if (fstat64(fd, &st) == -1) { 357 close(fd); 358 return (errno); 359 } 360 361 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 362 363 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 364 365 vp->v_fd = fd; 366 vp->v_size = st.st_size; 367 vp->v_path = spa_strdup(path); 368 369 return (0); 370 } 371 372 int 373 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 374 int x3, vnode_t *startvp) 375 { 376 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 377 int ret; 378 379 ASSERT(startvp == rootdir); 380 (void) sprintf(realpath, "/%s", path); 381 382 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 383 384 umem_free(realpath, strlen(path) + 2); 385 386 return (ret); 387 } 388 389 /*ARGSUSED*/ 390 int 391 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 392 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 393 { 394 ssize_t iolen, split; 395 396 if (uio == UIO_READ) { 397 iolen = pread64(vp->v_fd, addr, len, offset); 398 } else { 399 /* 400 * To simulate partial disk writes, we split writes into two 401 * system calls so that the process can be killed in between. 402 */ 403 split = (len > 0 ? rand() % len : 0); 404 iolen = pwrite64(vp->v_fd, addr, split, offset); 405 iolen += pwrite64(vp->v_fd, (char *)addr + split, 406 len - split, offset + split); 407 } 408 409 if (iolen == -1) 410 return (errno); 411 if (residp) 412 *residp = len - iolen; 413 else if (iolen != len) 414 return (EIO); 415 return (0); 416 } 417 418 void 419 vn_close(vnode_t *vp) 420 { 421 close(vp->v_fd); 422 spa_strfree(vp->v_path); 423 umem_free(vp, sizeof (vnode_t)); 424 } 425 426 #ifdef ZFS_DEBUG 427 428 /* 429 * ========================================================================= 430 * Figure out which debugging statements to print 431 * ========================================================================= 432 */ 433 434 static char *dprintf_string; 435 static int dprintf_print_all; 436 437 int 438 dprintf_find_string(const char *string) 439 { 440 char *tmp_str = dprintf_string; 441 int len = strlen(string); 442 443 /* 444 * Find out if this is a string we want to print. 445 * String format: file1.c,function_name1,file2.c,file3.c 446 */ 447 448 while (tmp_str != NULL) { 449 if (strncmp(tmp_str, string, len) == 0 && 450 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 451 return (1); 452 tmp_str = strchr(tmp_str, ','); 453 if (tmp_str != NULL) 454 tmp_str++; /* Get rid of , */ 455 } 456 return (0); 457 } 458 459 void 460 dprintf_setup(int *argc, char **argv) 461 { 462 int i, j; 463 464 /* 465 * Debugging can be specified two ways: by setting the 466 * environment variable ZFS_DEBUG, or by including a 467 * "debug=..." argument on the command line. The command 468 * line setting overrides the environment variable. 469 */ 470 471 for (i = 1; i < *argc; i++) { 472 int len = strlen("debug="); 473 /* First look for a command line argument */ 474 if (strncmp("debug=", argv[i], len) == 0) { 475 dprintf_string = argv[i] + len; 476 /* Remove from args */ 477 for (j = i; j < *argc; j++) 478 argv[j] = argv[j+1]; 479 argv[j] = NULL; 480 (*argc)--; 481 } 482 } 483 484 if (dprintf_string == NULL) { 485 /* Look for ZFS_DEBUG environment variable */ 486 dprintf_string = getenv("ZFS_DEBUG"); 487 } 488 489 /* 490 * Are we just turning on all debugging? 491 */ 492 if (dprintf_find_string("on")) 493 dprintf_print_all = 1; 494 } 495 496 /* 497 * ========================================================================= 498 * debug printfs 499 * ========================================================================= 500 */ 501 void 502 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 503 { 504 const char *newfile; 505 va_list adx; 506 507 /* 508 * Get rid of annoying "../common/" prefix to filename. 509 */ 510 newfile = strrchr(file, '/'); 511 if (newfile != NULL) { 512 newfile = newfile + 1; /* Get rid of leading / */ 513 } else { 514 newfile = file; 515 } 516 517 if (dprintf_print_all || 518 dprintf_find_string(newfile) || 519 dprintf_find_string(func)) { 520 /* Print out just the function name if requested */ 521 flockfile(stdout); 522 if (dprintf_find_string("pid")) 523 (void) printf("%d ", getpid()); 524 if (dprintf_find_string("tid")) 525 (void) printf("%u ", thr_self()); 526 if (dprintf_find_string("cpu")) 527 (void) printf("%u ", getcpuid()); 528 if (dprintf_find_string("time")) 529 (void) printf("%llu ", gethrtime()); 530 if (dprintf_find_string("long")) 531 (void) printf("%s, line %d: ", newfile, line); 532 (void) printf("%s: ", func); 533 va_start(adx, fmt); 534 (void) vprintf(fmt, adx); 535 va_end(adx); 536 funlockfile(stdout); 537 } 538 } 539 540 #endif /* ZFS_DEBUG */ 541 542 /* 543 * ========================================================================= 544 * cmn_err() and panic() 545 * ========================================================================= 546 */ 547 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 548 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 549 550 void 551 vpanic(const char *fmt, va_list adx) 552 { 553 (void) fprintf(stderr, "error: "); 554 (void) vfprintf(stderr, fmt, adx); 555 (void) fprintf(stderr, "\n"); 556 557 abort(); /* think of it as a "user-level crash dump" */ 558 } 559 560 void 561 panic(const char *fmt, ...) 562 { 563 va_list adx; 564 565 va_start(adx, fmt); 566 vpanic(fmt, adx); 567 va_end(adx); 568 } 569 570 void 571 vcmn_err(int ce, const char *fmt, va_list adx) 572 { 573 if (ce == CE_PANIC) 574 vpanic(fmt, adx); 575 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 576 (void) fprintf(stderr, "%s", ce_prefix[ce]); 577 (void) vfprintf(stderr, fmt, adx); 578 (void) fprintf(stderr, "%s", ce_suffix[ce]); 579 } 580 } 581 582 /*PRINTFLIKE2*/ 583 void 584 cmn_err(int ce, const char *fmt, ...) 585 { 586 va_list adx; 587 588 va_start(adx, fmt); 589 vcmn_err(ce, fmt, adx); 590 va_end(adx); 591 } 592 593 /* 594 * ========================================================================= 595 * kobj interfaces 596 * ========================================================================= 597 */ 598 struct _buf * 599 kobj_open_file(char *name) 600 { 601 struct _buf *file; 602 vnode_t *vp; 603 604 /* set vp as the _fd field of the file */ 605 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0) 606 return ((void *)-1UL); 607 608 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 609 file->_fd = (intptr_t)vp; 610 return (file); 611 } 612 613 int 614 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 615 { 616 ssize_t resid; 617 618 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 619 UIO_SYSSPACE, 0, 0, 0, &resid); 620 621 return (0); 622 } 623 624 void 625 kobj_close_file(struct _buf *file) 626 { 627 vn_close((vnode_t *)file->_fd); 628 umem_free(file, sizeof (struct _buf)); 629 } 630 631 int 632 kobj_fstat(intptr_t fd, struct bootstat *bst) 633 { 634 struct stat64 st; 635 vnode_t *vp = (vnode_t *)fd; 636 if (fstat64(vp->v_fd, &st) == -1) { 637 vn_close(vp); 638 return (errno); 639 } 640 bst->st_size = (uint64_t)st.st_size; 641 return (0); 642 } 643 644 /* 645 * ========================================================================= 646 * misc routines 647 * ========================================================================= 648 */ 649 650 void 651 delay(clock_t ticks) 652 { 653 poll(0, 0, ticks * (1000 / hz)); 654 } 655 656 /* 657 * Find highest one bit set. 658 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 659 * High order bit is 31 (or 63 in _LP64 kernel). 660 */ 661 int 662 highbit(ulong_t i) 663 { 664 register int h = 1; 665 666 if (i == 0) 667 return (0); 668 #ifdef _LP64 669 if (i & 0xffffffff00000000ul) { 670 h += 32; i >>= 32; 671 } 672 #endif 673 if (i & 0xffff0000) { 674 h += 16; i >>= 16; 675 } 676 if (i & 0xff00) { 677 h += 8; i >>= 8; 678 } 679 if (i & 0xf0) { 680 h += 4; i >>= 4; 681 } 682 if (i & 0xc) { 683 h += 2; i >>= 2; 684 } 685 if (i & 0x2) { 686 h += 1; 687 } 688 return (h); 689 } 690 691 static int 692 random_get_bytes_common(uint8_t *ptr, size_t len, char *devname) 693 { 694 int fd = open(devname, O_RDONLY); 695 size_t resid = len; 696 ssize_t bytes; 697 698 ASSERT(fd != -1); 699 700 while (resid != 0) { 701 bytes = read(fd, ptr, resid); 702 ASSERT(bytes >= 0); 703 ptr += bytes; 704 resid -= bytes; 705 } 706 707 close(fd); 708 709 return (0); 710 } 711 712 int 713 random_get_bytes(uint8_t *ptr, size_t len) 714 { 715 return (random_get_bytes_common(ptr, len, "/dev/random")); 716 } 717 718 int 719 random_get_pseudo_bytes(uint8_t *ptr, size_t len) 720 { 721 return (random_get_bytes_common(ptr, len, "/dev/urandom")); 722 } 723 724 /* 725 * ========================================================================= 726 * kernel emulation setup & teardown 727 * ========================================================================= 728 */ 729 static int 730 umem_out_of_memory(void) 731 { 732 char errmsg[] = "out of memory -- generating core dump\n"; 733 734 write(fileno(stderr), errmsg, sizeof (errmsg)); 735 abort(); 736 return (0); 737 } 738 739 void 740 kernel_init(int mode) 741 { 742 umem_nofail_callback(umem_out_of_memory); 743 744 physmem = sysconf(_SC_PHYS_PAGES); 745 746 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 747 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 748 749 spa_init(mode); 750 } 751 752 void 753 kernel_fini(void) 754 { 755 spa_fini(); 756 } 757