1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <assert.h> 29 #include <sys/zfs_context.h> 30 #include <poll.h> 31 #include <string.h> 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <fcntl.h> 35 #include <sys/stat.h> 36 #include <sys/spa.h> 37 #include <sys/processor.h> 38 39 /* 40 * Emulation of kernel services in userland. 41 */ 42 43 uint64_t physmem; 44 vnode_t *rootdir = (vnode_t *)0xabcd1234; 45 int modrootloaded = 0; 46 47 /* 48 * ========================================================================= 49 * threads 50 * ========================================================================= 51 */ 52 /*ARGSUSED*/ 53 kthread_t * 54 zk_thread_create(void (*func)(), void *arg) 55 { 56 thread_t tid; 57 58 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 59 &tid) == 0); 60 61 return ((void *)(uintptr_t)tid); 62 } 63 64 /* 65 * ========================================================================= 66 * mutexes 67 * ========================================================================= 68 */ 69 void 70 zmutex_init(kmutex_t *mp) 71 { 72 mp->m_owner = NULL; 73 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 74 } 75 76 void 77 zmutex_destroy(kmutex_t *mp) 78 { 79 ASSERT(mp->m_owner == NULL); 80 (void) _mutex_destroy(&(mp)->m_lock); 81 mp->m_owner = (void *)-1UL; 82 } 83 84 void 85 mutex_enter(kmutex_t *mp) 86 { 87 ASSERT(mp->m_owner != (void *)-1UL); 88 ASSERT(mp->m_owner != curthread); 89 (void) mutex_lock(&mp->m_lock); 90 ASSERT(mp->m_owner == NULL); 91 mp->m_owner = curthread; 92 } 93 94 int 95 mutex_tryenter(kmutex_t *mp) 96 { 97 ASSERT(mp->m_owner != (void *)-1UL); 98 if (0 == mutex_trylock(&mp->m_lock)) { 99 ASSERT(mp->m_owner == NULL); 100 mp->m_owner = curthread; 101 return (1); 102 } else { 103 return (0); 104 } 105 } 106 107 void 108 mutex_exit(kmutex_t *mp) 109 { 110 ASSERT(mutex_owner(mp) == curthread); 111 mp->m_owner = NULL; 112 (void) mutex_unlock(&mp->m_lock); 113 } 114 115 void * 116 mutex_owner(kmutex_t *mp) 117 { 118 return (mp->m_owner); 119 } 120 121 /* 122 * ========================================================================= 123 * rwlocks 124 * ========================================================================= 125 */ 126 /*ARGSUSED*/ 127 void 128 rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 129 { 130 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 131 rwlp->rw_owner = NULL; 132 } 133 134 void 135 rw_destroy(krwlock_t *rwlp) 136 { 137 rwlock_destroy(&rwlp->rw_lock); 138 rwlp->rw_owner = (void *)-1UL; 139 } 140 141 void 142 rw_enter(krwlock_t *rwlp, krw_t rw) 143 { 144 ASSERT(!RW_LOCK_HELD(rwlp)); 145 ASSERT(rwlp->rw_owner != (void *)-1UL); 146 ASSERT(rwlp->rw_owner != curthread); 147 148 if (rw == RW_READER) 149 (void) rw_rdlock(&rwlp->rw_lock); 150 else 151 (void) rw_wrlock(&rwlp->rw_lock); 152 153 rwlp->rw_owner = curthread; 154 } 155 156 void 157 rw_exit(krwlock_t *rwlp) 158 { 159 ASSERT(rwlp->rw_owner != (void *)-1UL); 160 161 rwlp->rw_owner = NULL; 162 (void) rw_unlock(&rwlp->rw_lock); 163 } 164 165 int 166 rw_tryenter(krwlock_t *rwlp, krw_t rw) 167 { 168 int rv; 169 170 ASSERT(rwlp->rw_owner != (void *)-1UL); 171 172 if (rw == RW_READER) 173 rv = rw_tryrdlock(&rwlp->rw_lock); 174 else 175 rv = rw_trywrlock(&rwlp->rw_lock); 176 177 if (rv == 0) { 178 rwlp->rw_owner = curthread; 179 return (1); 180 } 181 182 return (0); 183 } 184 185 /*ARGSUSED*/ 186 int 187 rw_tryupgrade(krwlock_t *rwlp) 188 { 189 ASSERT(rwlp->rw_owner != (void *)-1UL); 190 191 return (0); 192 } 193 194 /* 195 * ========================================================================= 196 * condition variables 197 * ========================================================================= 198 */ 199 /*ARGSUSED*/ 200 void 201 cv_init(kcondvar_t *cv, char *name, int type, void *arg) 202 { 203 (void) cond_init(cv, type, NULL); 204 } 205 206 void 207 cv_destroy(kcondvar_t *cv) 208 { 209 (void) cond_destroy(cv); 210 } 211 212 void 213 cv_wait(kcondvar_t *cv, kmutex_t *mp) 214 { 215 ASSERT(mutex_owner(mp) == curthread); 216 mp->m_owner = NULL; 217 (void) cond_wait(cv, &mp->m_lock); 218 mp->m_owner = curthread; 219 } 220 221 clock_t 222 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 223 { 224 int error; 225 timestruc_t ts; 226 clock_t delta; 227 228 top: 229 delta = abstime - lbolt; 230 if (delta <= 0) 231 return (-1); 232 233 ts.tv_sec = delta / hz; 234 ts.tv_nsec = (delta % hz) * (NANOSEC / hz); 235 236 ASSERT(mutex_owner(mp) == curthread); 237 mp->m_owner = NULL; 238 error = cond_reltimedwait(cv, &mp->m_lock, &ts); 239 mp->m_owner = curthread; 240 241 if (error == ETIME) 242 return (-1); 243 244 if (error == EINTR) 245 goto top; 246 247 ASSERT(error == 0); 248 249 return (1); 250 } 251 252 void 253 cv_signal(kcondvar_t *cv) 254 { 255 (void) cond_signal(cv); 256 } 257 258 void 259 cv_broadcast(kcondvar_t *cv) 260 { 261 (void) cond_broadcast(cv); 262 } 263 264 /* 265 * ========================================================================= 266 * vnode operations 267 * ========================================================================= 268 */ 269 /* 270 * Note: for the xxxat() versions of these functions, we assume that the 271 * starting vp is always rootdir (which is true for spa_directory.c, the only 272 * ZFS consumer of these interfaces). We assert this is true, and then emulate 273 * them by adding '/' in front of the path. 274 */ 275 276 /*ARGSUSED*/ 277 int 278 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 279 { 280 int fd; 281 vnode_t *vp; 282 int old_umask; 283 char realpath[MAXPATHLEN]; 284 struct stat64 st; 285 286 /* 287 * If we're accessing a real disk from userland, we need to use 288 * the character interface to avoid caching. This is particularly 289 * important if we're trying to look at a real in-kernel storage 290 * pool from userland, e.g. via zdb, because otherwise we won't 291 * see the changes occurring under the segmap cache. 292 * On the other hand, the stupid character device returns zero 293 * for its size. So -- gag -- we open the block device to get 294 * its size, and remember it for subsequent VOP_GETATTR(). 295 */ 296 if (strncmp(path, "/dev/", 5) == 0) { 297 char *dsk; 298 fd = open64(path, O_RDONLY); 299 if (fd == -1) 300 return (errno); 301 if (fstat64(fd, &st) == -1) { 302 close(fd); 303 return (errno); 304 } 305 close(fd); 306 (void) sprintf(realpath, "%s", path); 307 dsk = strstr(path, "/dsk/"); 308 if (dsk != NULL) 309 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 310 dsk + 1); 311 } else { 312 (void) sprintf(realpath, "%s", path); 313 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 314 return (errno); 315 } 316 317 if (flags & FCREAT) 318 old_umask = umask(0); 319 320 /* 321 * The construct 'flags - FREAD' conveniently maps combinations of 322 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 323 */ 324 fd = open64(realpath, flags - FREAD, mode); 325 326 if (flags & FCREAT) 327 (void) umask(old_umask); 328 329 if (fd == -1) 330 return (errno); 331 332 if (fstat64(fd, &st) == -1) { 333 close(fd); 334 return (errno); 335 } 336 337 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 338 339 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 340 341 vp->v_fd = fd; 342 vp->v_size = st.st_size; 343 vp->v_path = spa_strdup(path); 344 345 return (0); 346 } 347 348 int 349 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 350 int x3, vnode_t *startvp) 351 { 352 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 353 int ret; 354 355 ASSERT(startvp == rootdir); 356 (void) sprintf(realpath, "/%s", path); 357 358 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 359 360 umem_free(realpath, strlen(path) + 2); 361 362 return (ret); 363 } 364 365 /*ARGSUSED*/ 366 int 367 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 368 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 369 { 370 ssize_t iolen, split; 371 372 if (uio == UIO_READ) { 373 iolen = pread64(vp->v_fd, addr, len, offset); 374 } else { 375 /* 376 * To simulate partial disk writes, we split writes into two 377 * system calls so that the process can be killed in between. 378 */ 379 split = (len > 0 ? rand() % len : 0); 380 iolen = pwrite64(vp->v_fd, addr, split, offset); 381 iolen += pwrite64(vp->v_fd, (char *)addr + split, 382 len - split, offset + split); 383 } 384 385 if (iolen == -1) 386 return (errno); 387 if (residp) 388 *residp = len - iolen; 389 else if (iolen != len) 390 return (EIO); 391 return (0); 392 } 393 394 void 395 vn_close(vnode_t *vp) 396 { 397 close(vp->v_fd); 398 spa_strfree(vp->v_path); 399 umem_free(vp, sizeof (vnode_t)); 400 } 401 402 #ifdef ZFS_DEBUG 403 404 /* 405 * ========================================================================= 406 * Figure out which debugging statements to print 407 * ========================================================================= 408 */ 409 410 static char *dprintf_string; 411 static int dprintf_print_all; 412 413 int 414 dprintf_find_string(const char *string) 415 { 416 char *tmp_str = dprintf_string; 417 int len = strlen(string); 418 419 /* 420 * Find out if this is a string we want to print. 421 * String format: file1.c,function_name1,file2.c,file3.c 422 */ 423 424 while (tmp_str != NULL) { 425 if (strncmp(tmp_str, string, len) == 0 && 426 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 427 return (1); 428 tmp_str = strchr(tmp_str, ','); 429 if (tmp_str != NULL) 430 tmp_str++; /* Get rid of , */ 431 } 432 return (0); 433 } 434 435 void 436 dprintf_setup(int *argc, char **argv) 437 { 438 int i, j; 439 440 /* 441 * Debugging can be specified two ways: by setting the 442 * environment variable ZFS_DEBUG, or by including a 443 * "debug=..." argument on the command line. The command 444 * line setting overrides the environment variable. 445 */ 446 447 for (i = 1; i < *argc; i++) { 448 int len = strlen("debug="); 449 /* First look for a command line argument */ 450 if (strncmp("debug=", argv[i], len) == 0) { 451 dprintf_string = argv[i] + len; 452 /* Remove from args */ 453 for (j = i; j < *argc; j++) 454 argv[j] = argv[j+1]; 455 argv[j] = NULL; 456 (*argc)--; 457 } 458 } 459 460 if (dprintf_string == NULL) { 461 /* Look for ZFS_DEBUG environment variable */ 462 dprintf_string = getenv("ZFS_DEBUG"); 463 } 464 465 /* 466 * Are we just turning on all debugging? 467 */ 468 if (dprintf_find_string("on")) 469 dprintf_print_all = 1; 470 } 471 472 /* 473 * ========================================================================= 474 * debug printfs 475 * ========================================================================= 476 */ 477 void 478 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 479 { 480 const char *newfile; 481 va_list adx; 482 483 /* 484 * Get rid of annoying "../common/" prefix to filename. 485 */ 486 newfile = strrchr(file, '/'); 487 if (newfile != NULL) { 488 newfile = newfile + 1; /* Get rid of leading / */ 489 } else { 490 newfile = file; 491 } 492 493 if (dprintf_print_all || 494 dprintf_find_string(newfile) || 495 dprintf_find_string(func)) { 496 /* Print out just the function name if requested */ 497 flockfile(stdout); 498 if (dprintf_find_string("pid")) 499 (void) printf("%d ", getpid()); 500 if (dprintf_find_string("tid")) 501 (void) printf("%u ", thr_self()); 502 if (dprintf_find_string("cpu")) 503 (void) printf("%u ", getcpuid()); 504 if (dprintf_find_string("time")) 505 (void) printf("%llu ", gethrtime()); 506 if (dprintf_find_string("long")) 507 (void) printf("%s, line %d: ", newfile, line); 508 (void) printf("%s: ", func); 509 va_start(adx, fmt); 510 (void) vprintf(fmt, adx); 511 va_end(adx); 512 funlockfile(stdout); 513 } 514 } 515 516 #endif /* ZFS_DEBUG */ 517 518 /* 519 * ========================================================================= 520 * cmn_err() and panic() 521 * ========================================================================= 522 */ 523 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 524 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 525 526 void 527 vpanic(const char *fmt, va_list adx) 528 { 529 (void) fprintf(stderr, "error: "); 530 (void) vfprintf(stderr, fmt, adx); 531 (void) fprintf(stderr, "\n"); 532 533 abort(); /* think of it as a "user-level crash dump" */ 534 } 535 536 void 537 panic(const char *fmt, ...) 538 { 539 va_list adx; 540 541 va_start(adx, fmt); 542 vpanic(fmt, adx); 543 va_end(adx); 544 } 545 546 /*PRINTFLIKE2*/ 547 void 548 cmn_err(int ce, const char *fmt, ...) 549 { 550 va_list adx; 551 552 va_start(adx, fmt); 553 if (ce == CE_PANIC) 554 vpanic(fmt, adx); 555 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 556 (void) fprintf(stderr, "%s", ce_prefix[ce]); 557 (void) vfprintf(stderr, fmt, adx); 558 (void) fprintf(stderr, "%s", ce_suffix[ce]); 559 } 560 va_end(adx); 561 } 562 563 /* 564 * ========================================================================= 565 * kobj interfaces 566 * ========================================================================= 567 */ 568 struct _buf * 569 kobj_open_file(char *name) 570 { 571 struct _buf *file; 572 vnode_t *vp; 573 574 /* set vp as the _fd field of the file */ 575 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0) 576 return ((void *)-1UL); 577 578 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 579 file->_fd = (intptr_t)vp; 580 return (file); 581 } 582 583 int 584 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 585 { 586 ssize_t resid; 587 588 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 589 UIO_SYSSPACE, 0, 0, 0, &resid); 590 591 return (0); 592 } 593 594 void 595 kobj_close_file(struct _buf *file) 596 { 597 vn_close((vnode_t *)file->_fd); 598 umem_free(file, sizeof (struct _buf)); 599 } 600 601 int 602 kobj_fstat(intptr_t fd, struct bootstat *bst) 603 { 604 struct stat64 st; 605 vnode_t *vp = (vnode_t *)fd; 606 if (fstat64(vp->v_fd, &st) == -1) { 607 vn_close(vp); 608 return (errno); 609 } 610 bst->st_size = (uint64_t)st.st_size; 611 return (0); 612 } 613 614 /* 615 * ========================================================================= 616 * misc routines 617 * ========================================================================= 618 */ 619 620 void 621 delay(clock_t ticks) 622 { 623 poll(0, 0, ticks * (1000 / hz)); 624 } 625 626 /* 627 * Find highest one bit set. 628 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 629 * High order bit is 31 (or 63 in _LP64 kernel). 630 */ 631 int 632 highbit(ulong_t i) 633 { 634 register int h = 1; 635 636 if (i == 0) 637 return (0); 638 #ifdef _LP64 639 if (i & 0xffffffff00000000ul) { 640 h += 32; i >>= 32; 641 } 642 #endif 643 if (i & 0xffff0000) { 644 h += 16; i >>= 16; 645 } 646 if (i & 0xff00) { 647 h += 8; i >>= 8; 648 } 649 if (i & 0xf0) { 650 h += 4; i >>= 4; 651 } 652 if (i & 0xc) { 653 h += 2; i >>= 2; 654 } 655 if (i & 0x2) { 656 h += 1; 657 } 658 return (h); 659 } 660 661 static int 662 random_get_bytes_common(uint8_t *ptr, size_t len, char *devname) 663 { 664 int fd = open(devname, O_RDONLY); 665 size_t resid = len; 666 ssize_t bytes; 667 668 ASSERT(fd != -1); 669 670 while (resid != 0) { 671 bytes = read(fd, ptr, resid); 672 ASSERT(bytes >= 0); 673 ptr += bytes; 674 resid -= bytes; 675 } 676 677 close(fd); 678 679 return (0); 680 } 681 682 int 683 random_get_bytes(uint8_t *ptr, size_t len) 684 { 685 return (random_get_bytes_common(ptr, len, "/dev/random")); 686 } 687 688 int 689 random_get_pseudo_bytes(uint8_t *ptr, size_t len) 690 { 691 return (random_get_bytes_common(ptr, len, "/dev/urandom")); 692 } 693 694 /* 695 * ========================================================================= 696 * kernel emulation setup & teardown 697 * ========================================================================= 698 */ 699 static int 700 umem_out_of_memory(void) 701 { 702 char errmsg[] = "out of memory -- generating core dump\n"; 703 704 write(fileno(stderr), errmsg, sizeof (errmsg)); 705 abort(); 706 return (0); 707 } 708 709 void 710 kernel_init(int mode) 711 { 712 umem_nofail_callback(umem_out_of_memory); 713 714 physmem = sysconf(_SC_PHYS_PAGES); 715 716 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 717 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 718 719 spa_init(mode); 720 } 721 722 void 723 kernel_fini(void) 724 { 725 spa_fini(); 726 } 727