1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <assert.h> 29 #include <sys/zfs_context.h> 30 #include <poll.h> 31 #include <string.h> 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <fcntl.h> 35 #include <sys/stat.h> 36 #include <sys/spa.h> 37 #include <sys/processor.h> 38 39 /* 40 * Emulation of kernel services in userland. 41 */ 42 43 uint64_t physmem; 44 vnode_t *rootdir = (vnode_t *)0xabcd1234; 45 46 /* 47 * ========================================================================= 48 * threads 49 * ========================================================================= 50 */ 51 /*ARGSUSED*/ 52 kthread_t * 53 zk_thread_create(void (*func)(), void *arg) 54 { 55 thread_t tid; 56 57 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 58 &tid) == 0); 59 60 return ((void *)(uintptr_t)tid); 61 } 62 63 /* 64 * ========================================================================= 65 * mutexes 66 * ========================================================================= 67 */ 68 void 69 zmutex_init(kmutex_t *mp) 70 { 71 mp->m_owner = NULL; 72 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 73 } 74 75 void 76 zmutex_destroy(kmutex_t *mp) 77 { 78 ASSERT(mp->m_owner == NULL); 79 (void) _mutex_destroy(&(mp)->m_lock); 80 mp->m_owner = (void *)-1UL; 81 } 82 83 void 84 mutex_enter(kmutex_t *mp) 85 { 86 ASSERT(mp->m_owner != (void *)-1UL); 87 ASSERT(mp->m_owner != curthread); 88 (void) mutex_lock(&mp->m_lock); 89 ASSERT(mp->m_owner == NULL); 90 mp->m_owner = curthread; 91 } 92 93 int 94 mutex_tryenter(kmutex_t *mp) 95 { 96 ASSERT(mp->m_owner != (void *)-1UL); 97 if (0 == mutex_trylock(&mp->m_lock)) { 98 ASSERT(mp->m_owner == NULL); 99 mp->m_owner = curthread; 100 return (1); 101 } else { 102 return (0); 103 } 104 } 105 106 void 107 mutex_exit(kmutex_t *mp) 108 { 109 ASSERT(mutex_owner(mp) == curthread); 110 mp->m_owner = NULL; 111 (void) mutex_unlock(&mp->m_lock); 112 } 113 114 void * 115 mutex_owner(kmutex_t *mp) 116 { 117 return (mp->m_owner); 118 } 119 120 /* 121 * ========================================================================= 122 * rwlocks 123 * ========================================================================= 124 */ 125 /*ARGSUSED*/ 126 void 127 rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 128 { 129 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 130 rwlp->rw_owner = NULL; 131 } 132 133 void 134 rw_destroy(krwlock_t *rwlp) 135 { 136 rwlock_destroy(&rwlp->rw_lock); 137 rwlp->rw_owner = (void *)-1UL; 138 } 139 140 void 141 rw_enter(krwlock_t *rwlp, krw_t rw) 142 { 143 ASSERT(!RW_LOCK_HELD(rwlp)); 144 ASSERT(rwlp->rw_owner != (void *)-1UL); 145 ASSERT(rwlp->rw_owner != curthread); 146 147 if (rw == RW_READER) 148 (void) rw_rdlock(&rwlp->rw_lock); 149 else 150 (void) rw_wrlock(&rwlp->rw_lock); 151 152 rwlp->rw_owner = curthread; 153 } 154 155 void 156 rw_exit(krwlock_t *rwlp) 157 { 158 ASSERT(rwlp->rw_owner != (void *)-1UL); 159 160 rwlp->rw_owner = NULL; 161 (void) rw_unlock(&rwlp->rw_lock); 162 } 163 164 int 165 rw_tryenter(krwlock_t *rwlp, krw_t rw) 166 { 167 int rv; 168 169 ASSERT(rwlp->rw_owner != (void *)-1UL); 170 171 if (rw == RW_READER) 172 rv = rw_tryrdlock(&rwlp->rw_lock); 173 else 174 rv = rw_trywrlock(&rwlp->rw_lock); 175 176 if (rv == 0) { 177 rwlp->rw_owner = curthread; 178 return (1); 179 } 180 181 return (0); 182 } 183 184 /*ARGSUSED*/ 185 int 186 rw_tryupgrade(krwlock_t *rwlp) 187 { 188 ASSERT(rwlp->rw_owner != (void *)-1UL); 189 190 return (0); 191 } 192 193 /* 194 * ========================================================================= 195 * condition variables 196 * ========================================================================= 197 */ 198 /*ARGSUSED*/ 199 void 200 cv_init(kcondvar_t *cv, char *name, int type, void *arg) 201 { 202 (void) cond_init(cv, type, NULL); 203 } 204 205 void 206 cv_destroy(kcondvar_t *cv) 207 { 208 (void) cond_destroy(cv); 209 } 210 211 void 212 cv_wait(kcondvar_t *cv, kmutex_t *mp) 213 { 214 ASSERT(mutex_owner(mp) == curthread); 215 mp->m_owner = NULL; 216 (void) cond_wait(cv, &mp->m_lock); 217 mp->m_owner = curthread; 218 } 219 220 clock_t 221 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 222 { 223 int error; 224 timestruc_t ts; 225 clock_t delta; 226 227 top: 228 delta = abstime - lbolt; 229 if (delta <= 0) 230 return (-1); 231 232 ts.tv_sec = delta / hz; 233 ts.tv_nsec = (delta % hz) * (NANOSEC / hz); 234 235 ASSERT(mutex_owner(mp) == curthread); 236 mp->m_owner = NULL; 237 error = cond_reltimedwait(cv, &mp->m_lock, &ts); 238 mp->m_owner = curthread; 239 240 if (error == ETIME) 241 return (-1); 242 243 if (error == EINTR) 244 goto top; 245 246 ASSERT(error == 0); 247 248 return (1); 249 } 250 251 void 252 cv_signal(kcondvar_t *cv) 253 { 254 (void) cond_signal(cv); 255 } 256 257 void 258 cv_broadcast(kcondvar_t *cv) 259 { 260 (void) cond_broadcast(cv); 261 } 262 263 /* 264 * ========================================================================= 265 * vnode operations 266 * ========================================================================= 267 */ 268 /* 269 * Note: for the xxxat() versions of these functions, we assume that the 270 * starting vp is always rootdir (which is true for spa_directory.c, the only 271 * ZFS consumer of these interfaces). We assert this is true, and then emulate 272 * them by adding '/' in front of the path. 273 */ 274 275 /*ARGSUSED*/ 276 int 277 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 278 { 279 int fd; 280 vnode_t *vp; 281 int old_umask; 282 char realpath[MAXPATHLEN]; 283 struct stat64 st; 284 285 /* 286 * If we're accessing a real disk from userland, we need to use 287 * the character interface to avoid caching. This is particularly 288 * important if we're trying to look at a real in-kernel storage 289 * pool from userland, e.g. via zdb, because otherwise we won't 290 * see the changes occurring under the segmap cache. 291 * On the other hand, the stupid character device returns zero 292 * for its size. So -- gag -- we open the block device to get 293 * its size, and remember it for subsequent VOP_GETATTR(). 294 */ 295 if (strncmp(path, "/dev/", 5) == 0) { 296 char *dsk; 297 fd = open64(path, O_RDONLY); 298 if (fd == -1) 299 return (errno); 300 if (fstat64(fd, &st) == -1) { 301 close(fd); 302 return (errno); 303 } 304 close(fd); 305 (void) sprintf(realpath, "%s", path); 306 dsk = strstr(path, "/dsk/"); 307 if (dsk != NULL) 308 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 309 dsk + 1); 310 } else { 311 (void) sprintf(realpath, "%s", path); 312 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 313 return (errno); 314 } 315 316 if (flags & FCREAT) 317 old_umask = umask(0); 318 319 /* 320 * The construct 'flags - FREAD' conveniently maps combinations of 321 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 322 */ 323 fd = open64(realpath, flags - FREAD, mode); 324 325 if (flags & FCREAT) 326 (void) umask(old_umask); 327 328 if (fd == -1) 329 return (errno); 330 331 if (fstat64(fd, &st) == -1) { 332 close(fd); 333 return (errno); 334 } 335 336 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 337 338 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 339 340 vp->v_fd = fd; 341 vp->v_size = st.st_size; 342 vp->v_path = spa_strdup(path); 343 344 return (0); 345 } 346 347 int 348 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 349 int x3, vnode_t *startvp) 350 { 351 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 352 int ret; 353 354 ASSERT(startvp == rootdir); 355 (void) sprintf(realpath, "/%s", path); 356 357 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 358 359 umem_free(realpath, strlen(path) + 2); 360 361 return (ret); 362 } 363 364 /*ARGSUSED*/ 365 int 366 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 367 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 368 { 369 ssize_t iolen, split; 370 371 if (uio == UIO_READ) { 372 iolen = pread64(vp->v_fd, addr, len, offset); 373 } else { 374 /* 375 * To simulate partial disk writes, we split writes into two 376 * system calls so that the process can be killed in between. 377 */ 378 split = (len > 0 ? rand() % len : 0); 379 iolen = pwrite64(vp->v_fd, addr, split, offset); 380 iolen += pwrite64(vp->v_fd, (char *)addr + split, 381 len - split, offset + split); 382 } 383 384 if (iolen == -1) 385 return (errno); 386 if (residp) 387 *residp = len - iolen; 388 else if (iolen != len) 389 return (EIO); 390 return (0); 391 } 392 393 void 394 vn_close(vnode_t *vp) 395 { 396 close(vp->v_fd); 397 spa_strfree(vp->v_path); 398 umem_free(vp, sizeof (vnode_t)); 399 } 400 401 #ifdef ZFS_DEBUG 402 403 /* 404 * ========================================================================= 405 * Figure out which debugging statements to print 406 * ========================================================================= 407 */ 408 409 static char *dprintf_string; 410 static int dprintf_print_all; 411 412 int 413 dprintf_find_string(const char *string) 414 { 415 char *tmp_str = dprintf_string; 416 int len = strlen(string); 417 418 /* 419 * Find out if this is a string we want to print. 420 * String format: file1.c,function_name1,file2.c,file3.c 421 */ 422 423 while (tmp_str != NULL) { 424 if (strncmp(tmp_str, string, len) == 0 && 425 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 426 return (1); 427 tmp_str = strchr(tmp_str, ','); 428 if (tmp_str != NULL) 429 tmp_str++; /* Get rid of , */ 430 } 431 return (0); 432 } 433 434 void 435 dprintf_setup(int *argc, char **argv) 436 { 437 int i, j; 438 439 /* 440 * Debugging can be specified two ways: by setting the 441 * environment variable ZFS_DEBUG, or by including a 442 * "debug=..." argument on the command line. The command 443 * line setting overrides the environment variable. 444 */ 445 446 for (i = 1; i < *argc; i++) { 447 int len = strlen("debug="); 448 /* First look for a command line argument */ 449 if (strncmp("debug=", argv[i], len) == 0) { 450 dprintf_string = argv[i] + len; 451 /* Remove from args */ 452 for (j = i; j < *argc; j++) 453 argv[j] = argv[j+1]; 454 argv[j] = NULL; 455 (*argc)--; 456 } 457 } 458 459 if (dprintf_string == NULL) { 460 /* Look for ZFS_DEBUG environment variable */ 461 dprintf_string = getenv("ZFS_DEBUG"); 462 } 463 464 /* 465 * Are we just turning on all debugging? 466 */ 467 if (dprintf_find_string("on")) 468 dprintf_print_all = 1; 469 } 470 471 /* 472 * ========================================================================= 473 * debug printfs 474 * ========================================================================= 475 */ 476 void 477 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 478 { 479 const char *newfile; 480 va_list adx; 481 482 /* 483 * Get rid of annoying "../common/" prefix to filename. 484 */ 485 newfile = strrchr(file, '/'); 486 if (newfile != NULL) { 487 newfile = newfile + 1; /* Get rid of leading / */ 488 } else { 489 newfile = file; 490 } 491 492 if (dprintf_print_all || 493 dprintf_find_string(newfile) || 494 dprintf_find_string(func)) { 495 /* Print out just the function name if requested */ 496 flockfile(stdout); 497 if (dprintf_find_string("pid")) 498 (void) printf("%d ", getpid()); 499 if (dprintf_find_string("tid")) 500 (void) printf("%u ", thr_self()); 501 if (dprintf_find_string("cpu")) 502 (void) printf("%u ", getcpuid()); 503 if (dprintf_find_string("time")) 504 (void) printf("%llu ", gethrtime()); 505 if (dprintf_find_string("long")) 506 (void) printf("%s, line %d: ", newfile, line); 507 (void) printf("%s: ", func); 508 va_start(adx, fmt); 509 (void) vprintf(fmt, adx); 510 va_end(adx); 511 funlockfile(stdout); 512 } 513 } 514 515 #endif /* ZFS_DEBUG */ 516 517 /* 518 * ========================================================================= 519 * cmn_err() and panic() 520 * ========================================================================= 521 */ 522 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 523 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 524 525 void 526 vpanic(const char *fmt, va_list adx) 527 { 528 (void) fprintf(stderr, "error: "); 529 (void) vfprintf(stderr, fmt, adx); 530 (void) fprintf(stderr, "\n"); 531 532 abort(); /* think of it as a "user-level crash dump" */ 533 } 534 535 void 536 panic(const char *fmt, ...) 537 { 538 va_list adx; 539 540 va_start(adx, fmt); 541 vpanic(fmt, adx); 542 va_end(adx); 543 } 544 545 /*PRINTFLIKE2*/ 546 void 547 cmn_err(int ce, const char *fmt, ...) 548 { 549 va_list adx; 550 551 va_start(adx, fmt); 552 if (ce == CE_PANIC) 553 vpanic(fmt, adx); 554 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 555 (void) fprintf(stderr, "%s", ce_prefix[ce]); 556 (void) vfprintf(stderr, fmt, adx); 557 (void) fprintf(stderr, "%s", ce_suffix[ce]); 558 } 559 va_end(adx); 560 } 561 562 /* 563 * ========================================================================= 564 * kobj interfaces 565 * ========================================================================= 566 */ 567 struct _buf * 568 kobj_open_file(char *name) 569 { 570 struct _buf *file; 571 vnode_t *vp; 572 573 /* set vp as the _fd field of the file */ 574 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0) 575 return ((void *)-1UL); 576 577 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 578 file->_fd = (intptr_t)vp; 579 return (file); 580 } 581 582 int 583 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 584 { 585 ssize_t resid; 586 587 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 588 UIO_SYSSPACE, 0, 0, 0, &resid); 589 590 return (0); 591 } 592 593 void 594 kobj_close_file(struct _buf *file) 595 { 596 vn_close((vnode_t *)file->_fd); 597 umem_free(file, sizeof (struct _buf)); 598 } 599 600 int 601 kobj_fstat(intptr_t fd, struct bootstat *bst) 602 { 603 struct stat64 st; 604 vnode_t *vp = (vnode_t *)fd; 605 if (fstat64(vp->v_fd, &st) == -1) { 606 vn_close(vp); 607 return (errno); 608 } 609 bst->st_size = (uint64_t)st.st_size; 610 return (0); 611 } 612 613 /* 614 * ========================================================================= 615 * misc routines 616 * ========================================================================= 617 */ 618 619 void 620 delay(clock_t ticks) 621 { 622 poll(0, 0, ticks * (1000 / hz)); 623 } 624 625 /* 626 * Find highest one bit set. 627 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 628 * High order bit is 31 (or 63 in _LP64 kernel). 629 */ 630 int 631 highbit(ulong_t i) 632 { 633 register int h = 1; 634 635 if (i == 0) 636 return (0); 637 #ifdef _LP64 638 if (i & 0xffffffff00000000ul) { 639 h += 32; i >>= 32; 640 } 641 #endif 642 if (i & 0xffff0000) { 643 h += 16; i >>= 16; 644 } 645 if (i & 0xff00) { 646 h += 8; i >>= 8; 647 } 648 if (i & 0xf0) { 649 h += 4; i >>= 4; 650 } 651 if (i & 0xc) { 652 h += 2; i >>= 2; 653 } 654 if (i & 0x2) { 655 h += 1; 656 } 657 return (h); 658 } 659 660 static int 661 random_get_bytes_common(uint8_t *ptr, size_t len, char *devname) 662 { 663 int fd = open(devname, O_RDONLY); 664 size_t resid = len; 665 ssize_t bytes; 666 667 ASSERT(fd != -1); 668 669 while (resid != 0) { 670 bytes = read(fd, ptr, resid); 671 ASSERT(bytes >= 0); 672 ptr += bytes; 673 resid -= bytes; 674 } 675 676 close(fd); 677 678 return (0); 679 } 680 681 int 682 random_get_bytes(uint8_t *ptr, size_t len) 683 { 684 return (random_get_bytes_common(ptr, len, "/dev/random")); 685 } 686 687 int 688 random_get_pseudo_bytes(uint8_t *ptr, size_t len) 689 { 690 return (random_get_bytes_common(ptr, len, "/dev/urandom")); 691 } 692 693 /* 694 * ========================================================================= 695 * kernel emulation setup & teardown 696 * ========================================================================= 697 */ 698 static int 699 umem_out_of_memory(void) 700 { 701 char errmsg[] = "out of memory -- generating core dump\n"; 702 703 write(fileno(stderr), errmsg, sizeof (errmsg)); 704 abort(); 705 return (0); 706 } 707 708 void 709 kernel_init(int mode) 710 { 711 umem_nofail_callback(umem_out_of_memory); 712 713 physmem = sysconf(_SC_PHYS_PAGES); 714 715 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 716 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 717 718 spa_init(mode); 719 } 720 721 void 722 kernel_fini(void) 723 { 724 spa_fini(); 725 } 726