1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <assert.h> 29 #include <sys/zfs_context.h> 30 #include <poll.h> 31 #include <string.h> 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <fcntl.h> 35 #include <sys/stat.h> 36 #include <sys/spa.h> 37 #include <sys/processor.h> 38 39 40 /* 41 * Emulation of kernel services in userland. 42 */ 43 44 uint64_t physmem; 45 vnode_t *rootdir = (vnode_t *)0xabcd1234; 46 47 /* 48 * ========================================================================= 49 * threads 50 * ========================================================================= 51 */ 52 /*ARGSUSED*/ 53 kthread_t * 54 zk_thread_create(void (*func)(), void *arg) 55 { 56 thread_t tid; 57 58 VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED, 59 &tid) == 0); 60 61 return ((void *)(uintptr_t)tid); 62 } 63 64 /* 65 * ========================================================================= 66 * mutexes 67 * ========================================================================= 68 */ 69 void 70 zmutex_init(kmutex_t *mp) 71 { 72 mp->m_owner = NULL; 73 (void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL); 74 } 75 76 void 77 zmutex_destroy(kmutex_t *mp) 78 { 79 ASSERT(mp->m_owner == NULL); 80 (void) _mutex_destroy(&(mp)->m_lock); 81 mp->m_owner = (void *)-1UL; 82 } 83 84 void 85 mutex_enter(kmutex_t *mp) 86 { 87 ASSERT(mp->m_owner != (void *)-1UL); 88 ASSERT(mp->m_owner != curthread); 89 VERIFY(mutex_lock(&mp->m_lock) == 0); 90 ASSERT(mp->m_owner == NULL); 91 mp->m_owner = curthread; 92 } 93 94 int 95 mutex_tryenter(kmutex_t *mp) 96 { 97 ASSERT(mp->m_owner != (void *)-1UL); 98 if (0 == mutex_trylock(&mp->m_lock)) { 99 ASSERT(mp->m_owner == NULL); 100 mp->m_owner = curthread; 101 return (1); 102 } else { 103 return (0); 104 } 105 } 106 107 void 108 mutex_exit(kmutex_t *mp) 109 { 110 ASSERT(mutex_owner(mp) == curthread); 111 mp->m_owner = NULL; 112 VERIFY(mutex_unlock(&mp->m_lock) == 0); 113 } 114 115 void * 116 mutex_owner(kmutex_t *mp) 117 { 118 return (mp->m_owner); 119 } 120 121 /* 122 * ========================================================================= 123 * rwlocks 124 * ========================================================================= 125 */ 126 /*ARGSUSED*/ 127 void 128 rw_init(krwlock_t *rwlp, char *name, int type, void *arg) 129 { 130 rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL); 131 rwlp->rw_owner = NULL; 132 } 133 134 void 135 rw_destroy(krwlock_t *rwlp) 136 { 137 rwlock_destroy(&rwlp->rw_lock); 138 rwlp->rw_owner = (void *)-1UL; 139 } 140 141 void 142 rw_enter(krwlock_t *rwlp, krw_t rw) 143 { 144 ASSERT(!RW_LOCK_HELD(rwlp)); 145 ASSERT(rwlp->rw_owner != (void *)-1UL); 146 ASSERT(rwlp->rw_owner != curthread); 147 148 if (rw == RW_READER) 149 (void) rw_rdlock(&rwlp->rw_lock); 150 else 151 (void) rw_wrlock(&rwlp->rw_lock); 152 153 rwlp->rw_owner = curthread; 154 } 155 156 void 157 rw_exit(krwlock_t *rwlp) 158 { 159 ASSERT(rwlp->rw_owner != (void *)-1UL); 160 161 rwlp->rw_owner = NULL; 162 (void) rw_unlock(&rwlp->rw_lock); 163 } 164 165 int 166 rw_tryenter(krwlock_t *rwlp, krw_t rw) 167 { 168 int rv; 169 170 ASSERT(rwlp->rw_owner != (void *)-1UL); 171 172 if (rw == RW_READER) 173 rv = rw_tryrdlock(&rwlp->rw_lock); 174 else 175 rv = rw_trywrlock(&rwlp->rw_lock); 176 177 if (rv == 0) { 178 rwlp->rw_owner = curthread; 179 return (1); 180 } 181 182 return (0); 183 } 184 185 /*ARGSUSED*/ 186 int 187 rw_tryupgrade(krwlock_t *rwlp) 188 { 189 ASSERT(rwlp->rw_owner != (void *)-1UL); 190 191 return (0); 192 } 193 194 /* 195 * ========================================================================= 196 * condition variables 197 * ========================================================================= 198 */ 199 /*ARGSUSED*/ 200 void 201 cv_init(kcondvar_t *cv, char *name, int type, void *arg) 202 { 203 VERIFY(cond_init(cv, type, NULL) == 0); 204 } 205 206 void 207 cv_destroy(kcondvar_t *cv) 208 { 209 VERIFY(cond_destroy(cv) == 0); 210 } 211 212 void 213 cv_wait(kcondvar_t *cv, kmutex_t *mp) 214 { 215 ASSERT(mutex_owner(mp) == curthread); 216 mp->m_owner = NULL; 217 int ret = cond_wait(cv, &mp->m_lock); 218 VERIFY(ret == 0 || ret == EINTR); 219 mp->m_owner = curthread; 220 } 221 222 clock_t 223 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) 224 { 225 int error; 226 timestruc_t ts; 227 clock_t delta; 228 229 top: 230 delta = abstime - lbolt; 231 if (delta <= 0) 232 return (-1); 233 234 ts.tv_sec = delta / hz; 235 ts.tv_nsec = (delta % hz) * (NANOSEC / hz); 236 237 ASSERT(mutex_owner(mp) == curthread); 238 mp->m_owner = NULL; 239 error = cond_reltimedwait(cv, &mp->m_lock, &ts); 240 mp->m_owner = curthread; 241 242 if (error == ETIME) 243 return (-1); 244 245 if (error == EINTR) 246 goto top; 247 248 ASSERT(error == 0); 249 250 return (1); 251 } 252 253 void 254 cv_signal(kcondvar_t *cv) 255 { 256 VERIFY(cond_signal(cv) == 0); 257 } 258 259 void 260 cv_broadcast(kcondvar_t *cv) 261 { 262 VERIFY(cond_broadcast(cv) == 0); 263 } 264 265 /* 266 * ========================================================================= 267 * vnode operations 268 * ========================================================================= 269 */ 270 /* 271 * Note: for the xxxat() versions of these functions, we assume that the 272 * starting vp is always rootdir (which is true for spa_directory.c, the only 273 * ZFS consumer of these interfaces). We assert this is true, and then emulate 274 * them by adding '/' in front of the path. 275 */ 276 277 /*ARGSUSED*/ 278 int 279 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) 280 { 281 int fd; 282 vnode_t *vp; 283 int old_umask; 284 char realpath[MAXPATHLEN]; 285 struct stat64 st; 286 287 /* 288 * If we're accessing a real disk from userland, we need to use 289 * the character interface to avoid caching. This is particularly 290 * important if we're trying to look at a real in-kernel storage 291 * pool from userland, e.g. via zdb, because otherwise we won't 292 * see the changes occurring under the segmap cache. 293 * On the other hand, the stupid character device returns zero 294 * for its size. So -- gag -- we open the block device to get 295 * its size, and remember it for subsequent VOP_GETATTR(). 296 */ 297 if (strncmp(path, "/dev/", 5) == 0) { 298 char *dsk; 299 fd = open64(path, O_RDONLY); 300 if (fd == -1) 301 return (errno); 302 if (fstat64(fd, &st) == -1) { 303 close(fd); 304 return (errno); 305 } 306 close(fd); 307 (void) sprintf(realpath, "%s", path); 308 dsk = strstr(path, "/dsk/"); 309 if (dsk != NULL) 310 (void) sprintf(realpath + (dsk - path) + 1, "r%s", 311 dsk + 1); 312 } else { 313 (void) sprintf(realpath, "%s", path); 314 if (!(flags & FCREAT) && stat64(realpath, &st) == -1) 315 return (errno); 316 } 317 318 if (flags & FCREAT) 319 old_umask = umask(0); 320 321 /* 322 * The construct 'flags - FREAD' conveniently maps combinations of 323 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. 324 */ 325 fd = open64(realpath, flags - FREAD, mode); 326 327 if (flags & FCREAT) 328 (void) umask(old_umask); 329 330 if (fd == -1) 331 return (errno); 332 333 if (fstat64(fd, &st) == -1) { 334 close(fd); 335 return (errno); 336 } 337 338 (void) fcntl(fd, F_SETFD, FD_CLOEXEC); 339 340 *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); 341 342 vp->v_fd = fd; 343 vp->v_size = st.st_size; 344 vp->v_path = spa_strdup(path); 345 346 return (0); 347 } 348 349 int 350 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, 351 int x3, vnode_t *startvp) 352 { 353 char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); 354 int ret; 355 356 ASSERT(startvp == rootdir); 357 (void) sprintf(realpath, "/%s", path); 358 359 ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); 360 361 umem_free(realpath, strlen(path) + 2); 362 363 return (ret); 364 } 365 366 /*ARGSUSED*/ 367 int 368 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, 369 int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) 370 { 371 ssize_t iolen, split; 372 373 if (uio == UIO_READ) { 374 iolen = pread64(vp->v_fd, addr, len, offset); 375 } else { 376 /* 377 * To simulate partial disk writes, we split writes into two 378 * system calls so that the process can be killed in between. 379 */ 380 split = (len > 0 ? rand() % len : 0); 381 iolen = pwrite64(vp->v_fd, addr, split, offset); 382 iolen += pwrite64(vp->v_fd, (char *)addr + split, 383 len - split, offset + split); 384 } 385 386 if (iolen == -1) 387 return (errno); 388 if (residp) 389 *residp = len - iolen; 390 else if (iolen != len) 391 return (EIO); 392 return (0); 393 } 394 395 void 396 vn_close(vnode_t *vp) 397 { 398 close(vp->v_fd); 399 spa_strfree(vp->v_path); 400 umem_free(vp, sizeof (vnode_t)); 401 } 402 403 #ifdef ZFS_DEBUG 404 405 /* 406 * ========================================================================= 407 * Figure out which debugging statements to print 408 * ========================================================================= 409 */ 410 411 static char *dprintf_string; 412 static int dprintf_print_all; 413 414 int 415 dprintf_find_string(const char *string) 416 { 417 char *tmp_str = dprintf_string; 418 int len = strlen(string); 419 420 /* 421 * Find out if this is a string we want to print. 422 * String format: file1.c,function_name1,file2.c,file3.c 423 */ 424 425 while (tmp_str != NULL) { 426 if (strncmp(tmp_str, string, len) == 0 && 427 (tmp_str[len] == ',' || tmp_str[len] == '\0')) 428 return (1); 429 tmp_str = strchr(tmp_str, ','); 430 if (tmp_str != NULL) 431 tmp_str++; /* Get rid of , */ 432 } 433 return (0); 434 } 435 436 void 437 dprintf_setup(int *argc, char **argv) 438 { 439 int i, j; 440 441 /* 442 * Debugging can be specified two ways: by setting the 443 * environment variable ZFS_DEBUG, or by including a 444 * "debug=..." argument on the command line. The command 445 * line setting overrides the environment variable. 446 */ 447 448 for (i = 1; i < *argc; i++) { 449 int len = strlen("debug="); 450 /* First look for a command line argument */ 451 if (strncmp("debug=", argv[i], len) == 0) { 452 dprintf_string = argv[i] + len; 453 /* Remove from args */ 454 for (j = i; j < *argc; j++) 455 argv[j] = argv[j+1]; 456 argv[j] = NULL; 457 (*argc)--; 458 } 459 } 460 461 if (dprintf_string == NULL) { 462 /* Look for ZFS_DEBUG environment variable */ 463 dprintf_string = getenv("ZFS_DEBUG"); 464 } 465 466 /* 467 * Are we just turning on all debugging? 468 */ 469 if (dprintf_find_string("on")) 470 dprintf_print_all = 1; 471 } 472 473 /* 474 * ========================================================================= 475 * debug printfs 476 * ========================================================================= 477 */ 478 void 479 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 480 { 481 const char *newfile; 482 va_list adx; 483 484 /* 485 * Get rid of annoying "../common/" prefix to filename. 486 */ 487 newfile = strrchr(file, '/'); 488 if (newfile != NULL) { 489 newfile = newfile + 1; /* Get rid of leading / */ 490 } else { 491 newfile = file; 492 } 493 494 if (dprintf_print_all || 495 dprintf_find_string(newfile) || 496 dprintf_find_string(func)) { 497 /* Print out just the function name if requested */ 498 flockfile(stdout); 499 if (dprintf_find_string("pid")) 500 (void) printf("%d ", getpid()); 501 if (dprintf_find_string("tid")) 502 (void) printf("%u ", thr_self()); 503 if (dprintf_find_string("cpu")) 504 (void) printf("%u ", getcpuid()); 505 if (dprintf_find_string("time")) 506 (void) printf("%llu ", gethrtime()); 507 if (dprintf_find_string("long")) 508 (void) printf("%s, line %d: ", newfile, line); 509 (void) printf("%s: ", func); 510 va_start(adx, fmt); 511 (void) vprintf(fmt, adx); 512 va_end(adx); 513 funlockfile(stdout); 514 } 515 } 516 517 #endif /* ZFS_DEBUG */ 518 519 /* 520 * ========================================================================= 521 * cmn_err() and panic() 522 * ========================================================================= 523 */ 524 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; 525 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; 526 527 void 528 vpanic(const char *fmt, va_list adx) 529 { 530 (void) fprintf(stderr, "error: "); 531 (void) vfprintf(stderr, fmt, adx); 532 (void) fprintf(stderr, "\n"); 533 534 abort(); /* think of it as a "user-level crash dump" */ 535 } 536 537 void 538 panic(const char *fmt, ...) 539 { 540 va_list adx; 541 542 va_start(adx, fmt); 543 vpanic(fmt, adx); 544 va_end(adx); 545 } 546 547 /*PRINTFLIKE2*/ 548 void 549 cmn_err(int ce, const char *fmt, ...) 550 { 551 va_list adx; 552 553 va_start(adx, fmt); 554 if (ce == CE_PANIC) 555 vpanic(fmt, adx); 556 if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ 557 (void) fprintf(stderr, "%s", ce_prefix[ce]); 558 (void) vfprintf(stderr, fmt, adx); 559 (void) fprintf(stderr, "%s", ce_suffix[ce]); 560 } 561 va_end(adx); 562 } 563 564 /* 565 * ========================================================================= 566 * kobj interfaces 567 * ========================================================================= 568 */ 569 struct _buf * 570 kobj_open_file(char *name) 571 { 572 struct _buf *file; 573 vnode_t *vp; 574 575 /* set vp as the _fd field of the file */ 576 if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir) != 0) 577 return ((void *)-1UL); 578 579 file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); 580 file->_fd = (intptr_t)vp; 581 return (file); 582 } 583 584 int 585 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) 586 { 587 ssize_t resid; 588 589 vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, 590 UIO_SYSSPACE, 0, 0, 0, &resid); 591 592 return (0); 593 } 594 595 void 596 kobj_close_file(struct _buf *file) 597 { 598 vn_close((vnode_t *)file->_fd); 599 umem_free(file, sizeof (struct _buf)); 600 } 601 602 int 603 kobj_fstat(intptr_t fd, struct bootstat *bst) 604 { 605 struct stat64 st; 606 vnode_t *vp = (vnode_t *)fd; 607 if (fstat64(vp->v_fd, &st) == -1) { 608 vn_close(vp); 609 return (errno); 610 } 611 bst->st_size = (uint64_t)st.st_size; 612 return (0); 613 } 614 615 /* 616 * ========================================================================= 617 * misc routines 618 * ========================================================================= 619 */ 620 621 void 622 delay(clock_t ticks) 623 { 624 poll(0, 0, ticks * (1000 / hz)); 625 } 626 627 /* 628 * Find highest one bit set. 629 * Returns bit number + 1 of highest bit that is set, otherwise returns 0. 630 * High order bit is 31 (or 63 in _LP64 kernel). 631 */ 632 int 633 highbit(ulong_t i) 634 { 635 register int h = 1; 636 637 if (i == 0) 638 return (0); 639 #ifdef _LP64 640 if (i & 0xffffffff00000000ul) { 641 h += 32; i >>= 32; 642 } 643 #endif 644 if (i & 0xffff0000) { 645 h += 16; i >>= 16; 646 } 647 if (i & 0xff00) { 648 h += 8; i >>= 8; 649 } 650 if (i & 0xf0) { 651 h += 4; i >>= 4; 652 } 653 if (i & 0xc) { 654 h += 2; i >>= 2; 655 } 656 if (i & 0x2) { 657 h += 1; 658 } 659 return (h); 660 } 661 662 static int 663 random_get_bytes_common(uint8_t *ptr, size_t len, char *devname) 664 { 665 int fd = open(devname, O_RDONLY); 666 size_t resid = len; 667 ssize_t bytes; 668 669 ASSERT(fd != -1); 670 671 while (resid != 0) { 672 bytes = read(fd, ptr, resid); 673 ASSERT(bytes >= 0); 674 ptr += bytes; 675 resid -= bytes; 676 } 677 678 close(fd); 679 680 return (0); 681 } 682 683 int 684 random_get_bytes(uint8_t *ptr, size_t len) 685 { 686 return (random_get_bytes_common(ptr, len, "/dev/random")); 687 } 688 689 int 690 random_get_pseudo_bytes(uint8_t *ptr, size_t len) 691 { 692 return (random_get_bytes_common(ptr, len, "/dev/urandom")); 693 } 694 695 /* 696 * ========================================================================= 697 * kernel emulation setup & teardown 698 * ========================================================================= 699 */ 700 static int 701 umem_out_of_memory(void) 702 { 703 char errmsg[] = "out of memory -- generating core dump\n"; 704 705 write(fileno(stderr), errmsg, sizeof (errmsg)); 706 abort(); 707 return (0); 708 } 709 710 void 711 kernel_init(int mode) 712 { 713 umem_nofail_callback(umem_out_of_memory); 714 715 physmem = sysconf(_SC_PHYS_PAGES); 716 717 dprintf("physmem = %llu pages (%.2f GB)\n", physmem, 718 (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); 719 720 spa_init(mode); 721 } 722 723 void 724 kernel_fini(void) 725 { 726 spa_fini(); 727 } 728