xref: /titanic_50/usr/src/lib/libzpool/common/kernel.c (revision a9478106a12424322498e53cf7cd75bd8a4d6004)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012 by Delphix. All rights reserved.
24  */
25 
26 #include <assert.h>
27 #include <fcntl.h>
28 #include <poll.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <zlib.h>
33 #include <sys/spa.h>
34 #include <sys/stat.h>
35 #include <sys/processor.h>
36 #include <sys/zfs_context.h>
37 #include <sys/rrwlock.h>
38 #include <sys/zmod.h>
39 #include <sys/utsname.h>
40 #include <sys/systeminfo.h>
41 
42 /*
43  * Emulation of kernel services in userland.
44  */
45 
46 int aok;
47 uint64_t physmem;
48 vnode_t *rootdir = (vnode_t *)0xabcd1234;
49 char hw_serial[HW_HOSTID_LEN];
50 kmutex_t cpu_lock;
51 vmem_t *zio_arena = NULL;
52 
53 struct utsname utsname = {
54 	"userland", "libzpool", "1", "1", "na"
55 };
56 
57 /* this only exists to have its address taken */
58 struct proc p0;
59 
60 /*
61  * =========================================================================
62  * threads
63  * =========================================================================
64  */
65 /*ARGSUSED*/
66 kthread_t *
67 zk_thread_create(void (*func)(), void *arg)
68 {
69 	thread_t tid;
70 
71 	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
72 	    &tid) == 0);
73 
74 	return ((void *)(uintptr_t)tid);
75 }
76 
77 /*
78  * =========================================================================
79  * kstats
80  * =========================================================================
81  */
82 /*ARGSUSED*/
83 kstat_t *
84 kstat_create(const char *module, int instance, const char *name,
85     const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
86 {
87 	return (NULL);
88 }
89 
90 /*ARGSUSED*/
91 void
92 kstat_install(kstat_t *ksp)
93 {}
94 
95 /*ARGSUSED*/
96 void
97 kstat_delete(kstat_t *ksp)
98 {}
99 
100 /*ARGSUSED*/
101 void
102 kstat_waitq_enter(kstat_io_t *kiop)
103 {}
104 
105 /*ARGSUSED*/
106 void
107 kstat_waitq_exit(kstat_io_t *kiop)
108 {}
109 
110 /*ARGSUSED*/
111 void
112 kstat_runq_enter(kstat_io_t *kiop)
113 {}
114 
115 /*ARGSUSED*/
116 void
117 kstat_runq_exit(kstat_io_t *kiop)
118 {}
119 
120 /*ARGSUSED*/
121 void
122 kstat_waitq_to_runq(kstat_io_t *kiop)
123 {}
124 
125 /*ARGSUSED*/
126 void
127 kstat_runq_back_to_waitq(kstat_io_t *kiop)
128 {}
129 
130 /*
131  * =========================================================================
132  * mutexes
133  * =========================================================================
134  */
135 void
136 zmutex_init(kmutex_t *mp)
137 {
138 	mp->m_owner = NULL;
139 	mp->initialized = B_TRUE;
140 	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
141 }
142 
143 void
144 zmutex_destroy(kmutex_t *mp)
145 {
146 	ASSERT(mp->initialized == B_TRUE);
147 	ASSERT(mp->m_owner == NULL);
148 	(void) _mutex_destroy(&(mp)->m_lock);
149 	mp->m_owner = (void *)-1UL;
150 	mp->initialized = B_FALSE;
151 }
152 
153 void
154 mutex_enter(kmutex_t *mp)
155 {
156 	ASSERT(mp->initialized == B_TRUE);
157 	ASSERT(mp->m_owner != (void *)-1UL);
158 	ASSERT(mp->m_owner != curthread);
159 	VERIFY(mutex_lock(&mp->m_lock) == 0);
160 	ASSERT(mp->m_owner == NULL);
161 	mp->m_owner = curthread;
162 }
163 
164 int
165 mutex_tryenter(kmutex_t *mp)
166 {
167 	ASSERT(mp->initialized == B_TRUE);
168 	ASSERT(mp->m_owner != (void *)-1UL);
169 	if (0 == mutex_trylock(&mp->m_lock)) {
170 		ASSERT(mp->m_owner == NULL);
171 		mp->m_owner = curthread;
172 		return (1);
173 	} else {
174 		return (0);
175 	}
176 }
177 
178 void
179 mutex_exit(kmutex_t *mp)
180 {
181 	ASSERT(mp->initialized == B_TRUE);
182 	ASSERT(mutex_owner(mp) == curthread);
183 	mp->m_owner = NULL;
184 	VERIFY(mutex_unlock(&mp->m_lock) == 0);
185 }
186 
187 void *
188 mutex_owner(kmutex_t *mp)
189 {
190 	ASSERT(mp->initialized == B_TRUE);
191 	return (mp->m_owner);
192 }
193 
194 /*
195  * =========================================================================
196  * rwlocks
197  * =========================================================================
198  */
199 /*ARGSUSED*/
200 void
201 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
202 {
203 	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
204 	rwlp->rw_owner = NULL;
205 	rwlp->initialized = B_TRUE;
206 }
207 
208 void
209 rw_destroy(krwlock_t *rwlp)
210 {
211 	rwlock_destroy(&rwlp->rw_lock);
212 	rwlp->rw_owner = (void *)-1UL;
213 	rwlp->initialized = B_FALSE;
214 }
215 
216 void
217 rw_enter(krwlock_t *rwlp, krw_t rw)
218 {
219 	ASSERT(!RW_LOCK_HELD(rwlp));
220 	ASSERT(rwlp->initialized == B_TRUE);
221 	ASSERT(rwlp->rw_owner != (void *)-1UL);
222 	ASSERT(rwlp->rw_owner != curthread);
223 
224 	if (rw == RW_READER)
225 		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
226 	else
227 		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
228 
229 	rwlp->rw_owner = curthread;
230 }
231 
232 void
233 rw_exit(krwlock_t *rwlp)
234 {
235 	ASSERT(rwlp->initialized == B_TRUE);
236 	ASSERT(rwlp->rw_owner != (void *)-1UL);
237 
238 	rwlp->rw_owner = NULL;
239 	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
240 }
241 
242 int
243 rw_tryenter(krwlock_t *rwlp, krw_t rw)
244 {
245 	int rv;
246 
247 	ASSERT(rwlp->initialized == B_TRUE);
248 	ASSERT(rwlp->rw_owner != (void *)-1UL);
249 
250 	if (rw == RW_READER)
251 		rv = rw_tryrdlock(&rwlp->rw_lock);
252 	else
253 		rv = rw_trywrlock(&rwlp->rw_lock);
254 
255 	if (rv == 0) {
256 		rwlp->rw_owner = curthread;
257 		return (1);
258 	}
259 
260 	return (0);
261 }
262 
263 /*ARGSUSED*/
264 int
265 rw_tryupgrade(krwlock_t *rwlp)
266 {
267 	ASSERT(rwlp->initialized == B_TRUE);
268 	ASSERT(rwlp->rw_owner != (void *)-1UL);
269 
270 	return (0);
271 }
272 
273 /*
274  * =========================================================================
275  * condition variables
276  * =========================================================================
277  */
278 /*ARGSUSED*/
279 void
280 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
281 {
282 	VERIFY(cond_init(cv, type, NULL) == 0);
283 }
284 
285 void
286 cv_destroy(kcondvar_t *cv)
287 {
288 	VERIFY(cond_destroy(cv) == 0);
289 }
290 
291 void
292 cv_wait(kcondvar_t *cv, kmutex_t *mp)
293 {
294 	ASSERT(mutex_owner(mp) == curthread);
295 	mp->m_owner = NULL;
296 	int ret = cond_wait(cv, &mp->m_lock);
297 	VERIFY(ret == 0 || ret == EINTR);
298 	mp->m_owner = curthread;
299 }
300 
301 clock_t
302 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
303 {
304 	int error;
305 	timestruc_t ts;
306 	clock_t delta;
307 
308 top:
309 	delta = abstime - ddi_get_lbolt();
310 	if (delta <= 0)
311 		return (-1);
312 
313 	ts.tv_sec = delta / hz;
314 	ts.tv_nsec = (delta % hz) * (NANOSEC / hz);
315 
316 	ASSERT(mutex_owner(mp) == curthread);
317 	mp->m_owner = NULL;
318 	error = cond_reltimedwait(cv, &mp->m_lock, &ts);
319 	mp->m_owner = curthread;
320 
321 	if (error == ETIME)
322 		return (-1);
323 
324 	if (error == EINTR)
325 		goto top;
326 
327 	ASSERT(error == 0);
328 
329 	return (1);
330 }
331 
332 /*ARGSUSED*/
333 clock_t
334 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
335     int flag)
336 {
337 	int error;
338 	timestruc_t ts;
339 	hrtime_t delta;
340 
341 	ASSERT(flag == 0);
342 
343 top:
344 	delta = tim - gethrtime();
345 	if (delta <= 0)
346 		return (-1);
347 
348 	ts.tv_sec = delta / NANOSEC;
349 	ts.tv_nsec = delta % NANOSEC;
350 
351 	ASSERT(mutex_owner(mp) == curthread);
352 	mp->m_owner = NULL;
353 	error = cond_reltimedwait(cv, &mp->m_lock, &ts);
354 	mp->m_owner = curthread;
355 
356 	if (error == ETIME)
357 		return (-1);
358 
359 	if (error == EINTR)
360 		goto top;
361 
362 	ASSERT(error == 0);
363 
364 	return (1);
365 }
366 
367 void
368 cv_signal(kcondvar_t *cv)
369 {
370 	VERIFY(cond_signal(cv) == 0);
371 }
372 
373 void
374 cv_broadcast(kcondvar_t *cv)
375 {
376 	VERIFY(cond_broadcast(cv) == 0);
377 }
378 
379 /*
380  * =========================================================================
381  * vnode operations
382  * =========================================================================
383  */
384 /*
385  * Note: for the xxxat() versions of these functions, we assume that the
386  * starting vp is always rootdir (which is true for spa_directory.c, the only
387  * ZFS consumer of these interfaces).  We assert this is true, and then emulate
388  * them by adding '/' in front of the path.
389  */
390 
391 /*ARGSUSED*/
392 int
393 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
394 {
395 	int fd;
396 	vnode_t *vp;
397 	int old_umask;
398 	char realpath[MAXPATHLEN];
399 	struct stat64 st;
400 
401 	/*
402 	 * If we're accessing a real disk from userland, we need to use
403 	 * the character interface to avoid caching.  This is particularly
404 	 * important if we're trying to look at a real in-kernel storage
405 	 * pool from userland, e.g. via zdb, because otherwise we won't
406 	 * see the changes occurring under the segmap cache.
407 	 * On the other hand, the stupid character device returns zero
408 	 * for its size.  So -- gag -- we open the block device to get
409 	 * its size, and remember it for subsequent VOP_GETATTR().
410 	 */
411 	if (strncmp(path, "/dev/", 5) == 0) {
412 		char *dsk;
413 		fd = open64(path, O_RDONLY);
414 		if (fd == -1)
415 			return (errno);
416 		if (fstat64(fd, &st) == -1) {
417 			close(fd);
418 			return (errno);
419 		}
420 		close(fd);
421 		(void) sprintf(realpath, "%s", path);
422 		dsk = strstr(path, "/dsk/");
423 		if (dsk != NULL)
424 			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
425 			    dsk + 1);
426 	} else {
427 		(void) sprintf(realpath, "%s", path);
428 		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
429 			return (errno);
430 	}
431 
432 	if (flags & FCREAT)
433 		old_umask = umask(0);
434 
435 	/*
436 	 * The construct 'flags - FREAD' conveniently maps combinations of
437 	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
438 	 */
439 	fd = open64(realpath, flags - FREAD, mode);
440 
441 	if (flags & FCREAT)
442 		(void) umask(old_umask);
443 
444 	if (fd == -1)
445 		return (errno);
446 
447 	if (fstat64(fd, &st) == -1) {
448 		close(fd);
449 		return (errno);
450 	}
451 
452 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
453 
454 	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
455 
456 	vp->v_fd = fd;
457 	vp->v_size = st.st_size;
458 	vp->v_path = spa_strdup(path);
459 
460 	return (0);
461 }
462 
463 /*ARGSUSED*/
464 int
465 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
466     int x3, vnode_t *startvp, int fd)
467 {
468 	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
469 	int ret;
470 
471 	ASSERT(startvp == rootdir);
472 	(void) sprintf(realpath, "/%s", path);
473 
474 	/* fd ignored for now, need if want to simulate nbmand support */
475 	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
476 
477 	umem_free(realpath, strlen(path) + 2);
478 
479 	return (ret);
480 }
481 
482 /*ARGSUSED*/
483 int
484 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
485 	int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
486 {
487 	ssize_t iolen, split;
488 
489 	if (uio == UIO_READ) {
490 		iolen = pread64(vp->v_fd, addr, len, offset);
491 	} else {
492 		/*
493 		 * To simulate partial disk writes, we split writes into two
494 		 * system calls so that the process can be killed in between.
495 		 */
496 		int sectors = len >> SPA_MINBLOCKSHIFT;
497 		split = (sectors > 0 ? rand() % sectors : 0) <<
498 		    SPA_MINBLOCKSHIFT;
499 		iolen = pwrite64(vp->v_fd, addr, split, offset);
500 		iolen += pwrite64(vp->v_fd, (char *)addr + split,
501 		    len - split, offset + split);
502 	}
503 
504 	if (iolen == -1)
505 		return (errno);
506 	if (residp)
507 		*residp = len - iolen;
508 	else if (iolen != len)
509 		return (EIO);
510 	return (0);
511 }
512 
513 void
514 vn_close(vnode_t *vp)
515 {
516 	close(vp->v_fd);
517 	spa_strfree(vp->v_path);
518 	umem_free(vp, sizeof (vnode_t));
519 }
520 
521 /*
522  * At a minimum we need to update the size since vdev_reopen()
523  * will no longer call vn_openat().
524  */
525 int
526 fop_getattr(vnode_t *vp, vattr_t *vap)
527 {
528 	struct stat64 st;
529 
530 	if (fstat64(vp->v_fd, &st) == -1) {
531 		close(vp->v_fd);
532 		return (errno);
533 	}
534 
535 	vap->va_size = st.st_size;
536 	return (0);
537 }
538 
539 #ifdef ZFS_DEBUG
540 
541 /*
542  * =========================================================================
543  * Figure out which debugging statements to print
544  * =========================================================================
545  */
546 
/* Comma-separated debug selector list; NULL when debugging is not set up. */
static char *dprintf_string;
/* Non-zero once the selector "on" was found by dprintf_setup(). */
static int dprintf_print_all;

/*
 * Return 1 if 'string' appears as a complete element of the comma-separated
 * selector list (e.g. "file1.c,function_name1,file2.c"), 0 otherwise.
 * A NULL selector list matches nothing.
 */
int
dprintf_find_string(const char *string)
{
	const char *cursor = dprintf_string;
	size_t want = strlen(string);

	while (cursor != NULL) {
		/* The element must match entirely, not merely as a prefix. */
		if (strncmp(cursor, string, want) == 0 &&
		    (cursor[want] == ',' || cursor[want] == '\0'))
			return (1);

		/* Advance to the element after the next comma, if any. */
		cursor = strchr(cursor, ',');
		if (cursor != NULL)
			cursor++;
	}
	return (0);
}
571 
572 void
573 dprintf_setup(int *argc, char **argv)
574 {
575 	int i, j;
576 
577 	/*
578 	 * Debugging can be specified two ways: by setting the
579 	 * environment variable ZFS_DEBUG, or by including a
580 	 * "debug=..."  argument on the command line.  The command
581 	 * line setting overrides the environment variable.
582 	 */
583 
584 	for (i = 1; i < *argc; i++) {
585 		int len = strlen("debug=");
586 		/* First look for a command line argument */
587 		if (strncmp("debug=", argv[i], len) == 0) {
588 			dprintf_string = argv[i] + len;
589 			/* Remove from args */
590 			for (j = i; j < *argc; j++)
591 				argv[j] = argv[j+1];
592 			argv[j] = NULL;
593 			(*argc)--;
594 		}
595 	}
596 
597 	if (dprintf_string == NULL) {
598 		/* Look for ZFS_DEBUG environment variable */
599 		dprintf_string = getenv("ZFS_DEBUG");
600 	}
601 
602 	/*
603 	 * Are we just turning on all debugging?
604 	 */
605 	if (dprintf_find_string("on"))
606 		dprintf_print_all = 1;
607 }
608 
609 /*
610  * =========================================================================
611  * debug printfs
612  * =========================================================================
613  */
614 void
615 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
616 {
617 	const char *newfile;
618 	va_list adx;
619 
620 	/*
621 	 * Get rid of annoying "../common/" prefix to filename.
622 	 */
623 	newfile = strrchr(file, '/');
624 	if (newfile != NULL) {
625 		newfile = newfile + 1; /* Get rid of leading / */
626 	} else {
627 		newfile = file;
628 	}
629 
630 	if (dprintf_print_all ||
631 	    dprintf_find_string(newfile) ||
632 	    dprintf_find_string(func)) {
633 		/* Print out just the function name if requested */
634 		flockfile(stdout);
635 		if (dprintf_find_string("pid"))
636 			(void) printf("%d ", getpid());
637 		if (dprintf_find_string("tid"))
638 			(void) printf("%u ", thr_self());
639 		if (dprintf_find_string("cpu"))
640 			(void) printf("%u ", getcpuid());
641 		if (dprintf_find_string("time"))
642 			(void) printf("%llu ", gethrtime());
643 		if (dprintf_find_string("long"))
644 			(void) printf("%s, line %d: ", newfile, line);
645 		(void) printf("%s: ", func);
646 		va_start(adx, fmt);
647 		(void) vprintf(fmt, adx);
648 		va_end(adx);
649 		funlockfile(stdout);
650 	}
651 }
652 
653 #endif /* ZFS_DEBUG */
654 
655 /*
656  * =========================================================================
657  * cmn_err() and panic()
658  * =========================================================================
659  */
660 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
661 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
662 
/*
 * Print "error: <formatted message>" on stderr and abort the process.
 * Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fprintf(stderr, "error: ");
	(void) vfprintf(stderr, fmt, adx);
	(void) fprintf(stderr, "\n");

	/* think of it as a "user-level crash dump" */
	abort();
}
672 
/*
 * printf-style front end to vpanic(); aborts the process.
 */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
682 
683 void
684 vcmn_err(int ce, const char *fmt, va_list adx)
685 {
686 	if (ce == CE_PANIC)
687 		vpanic(fmt, adx);
688 	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
689 		(void) fprintf(stderr, "%s", ce_prefix[ce]);
690 		(void) vfprintf(stderr, fmt, adx);
691 		(void) fprintf(stderr, "%s", ce_suffix[ce]);
692 	}
693 }
694 
/*
 * printf-style front end to vcmn_err().
 */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
705 
706 /*
707  * =========================================================================
708  * kobj interfaces
709  * =========================================================================
710  */
711 struct _buf *
712 kobj_open_file(char *name)
713 {
714 	struct _buf *file;
715 	vnode_t *vp;
716 
717 	/* set vp as the _fd field of the file */
718 	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
719 	    -1) != 0)
720 		return ((void *)-1UL);
721 
722 	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
723 	file->_fd = (intptr_t)vp;
724 	return (file);
725 }
726 
727 int
728 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
729 {
730 	ssize_t resid;
731 
732 	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
733 	    UIO_SYSSPACE, 0, 0, 0, &resid);
734 
735 	return (size - resid);
736 }
737 
738 void
739 kobj_close_file(struct _buf *file)
740 {
741 	vn_close((vnode_t *)file->_fd);
742 	umem_free(file, sizeof (struct _buf));
743 }
744 
745 int
746 kobj_get_filesize(struct _buf *file, uint64_t *size)
747 {
748 	struct stat64 st;
749 	vnode_t *vp = (vnode_t *)file->_fd;
750 
751 	if (fstat64(vp->v_fd, &st) == -1) {
752 		vn_close(vp);
753 		return (errno);
754 	}
755 	*size = st.st_size;
756 	return (0);
757 }
758 
759 /*
760  * =========================================================================
761  * misc routines
762  * =========================================================================
763  */
764 
765 void
766 delay(clock_t ticks)
767 {
768 	poll(0, 0, ticks * (1000 / hz));
769 }
770 
/*
 * Find highest one bit set.
 *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
 * High order bit is 31 (or 63 in _LP64 kernel).
 *
 * Implemented as a binary search: at each step, if anything is set above
 * the lower 'shift' bits, account for those bits and discard them.
 */
int
highbit(ulong_t i)
{
	int pos = 1;
	int shift;

	if (i == 0)
		return (0);

	/* half the width of ulong_t, then 16 (or 8), ... down to 1 */
	for (shift = (int)(sizeof (i) * 4); shift > 0; shift >>= 1) {
		if ((i >> shift) != 0) {
			pos += shift;
			i >>= shift;
		}
	}
	return (pos);
}
805 
/* Descriptors for /dev/random and /dev/urandom; opened by kernel_init(). */
static int random_fd = -1, urandom_fd = -1;

/*
 * Fill ptr[0..len) with bytes read from 'fd', looping over short reads.
 *
 * Returns 0 on success.  A read interrupted by a signal is retried; any
 * other read error is returned as its errno value, and an unexpected
 * end-of-file is reported as EIO rather than looping forever.
 */
static int
random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
{
	size_t resid = len;
	ssize_t bytes;

	ASSERT(fd != -1);

	while (resid != 0) {
		bytes = read(fd, ptr, resid);
		if (bytes == -1) {
			if (errno == EINTR)
				continue;
			return (errno);
		}
		if (bytes == 0)
			return (EIO);

		ptr += bytes;
		resid -= bytes;
	}

	return (0);
}
825 
826 int
827 random_get_bytes(uint8_t *ptr, size_t len)
828 {
829 	return (random_get_bytes_common(ptr, len, random_fd));
830 }
831 
832 int
833 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
834 {
835 	return (random_get_bytes_common(ptr, len, urandom_fd));
836 }
837 
/*
 * Convert the string hw_serial to an unsigned long in the given base.
 *
 * *result always receives the value produced by strtoul().  If nptr is not
 * NULL, *nptr is set to the first character that was not converted.
 *
 * Returns 0 on success, EINVAL if no digits could be converted, or the
 * errno reported by strtoul() (e.g. ERANGE on overflow).  A legitimately
 * parsed value of 0 is a success regardless of any earlier errno value.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;
	int err;

	/* strtoul() only sets errno on failure, so start from a clean slate */
	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	err = errno;

	if (nptr != NULL)
		*nptr = end;

	if (err != 0)
		return (err);
	if (end == hw_serial)
		return (EINVAL);	/* no number formed */
	return (0);
}
848 
/*
 * Convert the string str to an unsigned long long in the given base.
 *
 * *result always receives the value produced by strtoull().  If nptr is
 * not NULL, *nptr is set to the first character that was not converted.
 *
 * Returns 0 on success, EINVAL if no digits could be converted, or the
 * errno reported by strtoull() (e.g. ERANGE on overflow).  A legitimately
 * parsed value of 0 is a success regardless of any earlier errno value.
 */
int
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
{
	char *end;
	int err;

	/* strtoull() only sets errno on failure, so start from a clean slate */
	errno = 0;
	*result = strtoull(str, &end, base);
	err = errno;

	if (nptr != NULL)
		*nptr = end;

	if (err != 0)
		return (err);
	if (end == str)
		return (EINVAL);	/* no number formed */
	return (0);
}
859 
860 /* ARGSUSED */
861 cyclic_id_t
862 cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
863 {
864 	return (1);
865 }
866 
867 /* ARGSUSED */
868 void
869 cyclic_remove(cyclic_id_t id)
870 {
871 }
872 
873 /* ARGSUSED */
874 int
875 cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
876 {
877 	return (1);
878 }
879 
880 /*
881  * =========================================================================
882  * kernel emulation setup & teardown
883  * =========================================================================
884  */
/*
 * umem "nofail" callback installed by kernel_init(): report the condition
 * on stderr with a raw write() and abort to produce a core dump.
 * Does not return; the return statement only satisfies the signature.
 */
static int
umem_out_of_memory(void)
{
	static const char errmsg[] = "out of memory -- generating core dump\n";

	/* sizeof includes the terminating NUL, which must not be written */
	(void) write(fileno(stderr), errmsg, sizeof (errmsg) - 1);
	abort();
	return (0);
}
894 
895 void
896 kernel_init(int mode)
897 {
898 	extern uint_t rrw_tsd_key;
899 
900 	umem_nofail_callback(umem_out_of_memory);
901 
902 	physmem = sysconf(_SC_PHYS_PAGES);
903 
904 	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
905 	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
906 
907 	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
908 	    (mode & FWRITE) ? gethostid() : 0);
909 
910 	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
911 	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
912 
913 	system_taskq_init();
914 
915 	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
916 
917 	spa_init(mode);
918 
919 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
920 }
921 
922 void
923 kernel_fini(void)
924 {
925 	spa_fini();
926 
927 	system_taskq_fini();
928 
929 	close(random_fd);
930 	close(urandom_fd);
931 
932 	random_fd = -1;
933 	urandom_fd = -1;
934 }
935 
936 int
937 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
938 {
939 	int ret;
940 	uLongf len = *dstlen;
941 
942 	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
943 		*dstlen = (size_t)len;
944 
945 	return (ret);
946 }
947 
948 int
949 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
950     int level)
951 {
952 	int ret;
953 	uLongf len = *dstlen;
954 
955 	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
956 		*dstlen = (size_t)len;
957 
958 	return (ret);
959 }
960 
961 uid_t
962 crgetuid(cred_t *cr)
963 {
964 	return (0);
965 }
966 
967 uid_t
968 crgetruid(cred_t *cr)
969 {
970 	return (0);
971 }
972 
973 gid_t
974 crgetgid(cred_t *cr)
975 {
976 	return (0);
977 }
978 
979 int
980 crgetngroups(cred_t *cr)
981 {
982 	return (0);
983 }
984 
985 gid_t *
986 crgetgroups(cred_t *cr)
987 {
988 	return (NULL);
989 }
990 
991 int
992 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
993 {
994 	return (0);
995 }
996 
997 int
998 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
999 {
1000 	return (0);
1001 }
1002 
1003 int
1004 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
1005 {
1006 	return (0);
1007 }
1008 
1009 ksiddomain_t *
1010 ksid_lookupdomain(const char *dom)
1011 {
1012 	ksiddomain_t *kd;
1013 
1014 	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
1015 	kd->kd_name = spa_strdup(dom);
1016 	return (kd);
1017 }
1018 
1019 void
1020 ksiddomain_rele(ksiddomain_t *ksid)
1021 {
1022 	spa_strfree(ksid->kd_name);
1023 	umem_free(ksid, sizeof (ksiddomain_t));
1024 }
1025 
1026 /*
1027  * Do not change the length of the returned string; it must be freed
1028  * with strfree().
1029  */
1030 char *
1031 kmem_asprintf(const char *fmt, ...)
1032 {
1033 	int size;
1034 	va_list adx;
1035 	char *buf;
1036 
1037 	va_start(adx, fmt);
1038 	size = vsnprintf(NULL, 0, fmt, adx) + 1;
1039 	va_end(adx);
1040 
1041 	buf = kmem_alloc(size, KM_SLEEP);
1042 
1043 	va_start(adx, fmt);
1044 	size = vsnprintf(buf, size, fmt, adx);
1045 	va_end(adx);
1046 
1047 	return (buf);
1048 }
1049 
1050 /* ARGSUSED */
1051 int
1052 zfs_onexit_fd_hold(int fd, minor_t *minorp)
1053 {
1054 	*minorp = 0;
1055 	return (0);
1056 }
1057 
/*
 * No-op: zfs_onexit_fd_hold() takes no hold in userland.
 */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
	/* nothing to release */
}
1063 
1064 /* ARGSUSED */
1065 int
1066 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
1067     uint64_t *action_handle)
1068 {
1069 	return (0);
1070 }
1071 
1072 /* ARGSUSED */
1073 int
1074 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
1075 {
1076 	return (0);
1077 }
1078 
1079 /* ARGSUSED */
1080 int
1081 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
1082 {
1083 	return (0);
1084 }
1085 
1086 void
1087 bioinit(buf_t *bp)
1088 {
1089 	bzero(bp, sizeof (buf_t));
1090 }
1091 
1092 void
1093 biodone(buf_t *bp)
1094 {
1095 	if (bp->b_iodone != NULL) {
1096 		(*(bp->b_iodone))(bp);
1097 		return;
1098 	}
1099 	ASSERT((bp->b_flags & B_DONE) == 0);
1100 	bp->b_flags |= B_DONE;
1101 }
1102 
1103 void
1104 bioerror(buf_t *bp, int error)
1105 {
1106 	ASSERT(bp != NULL);
1107 	ASSERT(error >= 0);
1108 
1109 	if (error != 0) {
1110 		bp->b_flags |= B_ERROR;
1111 	} else {
1112 		bp->b_flags &= ~B_ERROR;
1113 	}
1114 	bp->b_error = error;
1115 }
1116 
1117 
1118 int
1119 geterror(struct buf *bp)
1120 {
1121 	int error = 0;
1122 
1123 	if (bp->b_flags & B_ERROR) {
1124 		error = bp->b_error;
1125 		if (!error)
1126 			error = EIO;
1127 	}
1128 	return (error);
1129 }
1130