xref: /freebsd/sys/contrib/openzfs/lib/libzpool/kernel.c (revision 8c2dd68caa963f1900a8228b0732b04f5d530ffa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
25  */
26 
27 #include <assert.h>
28 #include <fcntl.h>
29 #include <libgen.h>
30 #include <poll.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <libzutil.h>
35 #include <sys/crypto/icp.h>
36 #include <sys/processor.h>
37 #include <sys/rrwlock.h>
38 #include <sys/spa.h>
39 #include <sys/stat.h>
40 #include <sys/systeminfo.h>
41 #include <sys/time.h>
42 #include <sys/utsname.h>
43 #include <sys/zfs_context.h>
44 #include <sys/zfs_onexit.h>
45 #include <sys/zfs_vfsops.h>
46 #include <sys/zstd/zstd.h>
47 #include <sys/zvol.h>
48 #include <zfs_fletcher.h>
49 #include <zlib.h>
50 
51 /*
52  * Emulation of kernel services in userland.
53  */
54 
/* Physical page count; set from sysconf(_SC_PHYS_PAGES) in kernel_init(). */
uint64_t physmem;
/* Host id as a decimal string; written by kernel_init(). */
char hw_serial[HW_HOSTID_LEN];
/* Filled in by uname() in kernel_init() and handed out by utsname(). */
struct utsname hw_utsname;

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* this only exists to have its address taken */
struct proc p0;
64 
65 /*
66  * =========================================================================
67  * threads
68  * =========================================================================
69  *
70  * TS_STACK_MIN is dictated by the minimum allowed pthread stack size.  While
71  * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
72  * the expected stack depth while small enough to avoid exhausting address
73  * space with high thread counts.
74  */
/* Lower bound applied to every stack size requested by zk_thread_create(). */
#define	TS_STACK_MIN	MAX(PTHREAD_STACK_MIN, 32768)
/* Stack size used when neither the caller nor ZFS_STACK_SIZE supplies one. */
#define	TS_STACK_MAX	(256 * 1024)
77 
78 /*ARGSUSED*/
79 kthread_t *
80 zk_thread_create(void (*func)(void *), void *arg, size_t stksize, int state)
81 {
82 	pthread_attr_t attr;
83 	pthread_t tid;
84 	char *stkstr;
85 	int detachstate = PTHREAD_CREATE_DETACHED;
86 
87 	VERIFY0(pthread_attr_init(&attr));
88 
89 	if (state & TS_JOINABLE)
90 		detachstate = PTHREAD_CREATE_JOINABLE;
91 
92 	VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));
93 
94 	/*
95 	 * We allow the default stack size in user space to be specified by
96 	 * setting the ZFS_STACK_SIZE environment variable.  This allows us
97 	 * the convenience of observing and debugging stack overruns in
98 	 * user space.  Explicitly specified stack sizes will be honored.
99 	 * The usage of ZFS_STACK_SIZE is discussed further in the
100 	 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
101 	 */
102 	if (stksize == 0) {
103 		stkstr = getenv("ZFS_STACK_SIZE");
104 
105 		if (stkstr == NULL)
106 			stksize = TS_STACK_MAX;
107 		else
108 			stksize = MAX(atoi(stkstr), TS_STACK_MIN);
109 	}
110 
111 	VERIFY3S(stksize, >, 0);
112 	stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);
113 
114 	/*
115 	 * If this ever fails, it may be because the stack size is not a
116 	 * multiple of system page size.
117 	 */
118 	VERIFY0(pthread_attr_setstacksize(&attr, stksize));
119 	VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));
120 
121 	VERIFY0(pthread_create(&tid, &attr, (void *(*)(void *))func, arg));
122 	VERIFY0(pthread_attr_destroy(&attr));
123 
124 	return ((void *)(uintptr_t)tid);
125 }
126 
127 /*
128  * =========================================================================
129  * kstats
130  * =========================================================================
131  */
/*
 * Userland stub: no kstat is created; every argument is ignored and NULL
 * is always returned.
 */
/*ARGSUSED*/
kstat_t *
kstat_create(const char *module, int instance, const char *name,
    const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
{
	return (NULL);
}
139 
/* Userland stub: does nothing. */
/*ARGSUSED*/
void
kstat_install(kstat_t *ksp)
{}
144 
/* Userland stub: does nothing. */
/*ARGSUSED*/
void
kstat_delete(kstat_t *ksp)
{}
149 
/* Userland stub: the supplied callbacks are ignored and nothing is done. */
void
kstat_set_raw_ops(kstat_t *ksp,
    int (*headers)(char *buf, size_t size),
    int (*data)(char *buf, size_t size, void *data),
    void *(*addr)(kstat_t *ksp, loff_t index))
{}
156 
157 /*
158  * =========================================================================
159  * mutexes
160  * =========================================================================
161  */
162 
/*
 * Initialize the pthread mutex inside mp with default attributes and clear
 * the recorded owner.  name, type and cookie are not used here.
 */
void
mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
{
	VERIFY0(pthread_mutex_init(&mp->m_lock, NULL));
	memset(&mp->m_owner, 0, sizeof (pthread_t));
}
169 
/* Destroy the pthread mutex inside mp; a failure trips a VERIFY. */
void
mutex_destroy(kmutex_t *mp)
{
	VERIFY0(pthread_mutex_destroy(&mp->m_lock));
}
175 
/* Lock mp, then record the calling thread as its owner. */
void
mutex_enter(kmutex_t *mp)
{
	VERIFY0(pthread_mutex_lock(&mp->m_lock));
	/* Only written once the lock is actually held. */
	mp->m_owner = pthread_self();
}
182 
183 int
184 mutex_tryenter(kmutex_t *mp)
185 {
186 	int error;
187 
188 	error = pthread_mutex_trylock(&mp->m_lock);
189 	if (error == 0) {
190 		mp->m_owner = pthread_self();
191 		return (1);
192 	} else {
193 		VERIFY3S(error, ==, EBUSY);
194 		return (0);
195 	}
196 }
197 
/* Clear the recorded owner, then unlock mp. */
void
mutex_exit(kmutex_t *mp)
{
	/* Cleared while the lock is still held. */
	memset(&mp->m_owner, 0, sizeof (pthread_t));
	VERIFY0(pthread_mutex_unlock(&mp->m_lock));
}
204 
205 /*
206  * =========================================================================
207  * rwlocks
208  * =========================================================================
209  */
210 
/*
 * Initialize the pthread rwlock inside rwlp with default attributes and
 * reset the reader count and writer owner.  name, type and arg are unused.
 */
void
rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
{
	VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL));
	rwlp->rw_readers = 0;
	rwlp->rw_owner = 0;
}
218 
/* Destroy the pthread rwlock inside rwlp; a failure trips a VERIFY. */
void
rw_destroy(krwlock_t *rwlp)
{
	VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock));
}
224 
225 void
226 rw_enter(krwlock_t *rwlp, krw_t rw)
227 {
228 	if (rw == RW_READER) {
229 		VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock));
230 		atomic_inc_uint(&rwlp->rw_readers);
231 	} else {
232 		VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock));
233 		rwlp->rw_owner = pthread_self();
234 	}
235 }
236 
/*
 * Release rwlp.  The bookkeeping is undone before the unlock: the reader
 * count is decremented when RW_READ_HELD() says the lock is read-held,
 * otherwise the writer owner is cleared.
 */
void
rw_exit(krwlock_t *rwlp)
{
	if (RW_READ_HELD(rwlp))
		atomic_dec_uint(&rwlp->rw_readers);
	else
		rwlp->rw_owner = 0;

	VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock));
}
247 
248 int
249 rw_tryenter(krwlock_t *rwlp, krw_t rw)
250 {
251 	int error;
252 
253 	if (rw == RW_READER)
254 		error = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
255 	else
256 		error = pthread_rwlock_trywrlock(&rwlp->rw_lock);
257 
258 	if (error == 0) {
259 		if (rw == RW_READER)
260 			atomic_inc_uint(&rwlp->rw_readers);
261 		else
262 			rwlp->rw_owner = pthread_self();
263 
264 		return (1);
265 	}
266 
267 	VERIFY3S(error, ==, EBUSY);
268 
269 	return (0);
270 }
271 
/*
 * Return the emulated host id by parsing the hw_serial string as a base-10
 * number.  zonep is ignored.
 */
/* ARGSUSED */
uint32_t
zone_get_hostid(void *zonep)
{
	/*
	 * We're emulating the system's hostid in userland.
	 */
	return (strtoul(hw_serial, NULL, 10));
}
281 
/*
 * Userland stub: no upgrade is ever attempted and 0 is always returned
 * (presumably read by callers as "upgrade failed" -- confirm against
 * callers).
 */
int
rw_tryupgrade(krwlock_t *rwlp)
{
	return (0);
}
287 
288 /*
289  * =========================================================================
290  * condition variables
291  * =========================================================================
292  */
293 
/*
 * Initialize cv as a pthread condition variable with default attributes.
 * name, type and arg are not used.
 */
void
cv_init(kcondvar_t *cv, char *name, int type, void *arg)
{
	VERIFY0(pthread_cond_init(cv, NULL));
}
299 
/* Destroy the condition variable; a failure trips a VERIFY. */
void
cv_destroy(kcondvar_t *cv)
{
	VERIFY0(pthread_cond_destroy(cv));
}
305 
/*
 * Wait on cv with mp's pthread mutex.  The recorded owner of mp is cleared
 * before the wait and set back to the caller once the wait returns.
 */
void
cv_wait(kcondvar_t *cv, kmutex_t *mp)
{
	memset(&mp->m_owner, 0, sizeof (pthread_t));
	VERIFY0(pthread_cond_wait(cv, &mp->m_lock));
	mp->m_owner = pthread_self();
}
313 
/*
 * Same as cv_wait(); no signal handling is done here and 1 is always
 * returned.
 */
int
cv_wait_sig(kcondvar_t *cv, kmutex_t *mp)
{
	cv_wait(cv, mp);
	return (1);
}
320 
/*
 * Wait on cv until signalled or until the lbolt value abstime is reached.
 *
 * cv - condition variable to wait on
 * mp - mutex handed to pthread_cond_timedwait()
 * abstime - deadline, in the same units as ddi_get_lbolt()
 *
 * Returns -1 if the deadline has already passed or the wait timed out,
 * 1 otherwise.  Any other pthread error trips a VERIFY.
 */
int
cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
{
	int error;
	struct timeval tv;
	struct timespec ts;
	clock_t delta;

	/* Ticks remaining until the deadline. */
	delta = abstime - ddi_get_lbolt();
	if (delta <= 0)
		return (-1);

	VERIFY(gettimeofday(&tv, NULL) == 0);

	/* Convert "now + delta ticks" into an absolute timespec. */
	ts.tv_sec = tv.tv_sec + delta / hz;
	ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz);
	/* Carry any overflow of the nanosecond field into seconds. */
	if (ts.tv_nsec >= NANOSEC) {
		ts.tv_sec++;
		ts.tv_nsec -= NANOSEC;
	}

	/* Owner is cleared for the duration of the wait, then restored. */
	memset(&mp->m_owner, 0, sizeof (pthread_t));
	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
	mp->m_owner = pthread_self();

	if (error == ETIMEDOUT)
		return (-1);

	VERIFY0(error);

	return (1);
}
353 
/*
 * High-resolution variant of cv_timedwait().
 *
 * cv - condition variable to wait on
 * mp - mutex handed to pthread_cond_timedwait()
 * tim - time in nanoseconds; an absolute gethrtime() value when
 *       CALLOUT_FLAG_ABSOLUTE is set in flag, otherwise a relative delay
 * res - not used by this implementation
 * flag - 0 or CALLOUT_FLAG_ABSOLUTE
 *
 * Returns -1 if the time has already passed or the wait timed out,
 * 1 otherwise.  Any other pthread error trips a VERIFY.
 */
/*ARGSUSED*/
int
cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	int error;
	struct timeval tv;
	struct timespec ts;
	hrtime_t delta;

	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);

	/* Reduce an absolute time to a delay relative to now. */
	delta = tim;
	if (flag & CALLOUT_FLAG_ABSOLUTE)
		delta -= gethrtime();

	if (delta <= 0)
		return (-1);

	VERIFY0(gettimeofday(&tv, NULL));

	/* Convert "now + delta nanoseconds" into an absolute timespec. */
	ts.tv_sec = tv.tv_sec + delta / NANOSEC;
	ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % NANOSEC);
	/* Carry any overflow of the nanosecond field into seconds. */
	if (ts.tv_nsec >= NANOSEC) {
		ts.tv_sec++;
		ts.tv_nsec -= NANOSEC;
	}

	/* Owner is cleared for the duration of the wait, then restored. */
	memset(&mp->m_owner, 0, sizeof (pthread_t));
	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
	mp->m_owner = pthread_self();

	if (error == ETIMEDOUT)
		return (-1);

	VERIFY0(error);

	return (1);
}
393 
/* Thin wrapper around pthread_cond_signal(); a failure trips a VERIFY. */
void
cv_signal(kcondvar_t *cv)
{
	VERIFY0(pthread_cond_signal(cv));
}
399 
/* Thin wrapper around pthread_cond_broadcast(); a failure trips a VERIFY. */
void
cv_broadcast(kcondvar_t *cv)
{
	VERIFY0(pthread_cond_broadcast(cv));
}
405 
406 /*
407  * =========================================================================
408  * procfs list
409  * =========================================================================
410  */
411 
/* Userland stub: nothing is formatted or written. */
void
seq_printf(struct seq_file *m, const char *fmt, ...)
{}
415 
/*
 * Set up the in-memory parts of a procfs list: its lock, its list, the
 * first id to hand out (1) and the offset of the embedded
 * procfs_list_node_t within each element.  The module/submodule/name,
 * mode and the show/show_header/clear callbacks are not used here.
 */
void
procfs_list_install(const char *module,
    const char *submodule,
    const char *name,
    mode_t mode,
    procfs_list_t *procfs_list,
    int (*show)(struct seq_file *f, void *p),
    int (*show_header)(struct seq_file *f),
    int (*clear)(procfs_list_t *procfs_list),
    size_t procfs_list_node_off)
{
	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
	/*
	 * The element size and link offset passed to list_create() are both
	 * derived from where the procfs_list_node_t sits in the element.
	 */
	list_create(&procfs_list->pl_list,
	    procfs_list_node_off + sizeof (procfs_list_node_t),
	    procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
	procfs_list->pl_next_id = 1;
	procfs_list->pl_node_offset = procfs_list_node_off;
}
434 
/* Userland stub: does nothing. */
void
procfs_list_uninstall(procfs_list_t *procfs_list)
{}
438 
/*
 * Tear down the list and lock created by procfs_list_install().  The list
 * is asserted to be empty.
 */
void
procfs_list_destroy(procfs_list_t *procfs_list)
{
	ASSERT(list_is_empty(&procfs_list->pl_list));
	list_destroy(&procfs_list->pl_list);
	mutex_destroy(&procfs_list->pl_lock);
}
446 
/*
 * Lvalue for the pln_id of the procfs_list_node_t embedded in obj, located
 * pl_node_offset bytes from the start of obj.
 */
#define	NODE_ID(procfs_list, obj) \
		(((procfs_list_node_t *)(((char *)obj) + \
		(procfs_list)->pl_node_offset))->pln_id)
450 
/*
 * Give element p the next id and append it to the tail of the list.
 * pl_lock is asserted to be held by the caller.
 */
void
procfs_list_add(procfs_list_t *procfs_list, void *p)
{
	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
	NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
	list_insert_tail(&procfs_list->pl_list, p);
}
458 
459 /*
460  * =========================================================================
461  * vnode operations
462  * =========================================================================
463  */
464 
465 /*
466  * =========================================================================
467  * Figure out which debugging statements to print
468  * =========================================================================
469  */
470 
/* Comma-separated list of debug selectors, or NULL when none was given. */
static char *dprintf_string;
/* Non-zero once "on" has been found in dprintf_string. */
static int dprintf_print_all;

/*
 * Report whether string appears as a complete entry in dprintf_string.
 *
 * The list has the form: file1.c,function_name1,file2.c,file3.c
 * An entry matches only if it equals string exactly; a mere prefix of an
 * entry does not count.
 *
 * Returns 1 when found, 0 otherwise (including when the list is NULL).
 */
int
dprintf_find_string(const char *string)
{
	int len = strlen(string);
	char *entry, *comma;

	for (entry = dprintf_string; entry != NULL;
	    entry = (comma != NULL) ? comma + 1 : NULL) {
		/* The entry must end right where string does. */
		if (strncmp(entry, string, len) == 0 &&
		    (entry[len] == ',' || entry[len] == '\0'))
			return (1);

		/* Step over the separating comma to the next entry. */
		comma = strchr(entry, ',');
	}

	return (0);
}
495 
496 void
497 dprintf_setup(int *argc, char **argv)
498 {
499 	int i, j;
500 
501 	/*
502 	 * Debugging can be specified two ways: by setting the
503 	 * environment variable ZFS_DEBUG, or by including a
504 	 * "debug=..."  argument on the command line.  The command
505 	 * line setting overrides the environment variable.
506 	 */
507 
508 	for (i = 1; i < *argc; i++) {
509 		int len = strlen("debug=");
510 		/* First look for a command line argument */
511 		if (strncmp("debug=", argv[i], len) == 0) {
512 			dprintf_string = argv[i] + len;
513 			/* Remove from args */
514 			for (j = i; j < *argc; j++)
515 				argv[j] = argv[j+1];
516 			argv[j] = NULL;
517 			(*argc)--;
518 		}
519 	}
520 
521 	if (dprintf_string == NULL) {
522 		/* Look for ZFS_DEBUG environment variable */
523 		dprintf_string = getenv("ZFS_DEBUG");
524 	}
525 
526 	/*
527 	 * Are we just turning on all debugging?
528 	 */
529 	if (dprintf_find_string("on"))
530 		dprintf_print_all = 1;
531 
532 	if (dprintf_string != NULL)
533 		zfs_flags |= ZFS_DEBUG_DPRINTF;
534 }
535 
536 /*
537  * =========================================================================
538  * debug printfs
539  * =========================================================================
540  */
/*
 * Back end for debug messages.
 *
 * dprint - B_TRUE: print to stdout immediately, subject to the filters in
 *          dprintf_string; B_FALSE: format into a buffer and hand it to
 *          __zfs_dbgmsg()
 * file, func, line - location of the call site
 * fmt, ... - printf-style message
 */
void
__dprintf(boolean_t dprint, const char *file, const char *func,
    int line, const char *fmt, ...)
{
	/* Get rid of annoying "../common/" prefix to filename. */
	const char *newfile = zfs_basename(file);

	va_list adx;
	if (dprint) {
		/* dprintf messages are printed immediately */

		/* Skip unless everything is enabled or file/func is listed. */
		if (!dprintf_print_all &&
		    !dprintf_find_string(newfile) &&
		    !dprintf_find_string(func))
			return;

		/*
		 * Optional prefixes (pid, tid, cpu, time, file and line) are
		 * emitted when the matching keyword is in the debug string.
		 * stdout is locked so the pieces of one message stay together.
		 */
		flockfile(stdout);
		if (dprintf_find_string("pid"))
			(void) printf("%d ", getpid());
		if (dprintf_find_string("tid"))
			(void) printf("%ju ",
			    (uintmax_t)(uintptr_t)pthread_self());
		if (dprintf_find_string("cpu"))
			(void) printf("%u ", getcpuid());
		if (dprintf_find_string("time"))
			(void) printf("%llu ", gethrtime());
		if (dprintf_find_string("long"))
			(void) printf("%s, line %d: ", newfile, line);
		(void) printf("dprintf: %s: ", func);
		va_start(adx, fmt);
		(void) vprintf(fmt, adx);
		va_end(adx);
		funlockfile(stdout);
	} else {
		/* zfs_dbgmsg is logged for dumping later */
		size_t size;
		char *buf;
		int i;

		size = 1024;
		buf = umem_alloc(size, UMEM_NOFAIL);
		i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);

		/* Append the message only if the prefix left room for it. */
		if (i < size) {
			va_start(adx, fmt);
			(void) vsnprintf(buf + i, size - i, fmt, adx);
			va_end(adx);
		}

		__zfs_dbgmsg(buf);

		umem_free(buf, size);
	}
}
596 
597 /*
598  * =========================================================================
599  * cmn_err() and panic()
600  * =========================================================================
601  */
/*
 * Text printed before and after a message by vcmn_err(), indexed by the
 * cmn_err level.  Both tables have CE_IGNORE entries, so only levels below
 * CE_IGNORE are valid indexes.
 */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
604 
/*
 * Print "error: " followed by the formatted message and a newline to
 * stderr, then abort().  Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fprintf(stderr, "error: ");
	(void) vfprintf(stderr, fmt, adx);
	(void) fprintf(stderr, "\n");

	abort();	/* think of it as a "user-level crash dump" */
}
614 
/* Variadic front end for vpanic(). */
void
panic(const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vpanic(fmt, adx);
	/* vpanic() ends in abort(), so this is not expected to run. */
	va_end(adx);
}
624 
625 void
626 vcmn_err(int ce, const char *fmt, va_list adx)
627 {
628 	if (ce == CE_PANIC)
629 		vpanic(fmt, adx);
630 	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
631 		(void) fprintf(stderr, "%s", ce_prefix[ce]);
632 		(void) vfprintf(stderr, fmt, adx);
633 		(void) fprintf(stderr, "%s", ce_suffix[ce]);
634 	}
635 }
636 
/* Variadic front end for vcmn_err(). */
/*PRINTFLIKE2*/
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list adx;

	va_start(adx, fmt);
	vcmn_err(ce, fmt, adx);
	va_end(adx);
}
647 
648 /*
649  * =========================================================================
650  * misc routines
651  * =========================================================================
652  */
653 
/*
 * Sleep for roughly the given number of clock ticks by calling poll() with
 * no descriptors and a millisecond timeout of ticks * (1000 / hz).  The
 * division is integer division, so the per-tick length is rounded down.
 */
void
delay(clock_t ticks)
{
	(void) poll(0, 0, ticks * (1000 / hz));
}
659 
/*
 * Locate the most significant set bit of i.
 *
 * Returns the 1-based position of that bit (so 1 for the value 1 and 64
 * when the top bit is set), or 0 when no bit is set.  Implemented with
 * __builtin_clzll(), which both GCC and Clang provide.
 */
int
highbit64(uint64_t i)
{
	const int nbits = NBBY * sizeof (uint64_t);

	/* __builtin_clzll() is undefined for 0, so handle that here. */
	return (i != 0 ? nbits - __builtin_clzll(i) : 0);
}
673 
/*
 * Locate the least significant set bit of i.
 *
 * Returns the 1-based position of that bit (so 1 for any odd value), or 0
 * when no bit is set.  Implemented with __builtin_ffsll(), which both GCC
 * and Clang provide.
 */
int
lowbit64(uint64_t i)
{
	return (i != 0 ? __builtin_ffsll(i) : 0);
}
687 
/* Device nodes opened by random_init(). */
const char *random_path = "/dev/random";
const char *urandom_path = "/dev/urandom";
/* Descriptors for the two devices above; -1 while not open. */
static int random_fd = -1, urandom_fd = -1;
691 
/*
 * Open random_path and urandom_path read-only with O_CLOEXEC and keep the
 * descriptors for the random_get_*() functions.  A failed open trips a
 * VERIFY.
 */
void
random_init(void)
{
	VERIFY((random_fd = open(random_path, O_RDONLY | O_CLOEXEC)) != -1);
	VERIFY((urandom_fd = open(urandom_path, O_RDONLY | O_CLOEXEC)) != -1);
}
698 
/* Close both random descriptors and mark them as not open. */
void
random_fini(void)
{
	close(random_fd);
	close(urandom_fd);

	random_fd = -1;
	urandom_fd = -1;
}
708 
709 static int
710 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
711 {
712 	size_t resid = len;
713 	ssize_t bytes;
714 
715 	ASSERT(fd != -1);
716 
717 	while (resid != 0) {
718 		bytes = read(fd, ptr, resid);
719 		ASSERT3S(bytes, >=, 0);
720 		ptr += bytes;
721 		resid -= bytes;
722 	}
723 
724 	return (0);
725 }
726 
/*
 * Fill ptr[0..len) from the descriptor opened on random_path.
 * Returns the result of random_get_bytes_common().
 */
int
random_get_bytes(uint8_t *ptr, size_t len)
{
	return (random_get_bytes_common(ptr, len, random_fd));
}
732 
/*
 * Fill ptr[0..len) from the descriptor opened on urandom_path.
 * Returns the result of random_get_bytes_common().
 */
int
random_get_pseudo_bytes(uint8_t *ptr, size_t len)
{
	return (random_get_bytes_common(ptr, len, urandom_fd));
}
738 
/*
 * Parse an unsigned long from a string using strtoul().
 *
 * hw_serial - string to parse
 * nptr - if non-NULL, receives a pointer to the first character that was
 *        not consumed by the conversion
 * base - conversion base, as for strtoul()
 * result - receives the converted value
 *
 * Returns 0 on success, or the errno value set by strtoul() (for example
 * ERANGE when the value does not fit).  A legitimately parsed 0 is a
 * success.  The caller's errno is left as it was on entry.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;
	int saved_errno = errno;
	int error;

	/*
	 * strtoul() only sets errno on failure, so clear it first; that way
	 * a stale value is never mistaken for an error of this call.
	 */
	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	error = errno;
	errno = saved_errno;

	if (nptr != NULL)
		*nptr = end;

	return (error);
}
749 
750 int
751 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
752 {
753 	char *end;
754 
755 	*result = strtoull(str, &end, base);
756 	if (*result == 0)
757 		return (errno);
758 	return (0);
759 }
760 
/* Return a pointer to the global hw_utsname. */
utsname_t *
utsname(void)
{
	return (&hw_utsname);
}
766 
767 /*
768  * =========================================================================
769  * kernel emulation setup & teardown
770  * =========================================================================
771  */
/*
 * Callback registered with umem_nofail_callback() in kernel_init(): print
 * a message to stderr and abort().
 */
static int
umem_out_of_memory(void)
{
	char errmsg[] = "out of memory -- generating core dump\n";

	(void) fprintf(stderr, "%s", errmsg);
	abort();
	/* Not reached; present to satisfy the int return type. */
	return (0);
}
781 
/*
 * Bring up the userland emulation of the kernel environment.
 *
 * mode - SPA mode flags; passed to spa_init(), and SPA_MODE_WRITE decides
 *        whether the real system host id or 0 is stored in hw_serial
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	umem_nofail_callback(umem_out_of_memory);

	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t)physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* hw_serial holds the host id as decimal text. */
	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
	    (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0);

	random_init();

	VERIFY0(uname(&hw_utsname));

	/* kernel_fini() tears these subsystems down in reverse order. */
	system_taskq_init();
	icp_init();

	zstd_init();

	spa_init((spa_mode_t)mode);

	fletcher_4_init();

	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
812 
/*
 * Shut down what kernel_init() started, in the reverse of the order in
 * which it was initialized.
 */
void
kernel_fini(void)
{
	fletcher_4_fini();
	spa_fini();

	zstd_fini();

	icp_fini();
	system_taskq_fini();

	random_fini();
}
826 
/* Userland stub: cr is ignored and uid 0 is always returned. */
uid_t
crgetuid(cred_t *cr)
{
	return (0);
}
832 
/* Userland stub: cr is ignored and uid 0 is always returned. */
uid_t
crgetruid(cred_t *cr)
{
	return (0);
}
838 
/* Userland stub: cr is ignored and gid 0 is always returned. */
gid_t
crgetgid(cred_t *cr)
{
	return (0);
}
844 
/* Userland stub: cr is ignored and a group count of 0 is always returned. */
int
crgetngroups(cred_t *cr)
{
	return (0);
}
850 
/* Userland stub: cr is ignored and NULL is always returned. */
gid_t *
crgetgroups(cred_t *cr)
{
	return (NULL);
}
856 
/* Userland stub: no check is performed; always returns 0. */
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
	return (0);
}
862 
/* Userland stub: no check is performed; always returns 0. */
int
zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
{
	return (0);
}
868 
/* Userland stub: no check is performed; always returns 0. */
int
zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
{
	return (0);
}
874 
/* Userland stub: no check is performed; always returns 0. */
int
secpolicy_zfs(const cred_t *cr)
{
	return (0);
}
880 
/* Userland stub: no check is performed; always returns 0. */
int
secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
{
	return (0);
}
886 
/*
 * Allocate a zeroed ksiddomain_t whose kd_name is a copy of dom made with
 * spa_strdup().  Release it with ksiddomain_rele().
 */
ksiddomain_t *
ksid_lookupdomain(const char *dom)
{
	ksiddomain_t *kd;

	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
	kd->kd_name = spa_strdup(dom);
	return (kd);
}
896 
/* Free the name and then the ksiddomain_t itself. */
void
ksiddomain_rele(ksiddomain_t *ksid)
{
	spa_strfree(ksid->kd_name);
	umem_free(ksid, sizeof (ksiddomain_t));
}
903 
/*
 * Format into a newly allocated string using vasprintf().  A failure trips
 * a VERIFY.  Returns the allocated buffer.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	char *buf = NULL;
	va_list adx_copy;

	/* Work on a copy so the caller's va_list is left untouched. */
	va_copy(adx_copy, adx);
	VERIFY(vasprintf(&buf, fmt, adx_copy) != -1);
	va_end(adx_copy);

	return (buf);
}
916 
/*
 * printf-style formatting into a newly allocated string.  The formatting
 * and the failure check (a VERIFY on vasprintf()) are done by
 * kmem_vasprintf().  Returns the allocated buffer.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	char *str;
	va_list ap;

	va_start(ap, fmt);
	str = kmem_vasprintf(fmt, ap);
	va_end(ap);

	return (str);
}
929 
/* Userland stub: fd is ignored, *minorp is set to 0 and 0 is returned. */
/* ARGSUSED */
int
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
	*minorp = 0;
	return (0);
}
937 
/* Userland stub: does nothing. */
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
{
}
943 
/*
 * Userland stub: the callback is not registered, *action_handle is not
 * written, and 0 is always returned.
 */
/* ARGSUSED */
int
zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
    uint64_t *action_handle)
{
	return (0);
}
951 
/* Userland stub: always returns a zero cookie. */
fstrans_cookie_t
spl_fstrans_mark(void)
{
	return ((fstrans_cookie_t)0);
}
957 
/* Userland stub: does nothing. */
void
spl_fstrans_unmark(fstrans_cookie_t cookie)
{
}
962 
/* Userland stub: always returns 0. */
int
__spl_pf_fstrans_check(void)
{
	return (0);
}
968 
/* Userland stub: always returns 0. */
int
kmem_cache_reap_active(void)
{
	return (0);
}
974 
/* Definition of the zvol_tag symbol; nothing in this file reads it. */
void *zvol_tag = "zvol_tag";
976 
/* Userland stub: does nothing. */
void
zvol_create_minor(const char *name)
{
}
981 
/* Userland stub: does nothing. */
void
zvol_create_minors_recursive(const char *name)
{
}
986 
/* Userland stub: does nothing. */
void
zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
{
}
991 
/* Userland stub: does nothing. */
void
zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
    boolean_t async)
{
}
997 
998 /*
999  * Open file
1000  *
1001  * path - fully qualified path to file
1002  * flags - file attributes O_READ / O_WRITE / O_EXCL
1003  * fpp - pointer to return file pointer
1004  *
1005  * Returns 0 on success underlying error on failure.
1006  */
1007 int
1008 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
1009 {
1010 	int fd = -1;
1011 	int dump_fd = -1;
1012 	int err;
1013 	int old_umask = 0;
1014 	zfs_file_t *fp;
1015 	struct stat64 st;
1016 
1017 	if (!(flags & O_CREAT) && stat64(path, &st) == -1)
1018 		return (errno);
1019 
1020 	if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
1021 		flags |= O_DIRECT;
1022 
1023 	if (flags & O_CREAT)
1024 		old_umask = umask(0);
1025 
1026 	fd = open64(path, flags, mode);
1027 	if (fd == -1)
1028 		return (errno);
1029 
1030 	if (flags & O_CREAT)
1031 		(void) umask(old_umask);
1032 
1033 	if (vn_dumpdir != NULL) {
1034 		char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
1035 		const char *inpath = zfs_basename(path);
1036 
1037 		(void) snprintf(dumppath, MAXPATHLEN,
1038 		    "%s/%s", vn_dumpdir, inpath);
1039 		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
1040 		umem_free(dumppath, MAXPATHLEN);
1041 		if (dump_fd == -1) {
1042 			err = errno;
1043 			close(fd);
1044 			return (err);
1045 		}
1046 	} else {
1047 		dump_fd = -1;
1048 	}
1049 
1050 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
1051 
1052 	fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
1053 	fp->f_fd = fd;
1054 	fp->f_dump_fd = dump_fd;
1055 	*fpp = fp;
1056 
1057 	return (0);
1058 }
1059 
/*
 * Close the file descriptor, and the dump descriptor if there is one, then
 * free the zfs_file_t.  Errors from close() are ignored.
 */
void
zfs_file_close(zfs_file_t *fp)
{
	close(fp->f_fd);
	if (fp->f_dump_fd != -1)
		close(fp->f_dump_fd);

	umem_free(fp, sizeof (zfs_file_t));
}
1069 
1070 /*
1071  * Stateful write - use os internal file pointer to determine where to
1072  * write and update on successful completion.
1073  *
1074  * fp -  pointer to file (pipe, socket, etc) to write to
1075  * buf - buffer to write
1076  * count - # of bytes to write
1077  * resid -  pointer to count of unwritten bytes  (if short write)
1078  *
1079  * Returns 0 on success errno on failure.
1080  */
1081 int
1082 zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
1083 {
1084 	ssize_t rc;
1085 
1086 	rc = write(fp->f_fd, buf, count);
1087 	if (rc < 0)
1088 		return (errno);
1089 
1090 	if (resid) {
1091 		*resid = count - rc;
1092 	} else if (rc != count) {
1093 		return (EIO);
1094 	}
1095 
1096 	return (0);
1097 }
1098 
/*
 * Stateless write - os internal file pointer is not updated.
 *
 * fp -  pointer to file (pipe, socket, etc) to write to
 * buf - buffer to write
 * count - # of bytes to write
 * pos - file offset to write to (only valid for seekable types)
 * resid -  if non-NULL, receives the count of unwritten bytes
 *
 * Returns 0 on success errno on failure.  When resid is NULL a short
 * write is reported as EIO.
 */
int
zfs_file_pwrite(zfs_file_t *fp, const void *buf,
    size_t count, loff_t pos, ssize_t *resid)
{
	ssize_t rc, split, done;
	int sectors;

	/*
	 * To simulate partial disk writes, we split writes into two
	 * system calls so that the process can be killed in between.
	 * This is used by ztest to simulate realistic failure modes.
	 */
	sectors = count >> SPA_MINBLOCKSHIFT;
	/* Random split point on a minimum-block-size boundary (may be 0). */
	split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
	rc = pwrite64(fp->f_fd, buf, split, pos);
	if (rc != -1) {
		/* Bytes from the first half; only set when it succeeded. */
		done = rc;
		rc = pwrite64(fp->f_fd, (char *)buf + split,
		    count - split, pos + split);
	}
#ifdef __linux__
	if (rc == -1 && errno == EINVAL) {
		/*
		 * Under Linux, this most likely means an alignment issue
		 * (memory or disk) due to O_DIRECT, so we abort() in order
		 * to catch the offender.
		 */
		abort();
	}
#endif

	/* Returning here keeps `done` from being read when it was never set. */
	if (rc < 0)
		return (errno);

	done += rc;

	if (resid) {
		*resid = count - done;
	} else if (done != count) {
		return (EIO);
	}

	return (0);
}
1154 
1155 /*
1156  * Stateful read - use os internal file pointer to determine where to
1157  * read and update on successful completion.
1158  *
1159  * fp -  pointer to file (pipe, socket, etc) to read from
1160  * buf - buffer to write
1161  * count - # of bytes to read
1162  * resid -  pointer to count of unread bytes (if short read)
1163  *
1164  * Returns 0 on success errno on failure.
1165  */
1166 int
1167 zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
1168 {
1169 	int rc;
1170 
1171 	rc = read(fp->f_fd, buf, count);
1172 	if (rc < 0)
1173 		return (errno);
1174 
1175 	if (resid) {
1176 		*resid = count - rc;
1177 	} else if (rc != count) {
1178 		return (EIO);
1179 	}
1180 
1181 	return (0);
1182 }
1183 
/*
 * Stateless read - os internal file pointer is not updated.
 *
 * fp -  pointer to file (pipe, socket, etc) to read from
 * buf - buffer to read into
 * count - # of bytes to read
 * off - file offset to read from (only valid for seekable types)
 * resid -  if non-NULL, receives the count of unread bytes (if short read)
 *
 * If the file has a dump descriptor, the bytes read are also written to it
 * at the same offset.
 *
 * Returns 0 on success errno on failure.  When resid is NULL a short read
 * is reported as EIO.
 */
int
zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
    ssize_t *resid)
{
	ssize_t rc;

	rc = pread64(fp->f_fd, buf, count, off);
	if (rc < 0) {
#ifdef __linux__
		/*
		 * Under Linux, this most likely means an alignment issue
		 * (memory or disk) due to O_DIRECT, so we abort() in order to
		 * catch the offender.
		 */
		if (errno == EINVAL)
			abort();
#endif
		return (errno);
	}

	if (fp->f_dump_fd != -1) {
		int status;

		/* Mirror what was just read into the dump file. */
		status = pwrite64(fp->f_dump_fd, buf, rc, off);
		ASSERT(status != -1);
	}

	if (resid) {
		*resid = count - rc;
	} else if (rc != count) {
		return (EIO);
	}

	return (0);
}
1230 
/*
 * lseek - set / get file pointer
 *
 * fp -  pointer to file (pipe, socket, etc) to seek on
 * offp - in: offset passed to lseek(); out: resulting file position
 *        (left unchanged on failure)
 * whence - see man pages for standard lseek whence values
 *
 * Returns 0 on success errno on failure (ESPIPE for non seekable types)
 */
int
zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
{
	loff_t rc;

	rc = lseek(fp->f_fd, *offp, whence);
	if (rc < 0)
		return (errno);

	*offp = rc;

	return (0);
}
1253 
/*
 * Get file attributes
 *
 * fp - file pointer
 * zfattr - pointer to file attr structure; zfa_size and zfa_mode are
 *          filled in on success
 *
 * Currently only used for fetching size and file mode
 *
 * Returns 0 on success or error code of underlying getattr call on failure.
 */
int
zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
{
	struct stat64 st;

	/*
	 * NOTE(review): fstat64_blk() is defined elsewhere; presumably an
	 * fstat64() that also reports a size for block devices -- confirm.
	 */
	if (fstat64_blk(fp->f_fd, &st) == -1)
		return (errno);

	zfattr->zfa_size = st.st_size;
	zfattr->zfa_mode = st.st_mode;

	return (0);
}
1277 
1278 /*
1279  * Sync file to disk
1280  *
1281  * filp - file pointer
1282  * flags - O_SYNC and or O_DSYNC
1283  *
1284  * Returns 0 on success or error code of underlying sync call on failure.
1285  */
1286 int
1287 zfs_file_fsync(zfs_file_t *fp, int flags)
1288 {
1289 	int rc;
1290 
1291 	rc = fsync(fp->f_fd);
1292 	if (rc < 0)
1293 		return (errno);
1294 
1295 	return (0);
1296 }
1297 
1298 /*
1299  * fallocate - allocate or free space on disk
1300  *
1301  * fp - file pointer
1302  * mode (non-standard options for hole punching etc)
1303  * offset - offset to start allocating or freeing from
1304  * len - length to free / allocate
1305  *
1306  * OPTIONAL
1307  */
1308 int
1309 zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
1310 {
1311 #ifdef __linux__
1312 	return (fallocate(fp->f_fd, mode, offset, len));
1313 #else
1314 	return (EOPNOTSUPP);
1315 #endif
1316 }
1317 
1318 /*
1319  * Request current file pointer offset
1320  *
1321  * fp - pointer to file
1322  *
1323  * Returns current file offset.
1324  */
1325 loff_t
1326 zfs_file_off(zfs_file_t *fp)
1327 {
1328 	return (lseek(fp->f_fd, SEEK_CUR, 0));
1329 }
1330 
/*
 * unlink file
 *
 * path - fully qualified file path
 *
 * Returns 0 on success.  On failure the return value of remove() is passed
 * through unchanged; it is not converted to an errno value.
 *
 * OPTIONAL
 */
int
zfs_file_unlink(const char *path)
{
	return (remove(path));
}
1345 
/*
 * Get reference to file pointer
 *
 * fd - input file descriptor
 * fpp - pointer to file pointer
 *
 * Unsupported in user space: calling this aborts the process.
 */
int
zfs_file_get(int fd, zfs_file_t **fpp)
{
	abort();

	/* Not reached; present to satisfy the int return type. */
	return (EOPNOTSUPP);
}
1362 
/*
 * Drop reference to file pointer
 *
 * fd - input file descriptor
 *
 * Unsupported in user space: calling this aborts the process.
 */
void
zfs_file_put(int fd)
{
	abort();
}
1375 
/* Userland stub: does nothing. */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
}
1380