xref: /freebsd/sys/contrib/openzfs/lib/libzpool/kernel.c (revision 1f88aa09417f1cfb3929fd37531b1ab51213c2d6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
25  */
26 
27 #include <assert.h>
28 #include <fcntl.h>
29 #include <libgen.h>
30 #include <poll.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <libzutil.h>
35 #include <sys/crypto/icp.h>
36 #include <sys/processor.h>
37 #include <sys/rrwlock.h>
38 #include <sys/spa.h>
39 #include <sys/stat.h>
40 #include <sys/systeminfo.h>
41 #include <sys/time.h>
42 #include <sys/utsname.h>
43 #include <sys/zfs_context.h>
44 #include <sys/zfs_onexit.h>
45 #include <sys/zfs_vfsops.h>
46 #include <sys/zstd/zstd.h>
47 #include <sys/zvol.h>
48 #include <zfs_fletcher.h>
49 #include <zlib.h>
50 
51 /*
52  * Emulation of kernel services in userland.
53  */
54 
/* Number of physical memory pages; set by kernel_init() from sysconf(). */
uint64_t physmem;
/* Decimal host id string; written by kernel_init(), parsed on demand. */
char hw_serial[HW_HOSTID_LEN];
/* Cached uname() result; filled by kernel_init(), returned by utsname(). */
struct utsname hw_utsname;

/* If set, all blocks read will be copied to the specified directory. */
char *vn_dumpdir = NULL;

/* this only exists to have its address taken */
struct proc p0;
64 
65 /*
66  * =========================================================================
67  * threads
68  * =========================================================================
69  *
70  * TS_STACK_MIN is dictated by the minimum allowed pthread stack size.  While
71  * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
72  * the expected stack depth while small enough to avoid exhausting address
73  * space with high thread counts.
74  */
75 #define	TS_STACK_MIN	MAX(PTHREAD_STACK_MIN, 32768)
76 #define	TS_STACK_MAX	(256 * 1024)
77 
78 /*ARGSUSED*/
79 kthread_t *
80 zk_thread_create(void (*func)(void *), void *arg, size_t stksize, int state)
81 {
82 	pthread_attr_t attr;
83 	pthread_t tid;
84 	char *stkstr;
85 	int detachstate = PTHREAD_CREATE_DETACHED;
86 
87 	VERIFY0(pthread_attr_init(&attr));
88 
89 	if (state & TS_JOINABLE)
90 		detachstate = PTHREAD_CREATE_JOINABLE;
91 
92 	VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));
93 
94 	/*
95 	 * We allow the default stack size in user space to be specified by
96 	 * setting the ZFS_STACK_SIZE environment variable.  This allows us
97 	 * the convenience of observing and debugging stack overruns in
98 	 * user space.  Explicitly specified stack sizes will be honored.
99 	 * The usage of ZFS_STACK_SIZE is discussed further in the
100 	 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
101 	 */
102 	if (stksize == 0) {
103 		stkstr = getenv("ZFS_STACK_SIZE");
104 
105 		if (stkstr == NULL)
106 			stksize = TS_STACK_MAX;
107 		else
108 			stksize = MAX(atoi(stkstr), TS_STACK_MIN);
109 	}
110 
111 	VERIFY3S(stksize, >, 0);
112 	stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);
113 
114 	/*
115 	 * If this ever fails, it may be because the stack size is not a
116 	 * multiple of system page size.
117 	 */
118 	VERIFY0(pthread_attr_setstacksize(&attr, stksize));
119 	VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));
120 
121 	VERIFY0(pthread_create(&tid, &attr, (void *(*)(void *))func, arg));
122 	VERIFY0(pthread_attr_destroy(&attr));
123 
124 	return ((void *)(uintptr_t)tid);
125 }
126 
127 /*
128  * =========================================================================
129  * kstats
130  * =========================================================================
131  */
132 /*ARGSUSED*/
133 kstat_t *
134 kstat_create(const char *module, int instance, const char *name,
135     const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
136 {
137 	return (NULL);
138 }
139 
140 /*ARGSUSED*/
141 void
142 kstat_install(kstat_t *ksp)
143 {}
144 
145 /*ARGSUSED*/
146 void
147 kstat_delete(kstat_t *ksp)
148 {}
149 
150 void
151 kstat_set_raw_ops(kstat_t *ksp,
152     int (*headers)(char *buf, size_t size),
153     int (*data)(char *buf, size_t size, void *data),
154     void *(*addr)(kstat_t *ksp, loff_t index))
155 {}
156 
157 /*
158  * =========================================================================
159  * mutexes
160  * =========================================================================
161  */
162 
163 void
164 mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
165 {
166 	VERIFY0(pthread_mutex_init(&mp->m_lock, NULL));
167 	memset(&mp->m_owner, 0, sizeof (pthread_t));
168 }
169 
170 void
171 mutex_destroy(kmutex_t *mp)
172 {
173 	VERIFY0(pthread_mutex_destroy(&mp->m_lock));
174 }
175 
176 void
177 mutex_enter(kmutex_t *mp)
178 {
179 	VERIFY0(pthread_mutex_lock(&mp->m_lock));
180 	mp->m_owner = pthread_self();
181 }
182 
183 int
184 mutex_tryenter(kmutex_t *mp)
185 {
186 	int error;
187 
188 	error = pthread_mutex_trylock(&mp->m_lock);
189 	if (error == 0) {
190 		mp->m_owner = pthread_self();
191 		return (1);
192 	} else {
193 		VERIFY3S(error, ==, EBUSY);
194 		return (0);
195 	}
196 }
197 
198 void
199 mutex_exit(kmutex_t *mp)
200 {
201 	memset(&mp->m_owner, 0, sizeof (pthread_t));
202 	VERIFY0(pthread_mutex_unlock(&mp->m_lock));
203 }
204 
205 /*
206  * =========================================================================
207  * rwlocks
208  * =========================================================================
209  */
210 
211 void
212 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
213 {
214 	VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL));
215 	rwlp->rw_readers = 0;
216 	rwlp->rw_owner = 0;
217 }
218 
219 void
220 rw_destroy(krwlock_t *rwlp)
221 {
222 	VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock));
223 }
224 
225 void
226 rw_enter(krwlock_t *rwlp, krw_t rw)
227 {
228 	if (rw == RW_READER) {
229 		VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock));
230 		atomic_inc_uint(&rwlp->rw_readers);
231 	} else {
232 		VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock));
233 		rwlp->rw_owner = pthread_self();
234 	}
235 }
236 
237 void
238 rw_exit(krwlock_t *rwlp)
239 {
240 	if (RW_READ_HELD(rwlp))
241 		atomic_dec_uint(&rwlp->rw_readers);
242 	else
243 		rwlp->rw_owner = 0;
244 
245 	VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock));
246 }
247 
248 int
249 rw_tryenter(krwlock_t *rwlp, krw_t rw)
250 {
251 	int error;
252 
253 	if (rw == RW_READER)
254 		error = pthread_rwlock_tryrdlock(&rwlp->rw_lock);
255 	else
256 		error = pthread_rwlock_trywrlock(&rwlp->rw_lock);
257 
258 	if (error == 0) {
259 		if (rw == RW_READER)
260 			atomic_inc_uint(&rwlp->rw_readers);
261 		else
262 			rwlp->rw_owner = pthread_self();
263 
264 		return (1);
265 	}
266 
267 	VERIFY3S(error, ==, EBUSY);
268 
269 	return (0);
270 }
271 
272 /* ARGSUSED */
273 uint32_t
274 zone_get_hostid(void *zonep)
275 {
276 	/*
277 	 * We're emulating the system's hostid in userland.
278 	 */
279 	return (strtoul(hw_serial, NULL, 10));
280 }
281 
282 int
283 rw_tryupgrade(krwlock_t *rwlp)
284 {
285 	return (0);
286 }
287 
288 /*
289  * =========================================================================
290  * condition variables
291  * =========================================================================
292  */
293 
294 void
295 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
296 {
297 	VERIFY0(pthread_cond_init(cv, NULL));
298 }
299 
300 void
301 cv_destroy(kcondvar_t *cv)
302 {
303 	VERIFY0(pthread_cond_destroy(cv));
304 }
305 
306 void
307 cv_wait(kcondvar_t *cv, kmutex_t *mp)
308 {
309 	memset(&mp->m_owner, 0, sizeof (pthread_t));
310 	VERIFY0(pthread_cond_wait(cv, &mp->m_lock));
311 	mp->m_owner = pthread_self();
312 }
313 
314 int
315 cv_wait_sig(kcondvar_t *cv, kmutex_t *mp)
316 {
317 	cv_wait(cv, mp);
318 	return (1);
319 }
320 
321 int
322 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
323 {
324 	int error;
325 	struct timeval tv;
326 	struct timespec ts;
327 	clock_t delta;
328 
329 	delta = abstime - ddi_get_lbolt();
330 	if (delta <= 0)
331 		return (-1);
332 
333 	VERIFY(gettimeofday(&tv, NULL) == 0);
334 
335 	ts.tv_sec = tv.tv_sec + delta / hz;
336 	ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % hz) * (NANOSEC / hz);
337 	if (ts.tv_nsec >= NANOSEC) {
338 		ts.tv_sec++;
339 		ts.tv_nsec -= NANOSEC;
340 	}
341 
342 	memset(&mp->m_owner, 0, sizeof (pthread_t));
343 	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
344 	mp->m_owner = pthread_self();
345 
346 	if (error == ETIMEDOUT)
347 		return (-1);
348 
349 	VERIFY0(error);
350 
351 	return (1);
352 }
353 
354 /*ARGSUSED*/
355 int
356 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
357     int flag)
358 {
359 	int error;
360 	struct timeval tv;
361 	struct timespec ts;
362 	hrtime_t delta;
363 
364 	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
365 
366 	delta = tim;
367 	if (flag & CALLOUT_FLAG_ABSOLUTE)
368 		delta -= gethrtime();
369 
370 	if (delta <= 0)
371 		return (-1);
372 
373 	VERIFY0(gettimeofday(&tv, NULL));
374 
375 	ts.tv_sec = tv.tv_sec + delta / NANOSEC;
376 	ts.tv_nsec = tv.tv_usec * NSEC_PER_USEC + (delta % NANOSEC);
377 	if (ts.tv_nsec >= NANOSEC) {
378 		ts.tv_sec++;
379 		ts.tv_nsec -= NANOSEC;
380 	}
381 
382 	memset(&mp->m_owner, 0, sizeof (pthread_t));
383 	error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
384 	mp->m_owner = pthread_self();
385 
386 	if (error == ETIMEDOUT)
387 		return (-1);
388 
389 	VERIFY0(error);
390 
391 	return (1);
392 }
393 
394 void
395 cv_signal(kcondvar_t *cv)
396 {
397 	VERIFY0(pthread_cond_signal(cv));
398 }
399 
400 void
401 cv_broadcast(kcondvar_t *cv)
402 {
403 	VERIFY0(pthread_cond_broadcast(cv));
404 }
405 
406 /*
407  * =========================================================================
408  * procfs list
409  * =========================================================================
410  */
411 
/* Userland stub: nothing is formatted or written. */
void
seq_printf(struct seq_file *m, const char *fmt, ...)
{
	(void) m, (void) fmt;
}
415 
416 void
417 procfs_list_install(const char *module,
418     const char *submodule,
419     const char *name,
420     mode_t mode,
421     procfs_list_t *procfs_list,
422     int (*show)(struct seq_file *f, void *p),
423     int (*show_header)(struct seq_file *f),
424     int (*clear)(procfs_list_t *procfs_list),
425     size_t procfs_list_node_off)
426 {
427 	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
428 	list_create(&procfs_list->pl_list,
429 	    procfs_list_node_off + sizeof (procfs_list_node_t),
430 	    procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
431 	procfs_list->pl_next_id = 1;
432 	procfs_list->pl_node_offset = procfs_list_node_off;
433 }
434 
435 void
436 procfs_list_uninstall(procfs_list_t *procfs_list)
437 {}
438 
439 void
440 procfs_list_destroy(procfs_list_t *procfs_list)
441 {
442 	ASSERT(list_is_empty(&procfs_list->pl_list));
443 	list_destroy(&procfs_list->pl_list);
444 	mutex_destroy(&procfs_list->pl_lock);
445 }
446 
/*
 * Lvalue for the pln_id of the procfs_list_node_t embedded in obj at the
 * list's pl_node_offset.  Both arguments are parenthesized so that any
 * expression can safely be passed for them.
 */
#define	NODE_ID(procfs_list, obj) \
		(((procfs_list_node_t *)(((char *)(obj)) + \
		(procfs_list)->pl_node_offset))->pln_id)
450 
451 void
452 procfs_list_add(procfs_list_t *procfs_list, void *p)
453 {
454 	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
455 	NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
456 	list_insert_tail(&procfs_list->pl_list, p);
457 }
458 
459 /*
460  * =========================================================================
461  * vnode operations
462  * =========================================================================
463  */
464 
465 /*
466  * =========================================================================
467  * Figure out which debugging statements to print
468  * =========================================================================
469  */
470 
/* Comma separated debug selectors; NULL when debugging is not configured. */
static char *dprintf_string;
/* Non-zero once the "on" selector has been seen by dprintf_setup(). */
static int dprintf_print_all;

/*
 * Report whether string appears as a complete element of the comma
 * separated selector list in dprintf_string.
 * List format: file1.c,function_name1,file2.c,file3.c
 *
 * Returns 1 on an exact element match and 0 otherwise, including when no
 * selector list is configured.
 */
int
dprintf_find_string(const char *string)
{
	int len = strlen(string);
	char *elem;

	for (elem = dprintf_string; elem != NULL; ) {
		/* The element must end exactly where string does. */
		if (strncmp(elem, string, len) == 0 &&
		    (elem[len] == ',' || elem[len] == '\0'))
			return (1);

		/* Step to the element after the next comma, if any. */
		elem = strchr(elem, ',');
		if (elem != NULL)
			elem++;
	}

	return (0);
}
495 
496 void
497 dprintf_setup(int *argc, char **argv)
498 {
499 	int i, j;
500 
501 	/*
502 	 * Debugging can be specified two ways: by setting the
503 	 * environment variable ZFS_DEBUG, or by including a
504 	 * "debug=..."  argument on the command line.  The command
505 	 * line setting overrides the environment variable.
506 	 */
507 
508 	for (i = 1; i < *argc; i++) {
509 		int len = strlen("debug=");
510 		/* First look for a command line argument */
511 		if (strncmp("debug=", argv[i], len) == 0) {
512 			dprintf_string = argv[i] + len;
513 			/* Remove from args */
514 			for (j = i; j < *argc; j++)
515 				argv[j] = argv[j+1];
516 			argv[j] = NULL;
517 			(*argc)--;
518 		}
519 	}
520 
521 	if (dprintf_string == NULL) {
522 		/* Look for ZFS_DEBUG environment variable */
523 		dprintf_string = getenv("ZFS_DEBUG");
524 	}
525 
526 	/*
527 	 * Are we just turning on all debugging?
528 	 */
529 	if (dprintf_find_string("on"))
530 		dprintf_print_all = 1;
531 
532 	if (dprintf_string != NULL)
533 		zfs_flags |= ZFS_DEBUG_DPRINTF;
534 }
535 
536 /*
537  * =========================================================================
538  * debug printfs
539  * =========================================================================
540  */
541 void
542 __dprintf(boolean_t dprint, const char *file, const char *func,
543     int line, const char *fmt, ...)
544 {
545 	/* Get rid of annoying "../common/" prefix to filename. */
546 	const char *newfile = zfs_basename(file);
547 
548 	va_list adx;
549 	if (dprint) {
550 		/* dprintf messages are printed immediately */
551 
552 		if (!dprintf_print_all &&
553 		    !dprintf_find_string(newfile) &&
554 		    !dprintf_find_string(func))
555 			return;
556 
557 		/* Print out just the function name if requested */
558 		flockfile(stdout);
559 		if (dprintf_find_string("pid"))
560 			(void) printf("%d ", getpid());
561 		if (dprintf_find_string("tid"))
562 			(void) printf("%ju ",
563 			    (uintmax_t)(uintptr_t)pthread_self());
564 		if (dprintf_find_string("cpu"))
565 			(void) printf("%u ", getcpuid());
566 		if (dprintf_find_string("time"))
567 			(void) printf("%llu ", gethrtime());
568 		if (dprintf_find_string("long"))
569 			(void) printf("%s, line %d: ", newfile, line);
570 		(void) printf("dprintf: %s: ", func);
571 		va_start(adx, fmt);
572 		(void) vprintf(fmt, adx);
573 		va_end(adx);
574 		funlockfile(stdout);
575 	} else {
576 		/* zfs_dbgmsg is logged for dumping later */
577 		size_t size;
578 		char *buf;
579 		int i;
580 
581 		size = 1024;
582 		buf = umem_alloc(size, UMEM_NOFAIL);
583 		i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
584 
585 		if (i < size) {
586 			va_start(adx, fmt);
587 			(void) vsnprintf(buf + i, size - i, fmt, adx);
588 			va_end(adx);
589 		}
590 
591 		__zfs_dbgmsg(buf);
592 
593 		umem_free(buf, size);
594 	}
595 }
596 
597 /*
598  * =========================================================================
599  * cmn_err() and panic()
600  * =========================================================================
601  */
/*
 * Text printed before and after a cmn_err() message, indexed by the "ce"
 * level.  Both tables have CE_IGNORE entries, so only levels below
 * CE_IGNORE are valid indexes.
 */
static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
604 
/*
 * Write "error: <formatted message>\n" to stderr and abort the process.
 * Does not return.
 */
void
vpanic(const char *fmt, va_list adx)
{
	(void) fputs("error: ", stderr);
	(void) vfprintf(stderr, fmt, adx);
	(void) fputc('\n', stderr);

	abort();	/* think of it as a "user-level crash dump" */
}
614 
/* printf-style front end for vpanic(). */
void
panic(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vpanic(fmt, args);
	va_end(args);
}
624 
625 void
626 vcmn_err(int ce, const char *fmt, va_list adx)
627 {
628 	if (ce == CE_PANIC)
629 		vpanic(fmt, adx);
630 	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
631 		(void) fprintf(stderr, "%s", ce_prefix[ce]);
632 		(void) vfprintf(stderr, fmt, adx);
633 		(void) fprintf(stderr, "%s", ce_suffix[ce]);
634 	}
635 }
636 
/* printf-style front end for vcmn_err(). */
void
cmn_err(int ce, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vcmn_err(ce, fmt, args);
	va_end(args);
}
646 
647 /*
648  * =========================================================================
649  * misc routines
650  * =========================================================================
651  */
652 
653 void
654 delay(clock_t ticks)
655 {
656 	(void) poll(0, 0, ticks * (1000 / hz));
657 }
658 
/*
 * Find the highest one bit set.
 * Returns the bit number + 1 of the highest set bit, or 0 when no bit is
 * set.  __builtin_clzll() is supported by both GCC and Clang.
 */
int
highbit64(uint64_t i)
{
	const int nbits = NBBY * sizeof (uint64_t);

	if (i != 0)
		return (nbits - __builtin_clzll(i));

	return (0);
}
672 
/*
 * Find the lowest one bit set.
 * Returns the bit number + 1 of the lowest set bit, or 0 when no bit is
 * set.  __builtin_ffsll() is supported by both GCC and Clang.
 */
int
lowbit64(uint64_t i)
{
	int pos = 0;

	if (i != 0)
		pos = __builtin_ffsll(i);

	return (pos);
}
686 
687 const char *random_path = "/dev/random";
688 const char *urandom_path = "/dev/urandom";
689 static int random_fd = -1, urandom_fd = -1;
690 
691 void
692 random_init(void)
693 {
694 	VERIFY((random_fd = open(random_path, O_RDONLY | O_CLOEXEC)) != -1);
695 	VERIFY((urandom_fd = open(urandom_path, O_RDONLY | O_CLOEXEC)) != -1);
696 }
697 
698 void
699 random_fini(void)
700 {
701 	close(random_fd);
702 	close(urandom_fd);
703 
704 	random_fd = -1;
705 	urandom_fd = -1;
706 }
707 
708 static int
709 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
710 {
711 	size_t resid = len;
712 	ssize_t bytes;
713 
714 	ASSERT(fd != -1);
715 
716 	while (resid != 0) {
717 		bytes = read(fd, ptr, resid);
718 		ASSERT3S(bytes, >=, 0);
719 		ptr += bytes;
720 		resid -= bytes;
721 	}
722 
723 	return (0);
724 }
725 
726 int
727 random_get_bytes(uint8_t *ptr, size_t len)
728 {
729 	return (random_get_bytes_common(ptr, len, random_fd));
730 }
731 
732 int
733 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
734 {
735 	return (random_get_bytes_common(ptr, len, urandom_fd));
736 }
737 
/*
 * Convert a string to an unsigned long using strtoul().
 *
 * hw_serial - string to convert
 * nptr      - if not NULL, receives a pointer to the first character that
 *             was not consumed by the conversion
 * base      - numeric base, as for strtoul()
 * result    - receives the converted value
 *
 * Returns 0 on success, or the errno value set by strtoul() (for example
 * ERANGE on overflow).  errno is cleared before the call so that a stale
 * value is never reported, and a legitimate result of 0 is not mistaken
 * for a failure.
 */
int
ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
{
	char *end;

	errno = 0;
	*result = strtoul(hw_serial, &end, base);
	if (nptr != NULL)
		*nptr = end;

	return (errno);
}
748 
749 int
750 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
751 {
752 	char *end;
753 
754 	*result = strtoull(str, &end, base);
755 	if (*result == 0)
756 		return (errno);
757 	return (0);
758 }
759 
760 utsname_t *
761 utsname(void)
762 {
763 	return (&hw_utsname);
764 }
765 
766 /*
767  * =========================================================================
768  * kernel emulation setup & teardown
769  * =========================================================================
770  */
/*
 * Handler registered with umem_nofail_callback() by kernel_init(): print a
 * message to stderr and abort.  The return statement is never reached.
 */
static int
umem_out_of_memory(void)
{
	(void) fputs("out of memory -- generating core dump\n", stderr);
	abort();
	return (0);
}
780 
/*
 * Bring up the userland kernel emulation.
 *
 * mode - spa mode flags; the real host id is recorded only when
 *        SPA_MODE_WRITE is set, otherwise the host id is 0
 *
 * kernel_fini() shuts down the same subsystems in the reverse order.
 */
void
kernel_init(int mode)
{
	extern uint_t rrw_tsd_key;

	/* Abort with a message when a UMEM_NOFAIL allocation cannot be met. */
	umem_nofail_callback(umem_out_of_memory);

	physmem = sysconf(_SC_PHYS_PAGES);

	dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t)physmem,
	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));

	/* hw_serial holds the host id as a decimal string. */
	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
	    (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0);

	random_init();

	VERIFY0(uname(&hw_utsname));

	system_taskq_init();
	icp_init();

	zstd_init();

	spa_init((spa_mode_t)mode);

	fletcher_4_init();

	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
811 
/*
 * Shut down the userland kernel emulation, finishing each subsystem in the
 * reverse of the order in which kernel_init() started it.
 */
void
kernel_fini(void)
{
	fletcher_4_fini();
	spa_fini();

	zstd_fini();

	icp_fini();
	system_taskq_fini();

	random_fini();
}
825 
826 uid_t
827 crgetuid(cred_t *cr)
828 {
829 	return (0);
830 }
831 
832 uid_t
833 crgetruid(cred_t *cr)
834 {
835 	return (0);
836 }
837 
838 gid_t
839 crgetgid(cred_t *cr)
840 {
841 	return (0);
842 }
843 
844 int
845 crgetngroups(cred_t *cr)
846 {
847 	return (0);
848 }
849 
850 gid_t *
851 crgetgroups(cred_t *cr)
852 {
853 	return (NULL);
854 }
855 
856 int
857 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
858 {
859 	return (0);
860 }
861 
862 int
863 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
864 {
865 	return (0);
866 }
867 
868 int
869 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
870 {
871 	return (0);
872 }
873 
874 int
875 secpolicy_zfs(const cred_t *cr)
876 {
877 	return (0);
878 }
879 
880 int
881 secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
882 {
883 	return (0);
884 }
885 
886 ksiddomain_t *
887 ksid_lookupdomain(const char *dom)
888 {
889 	ksiddomain_t *kd;
890 
891 	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
892 	kd->kd_name = spa_strdup(dom);
893 	return (kd);
894 }
895 
896 void
897 ksiddomain_rele(ksiddomain_t *ksid)
898 {
899 	spa_strfree(ksid->kd_name);
900 	umem_free(ksid, sizeof (ksiddomain_t));
901 }
902 
/*
 * Format into a newly allocated string using a copy of the caller's
 * va_list, so adx itself is not consumed.  A vasprintf() failure is fatal.
 * Returns the allocated string.
 */
char *
kmem_vasprintf(const char *fmt, va_list adx)
{
	va_list args;
	char *str = NULL;
	int rc;

	va_copy(args, adx);
	rc = vasprintf(&str, fmt, args);
	VERIFY(rc != -1);
	va_end(args);

	return (str);
}
915 
/*
 * printf-style formatting into a newly allocated string.  A vasprintf()
 * failure is fatal.  Returns the allocated string.
 */
char *
kmem_asprintf(const char *fmt, ...)
{
	va_list args;
	char *str = NULL;
	int rc;

	va_start(args, fmt);
	rc = vasprintf(&str, fmt, args);
	VERIFY(rc != -1);
	va_end(args);

	return (str);
}
928 
929 /* ARGSUSED */
930 zfs_file_t *
931 zfs_onexit_fd_hold(int fd, minor_t *minorp)
932 {
933 	*minorp = 0;
934 	return (NULL);
935 }
936 
937 /* ARGSUSED */
938 void
939 zfs_onexit_fd_rele(zfs_file_t *fp)
940 {
941 }
942 
943 /* ARGSUSED */
944 int
945 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
946     uint64_t *action_handle)
947 {
948 	return (0);
949 }
950 
951 fstrans_cookie_t
952 spl_fstrans_mark(void)
953 {
954 	return ((fstrans_cookie_t)0);
955 }
956 
957 void
958 spl_fstrans_unmark(fstrans_cookie_t cookie)
959 {
960 }
961 
/* Userland stub: always returns 0. */
int
__spl_pf_fstrans_check(void)
{
	const int marked = 0;

	return (marked);
}
967 
/* Userland stub: always returns 0. */
int
kmem_cache_reap_active(void)
{
	const int active = 0;

	return (active);
}
973 
/* Global tag object for zvols; points at a constant string. */
void *zvol_tag = "zvol_tag";

/* Userland stub: no minor is created. */
void
zvol_create_minor(const char *name)
{
	(void) name;
}
980 
/* Userland stub: no minors are created. */
void
zvol_create_minors_recursive(const char *name)
{
	(void) name;
}
985 
986 void
987 zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
988 {
989 }
990 
991 void
992 zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
993     boolean_t async)
994 {
995 }
996 
997 /*
998  * Open file
999  *
1000  * path - fully qualified path to file
1001  * flags - file attributes O_READ / O_WRITE / O_EXCL
1002  * fpp - pointer to return file pointer
1003  *
1004  * Returns 0 on success underlying error on failure.
1005  */
1006 int
1007 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
1008 {
1009 	int fd = -1;
1010 	int dump_fd = -1;
1011 	int err;
1012 	int old_umask = 0;
1013 	zfs_file_t *fp;
1014 	struct stat64 st;
1015 
1016 	if (!(flags & O_CREAT) && stat64(path, &st) == -1)
1017 		return (errno);
1018 
1019 	if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
1020 		flags |= O_DIRECT;
1021 
1022 	if (flags & O_CREAT)
1023 		old_umask = umask(0);
1024 
1025 	fd = open64(path, flags, mode);
1026 	if (fd == -1)
1027 		return (errno);
1028 
1029 	if (flags & O_CREAT)
1030 		(void) umask(old_umask);
1031 
1032 	if (vn_dumpdir != NULL) {
1033 		char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
1034 		const char *inpath = zfs_basename(path);
1035 
1036 		(void) snprintf(dumppath, MAXPATHLEN,
1037 		    "%s/%s", vn_dumpdir, inpath);
1038 		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
1039 		umem_free(dumppath, MAXPATHLEN);
1040 		if (dump_fd == -1) {
1041 			err = errno;
1042 			close(fd);
1043 			return (err);
1044 		}
1045 	} else {
1046 		dump_fd = -1;
1047 	}
1048 
1049 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
1050 
1051 	fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
1052 	fp->f_fd = fd;
1053 	fp->f_dump_fd = dump_fd;
1054 	*fpp = fp;
1055 
1056 	return (0);
1057 }
1058 
1059 void
1060 zfs_file_close(zfs_file_t *fp)
1061 {
1062 	close(fp->f_fd);
1063 	if (fp->f_dump_fd != -1)
1064 		close(fp->f_dump_fd);
1065 
1066 	umem_free(fp, sizeof (zfs_file_t));
1067 }
1068 
1069 /*
1070  * Stateful write - use os internal file pointer to determine where to
1071  * write and update on successful completion.
1072  *
1073  * fp -  pointer to file (pipe, socket, etc) to write to
1074  * buf - buffer to write
1075  * count - # of bytes to write
1076  * resid -  pointer to count of unwritten bytes  (if short write)
1077  *
1078  * Returns 0 on success errno on failure.
1079  */
1080 int
1081 zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
1082 {
1083 	ssize_t rc;
1084 
1085 	rc = write(fp->f_fd, buf, count);
1086 	if (rc < 0)
1087 		return (errno);
1088 
1089 	if (resid) {
1090 		*resid = count - rc;
1091 	} else if (rc != count) {
1092 		return (EIO);
1093 	}
1094 
1095 	return (0);
1096 }
1097 
1098 /*
1099  * Stateless write - os internal file pointer is not updated.
1100  *
1101  * fp -  pointer to file (pipe, socket, etc) to write to
1102  * buf - buffer to write
1103  * count - # of bytes to write
1104  * off - file offset to write to (only valid for seekable types)
1105  * resid -  pointer to count of unwritten bytes
1106  *
1107  * Returns 0 on success errno on failure.
1108  */
1109 int
1110 zfs_file_pwrite(zfs_file_t *fp, const void *buf,
1111     size_t count, loff_t pos, ssize_t *resid)
1112 {
1113 	ssize_t rc, split, done;
1114 	int sectors;
1115 
1116 	/*
1117 	 * To simulate partial disk writes, we split writes into two
1118 	 * system calls so that the process can be killed in between.
1119 	 * This is used by ztest to simulate realistic failure modes.
1120 	 */
1121 	sectors = count >> SPA_MINBLOCKSHIFT;
1122 	split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
1123 	rc = pwrite64(fp->f_fd, buf, split, pos);
1124 	if (rc != -1) {
1125 		done = rc;
1126 		rc = pwrite64(fp->f_fd, (char *)buf + split,
1127 		    count - split, pos + split);
1128 	}
1129 #ifdef __linux__
1130 	if (rc == -1 && errno == EINVAL) {
1131 		/*
1132 		 * Under Linux, this most likely means an alignment issue
1133 		 * (memory or disk) due to O_DIRECT, so we abort() in order
1134 		 * to catch the offender.
1135 		 */
1136 		abort();
1137 	}
1138 #endif
1139 
1140 	if (rc < 0)
1141 		return (errno);
1142 
1143 	done += rc;
1144 
1145 	if (resid) {
1146 		*resid = count - done;
1147 	} else if (done != count) {
1148 		return (EIO);
1149 	}
1150 
1151 	return (0);
1152 }
1153 
1154 /*
1155  * Stateful read - use os internal file pointer to determine where to
1156  * read and update on successful completion.
1157  *
1158  * fp -  pointer to file (pipe, socket, etc) to read from
1159  * buf - buffer to write
1160  * count - # of bytes to read
1161  * resid -  pointer to count of unread bytes (if short read)
1162  *
1163  * Returns 0 on success errno on failure.
1164  */
1165 int
1166 zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
1167 {
1168 	int rc;
1169 
1170 	rc = read(fp->f_fd, buf, count);
1171 	if (rc < 0)
1172 		return (errno);
1173 
1174 	if (resid) {
1175 		*resid = count - rc;
1176 	} else if (rc != count) {
1177 		return (EIO);
1178 	}
1179 
1180 	return (0);
1181 }
1182 
1183 /*
1184  * Stateless read - os internal file pointer is not updated.
1185  *
1186  * fp -  pointer to file (pipe, socket, etc) to read from
1187  * buf - buffer to write
1188  * count - # of bytes to write
1189  * off - file offset to read from (only valid for seekable types)
1190  * resid -  pointer to count of unwritten bytes (if short write)
1191  *
1192  * Returns 0 on success errno on failure.
1193  */
1194 int
1195 zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
1196     ssize_t *resid)
1197 {
1198 	ssize_t rc;
1199 
1200 	rc = pread64(fp->f_fd, buf, count, off);
1201 	if (rc < 0) {
1202 #ifdef __linux__
1203 		/*
1204 		 * Under Linux, this most likely means an alignment issue
1205 		 * (memory or disk) due to O_DIRECT, so we abort() in order to
1206 		 * catch the offender.
1207 		 */
1208 		if (errno == EINVAL)
1209 			abort();
1210 #endif
1211 		return (errno);
1212 	}
1213 
1214 	if (fp->f_dump_fd != -1) {
1215 		int status;
1216 
1217 		status = pwrite64(fp->f_dump_fd, buf, rc, off);
1218 		ASSERT(status != -1);
1219 	}
1220 
1221 	if (resid) {
1222 		*resid = count - rc;
1223 	} else if (rc != count) {
1224 		return (EIO);
1225 	}
1226 
1227 	return (0);
1228 }
1229 
1230 /*
1231  * lseek - set / get file pointer
1232  *
1233  * fp -  pointer to file (pipe, socket, etc) to read from
1234  * offp - value to seek to, returns current value plus passed offset
1235  * whence - see man pages for standard lseek whence values
1236  *
1237  * Returns 0 on success errno on failure (ESPIPE for non seekable types)
1238  */
1239 int
1240 zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
1241 {
1242 	loff_t rc;
1243 
1244 	rc = lseek(fp->f_fd, *offp, whence);
1245 	if (rc < 0)
1246 		return (errno);
1247 
1248 	*offp = rc;
1249 
1250 	return (0);
1251 }
1252 
1253 /*
1254  * Get file attributes
1255  *
1256  * filp - file pointer
1257  * zfattr - pointer to file attr structure
1258  *
1259  * Currently only used for fetching size and file mode
1260  *
1261  * Returns 0 on success or error code of underlying getattr call on failure.
1262  */
1263 int
1264 zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
1265 {
1266 	struct stat64 st;
1267 
1268 	if (fstat64_blk(fp->f_fd, &st) == -1)
1269 		return (errno);
1270 
1271 	zfattr->zfa_size = st.st_size;
1272 	zfattr->zfa_mode = st.st_mode;
1273 
1274 	return (0);
1275 }
1276 
1277 /*
1278  * Sync file to disk
1279  *
1280  * filp - file pointer
1281  * flags - O_SYNC and or O_DSYNC
1282  *
1283  * Returns 0 on success or error code of underlying sync call on failure.
1284  */
1285 int
1286 zfs_file_fsync(zfs_file_t *fp, int flags)
1287 {
1288 	int rc;
1289 
1290 	rc = fsync(fp->f_fd);
1291 	if (rc < 0)
1292 		return (errno);
1293 
1294 	return (0);
1295 }
1296 
1297 /*
1298  * fallocate - allocate or free space on disk
1299  *
1300  * fp - file pointer
1301  * mode (non-standard options for hole punching etc)
1302  * offset - offset to start allocating or freeing from
1303  * len - length to free / allocate
1304  *
1305  * OPTIONAL
1306  */
1307 int
1308 zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
1309 {
1310 #ifdef __linux__
1311 	return (fallocate(fp->f_fd, mode, offset, len));
1312 #else
1313 	return (EOPNOTSUPP);
1314 #endif
1315 }
1316 
1317 /*
1318  * Request current file pointer offset
1319  *
1320  * fp - pointer to file
1321  *
1322  * Returns current file offset.
1323  */
1324 loff_t
1325 zfs_file_off(zfs_file_t *fp)
1326 {
1327 	return (lseek(fp->f_fd, SEEK_CUR, 0));
1328 }
1329 
/*
 * unlink file
 *
 * path - fully qualified file path
 *
 * Returns 0 on success and an errno value on failure, like the other file
 * helpers in this file.
 *
 * OPTIONAL
 */
int
zfs_file_unlink(const char *path)
{
	/* remove() reports failure as -1 with the reason in errno. */
	if (remove(path) != 0)
		return (errno);

	return (0);
}
1344 
1345 /*
1346  * Get reference to file pointer
1347  *
1348  * fd - input file descriptor
1349  *
1350  * Returns pointer to file struct or NULL.
1351  * Unsupported in user space.
1352  */
1353 zfs_file_t *
1354 zfs_file_get(int fd)
1355 {
1356 	abort();
1357 
1358 	return (NULL);
1359 }
1360 /*
1361  * Drop reference to file pointer
1362  *
1363  * fp - pointer to file struct
1364  *
1365  * Unsupported in user space.
1366  */
1367 void
1368 zfs_file_put(zfs_file_t *fp)
1369 {
1370 	abort();
1371 }
1372 
/* Userland stub: both names are ignored and nothing is updated. */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	(void) oldname, (void) newname;
}
1377