xref: /illumos-gate/usr/src/uts/intel/io/vmm/vmm_reservoir.c (revision 6bba8b59433dd1331c03414e1d551355c4bd0e06)
17c8c0b82SPatrick Mooney /*
27c8c0b82SPatrick Mooney  * This file and its contents are supplied under the terms of the
37c8c0b82SPatrick Mooney  * Common Development and Distribution License ("CDDL"), version 1.0.
47c8c0b82SPatrick Mooney  * You may only use this file in accordance with the terms of version
57c8c0b82SPatrick Mooney  * 1.0 of the CDDL.
67c8c0b82SPatrick Mooney  *
77c8c0b82SPatrick Mooney  * A full copy of the text of the CDDL should have accompanied this
87c8c0b82SPatrick Mooney  * source.  A copy of the CDDL is also available via the Internet at
97c8c0b82SPatrick Mooney  * http://www.illumos.org/license/CDDL.
107c8c0b82SPatrick Mooney  */
117c8c0b82SPatrick Mooney 
127c8c0b82SPatrick Mooney /*
13*6bba8b59SPatrick Mooney  * Copyright 2023 Oxide Computer Company
147c8c0b82SPatrick Mooney  */
157c8c0b82SPatrick Mooney 
167c8c0b82SPatrick Mooney /*
177c8c0b82SPatrick Mooney  * VMM Memory Reservoir
187c8c0b82SPatrick Mooney  *
197c8c0b82SPatrick Mooney  *
207c8c0b82SPatrick Mooney  * In order to make the allocation of large (multi-GiB) chunks of memory
217c8c0b82SPatrick Mooney  * for bhyve VMs easier, we introduce the "VMM Reservoir", where system
227c8c0b82SPatrick Mooney  * operators can set aside a substantial portion of system memory exclusively
237c8c0b82SPatrick Mooney  * for VMs.  This memory is unavailable for general use by the rest of the
247c8c0b82SPatrick Mooney  * system.  Rather than having to scour the freelist, reap kmem caches, or put
257c8c0b82SPatrick Mooney  * pressure on the ARC, bhyve guest memory allocations can quickly determine if
267c8c0b82SPatrick Mooney  * there is adequate reservoir memory available.  Since the pages stored in the
277c8c0b82SPatrick Mooney  * reservoir are pre-zeroed, it can be immediately used when allocated to a
287c8c0b82SPatrick Mooney  * guest.  When the memory is returned to the reservoir, it is zeroed once more
297c8c0b82SPatrick Mooney  * to avoid leaking any sensitive data from that guest.
307c8c0b82SPatrick Mooney  *
317c8c0b82SPatrick Mooney  *
327c8c0b82SPatrick Mooney  * Transient Allocations
337c8c0b82SPatrick Mooney  *
347c8c0b82SPatrick Mooney  * While the explicit reservoir model may work well for some applications,
357c8c0b82SPatrick Mooney  * others may want a more traditional model, where pages for guest memory
367c8c0b82SPatrick Mooney  * objects are allocated on demand, rather than from a pool set aside from the
377c8c0b82SPatrick Mooney  * system.  In this case, the allocation can be made in "transient" mode, where
387c8c0b82SPatrick Mooney  * the memory is allocated normally, even if there is free capacity in the
397c8c0b82SPatrick Mooney  * reservoir.  When use of the transient allocation is complete (the guest is
407c8c0b82SPatrick Mooney  * halted and destroyed), the pages will be freed back to the system, rather
417c8c0b82SPatrick Mooney  * than added back to the reservoir.
427c8c0b82SPatrick Mooney  *
437c8c0b82SPatrick Mooney  * From an implementation standpoint, transient allocations follow the same
447c8c0b82SPatrick Mooney  * code paths as ones using the reservoir normally.  Those allocations have a
457c8c0b82SPatrick Mooney  * tag which marks them as transient, and used/free size tallies are maintained
467c8c0b82SPatrick Mooney  * separately for normal and transient operations.  When performing a transient
 * allocation, that amount of memory is immediately added to the reservoir,
487c8c0b82SPatrick Mooney  * from which the allocation can be made.  When freeing a transient allocation,
497c8c0b82SPatrick Mooney  * a matching amount of memory is removed from the reservoir as part of the
507c8c0b82SPatrick Mooney  * operation.  This allows both allocation types to coexist without too much
517c8c0b82SPatrick Mooney  * additional machinery.
527c8c0b82SPatrick Mooney  *
537c8c0b82SPatrick Mooney  *
547c8c0b82SPatrick Mooney  * Administration
557c8c0b82SPatrick Mooney  *
56*6bba8b59SPatrick Mooney  * Operators may attempt to alter the amount of memory allocated to the
57*6bba8b59SPatrick Mooney  * reservoir via an ioctl against the vmmctl device.  The total amount of memory
58*6bba8b59SPatrick Mooney  * in the reservoir (free, or allocated to VMs) is arbitrarily limited at this
 * time by `vmmr_total_limit`, which vmmr_init() sizes as all of `physmem` less
 * 120% of `pages_pp_maximum`.  This is done to prevent the reservoir from
 * inadvertently growing to a size where the system has inadequate memory to
 * make forward progress.  Shrinking the reservoir is only possible when it
 * contains free (not allocated by any guest VMs) memory.
647c8c0b82SPatrick Mooney  *
657c8c0b82SPatrick Mooney  *
667c8c0b82SPatrick Mooney  * Page Tracking
677c8c0b82SPatrick Mooney  *
687c8c0b82SPatrick Mooney  * The reservoir currently uses vnode association to keep track of pages under
697c8c0b82SPatrick Mooney  * its control (either designated to the reservoir and free, or allocated to a
707c8c0b82SPatrick Mooney  * guest VM object).  This means using the existing VM system primitives for
717c8c0b82SPatrick Mooney  * page_t instances being associated with a given (vnode, offset) tuple.  It
727c8c0b82SPatrick Mooney  * means that spans of pages, either free or allocated, need only to store a
737c8c0b82SPatrick Mooney  * length (of the span) and an offset (into the vnode) in order to gain access
747c8c0b82SPatrick Mooney  * to all of the underlying pages associated with that span.  Associating the
757c8c0b82SPatrick Mooney  * pages against `kvps[KV_VVP]` (the VMM kernel vnode) means they will be
767c8c0b82SPatrick Mooney  * properly tracked as KAS pages, but be excluded from normal dumps (unless the
777c8c0b82SPatrick Mooney  * operator has chosen to dump all of RAM).
787c8c0b82SPatrick Mooney  */
797c8c0b82SPatrick Mooney 
807c8c0b82SPatrick Mooney #include <sys/types.h>
817c8c0b82SPatrick Mooney #include <sys/mutex.h>
827c8c0b82SPatrick Mooney #include <sys/avl.h>
837c8c0b82SPatrick Mooney #include <sys/list.h>
847c8c0b82SPatrick Mooney #include <sys/machparam.h>
857c8c0b82SPatrick Mooney #include <sys/kmem.h>
867c8c0b82SPatrick Mooney #include <sys/stddef.h>
877c8c0b82SPatrick Mooney #include <sys/null.h>
887c8c0b82SPatrick Mooney #include <sys/errno.h>
897c8c0b82SPatrick Mooney #include <sys/systm.h>
907c8c0b82SPatrick Mooney #include <sys/sunddi.h>
917c8c0b82SPatrick Mooney #include <sys/policy.h>
927c8c0b82SPatrick Mooney #include <vm/seg_kmem.h>
937c8c0b82SPatrick Mooney #include <vm/hat_i86.h>
94*6bba8b59SPatrick Mooney #include <sys/kstat.h>
957c8c0b82SPatrick Mooney 
967c8c0b82SPatrick Mooney #include <sys/vmm_reservoir.h>
977c8c0b82SPatrick Mooney #include <sys/vmm_dev.h>
98*6bba8b59SPatrick Mooney #include <sys/vmm_impl.h>
99*6bba8b59SPatrick Mooney 
100*6bba8b59SPatrick Mooney #define	VMMR_TARGET_INACTIVE	SIZE_MAX
1017c8c0b82SPatrick Mooney 
1027c8c0b82SPatrick Mooney static kmutex_t vmmr_lock;
1037c8c0b82SPatrick Mooney 
1047c8c0b82SPatrick Mooney static size_t vmmr_free_sz;
1057c8c0b82SPatrick Mooney static size_t vmmr_free_transient_sz;
1067c8c0b82SPatrick Mooney static size_t vmmr_adding_sz;
1077c8c0b82SPatrick Mooney static size_t vmmr_alloc_sz;
1087c8c0b82SPatrick Mooney static size_t vmmr_alloc_transient_sz;
1097c8c0b82SPatrick Mooney static size_t vmmr_empty_sz;
1107c8c0b82SPatrick Mooney 
111*6bba8b59SPatrick Mooney /*
112*6bba8b59SPatrick Mooney  * Target size of the reservoir during active vmmr_set_target() operation.
113*6bba8b59SPatrick Mooney  * It holds the sentinel value of VMMR_TARGET_INACTIVE when no resize is active.
114*6bba8b59SPatrick Mooney  */
115*6bba8b59SPatrick Mooney static size_t vmmr_target_sz;
116*6bba8b59SPatrick Mooney 
1177c8c0b82SPatrick Mooney static uintptr_t vmmr_empty_last;
1187c8c0b82SPatrick Mooney /* Upper limit for the size (free + allocated) of the reservoir */
1197c8c0b82SPatrick Mooney static size_t vmmr_total_limit;
1207c8c0b82SPatrick Mooney 
1217c8c0b82SPatrick Mooney /* VA range allocated from the VMM arena for the mappings */
1227c8c0b82SPatrick Mooney static uintptr_t vmmr_va;
1237c8c0b82SPatrick Mooney static uintptr_t vmmr_va_sz;
1247c8c0b82SPatrick Mooney 
125*6bba8b59SPatrick Mooney static kstat_t *vmmr_kstat;
126*6bba8b59SPatrick Mooney 
1277c8c0b82SPatrick Mooney /* Pair of AVL trees to store set of spans ordered by addr and size */
1287c8c0b82SPatrick Mooney typedef struct vmmr_treepair {
1297c8c0b82SPatrick Mooney 	avl_tree_t by_addr;
1307c8c0b82SPatrick Mooney 	avl_tree_t by_size;
1317c8c0b82SPatrick Mooney } vmmr_treepair_t;
1327c8c0b82SPatrick Mooney 
1337c8c0b82SPatrick Mooney /* Spans of free memory in the reservoir */
1347c8c0b82SPatrick Mooney static vmmr_treepair_t vmmr_free_tp;
1357c8c0b82SPatrick Mooney 
1367c8c0b82SPatrick Mooney /* Spans of empty (not backed by memory) space in the reservoir */
1377c8c0b82SPatrick Mooney static vmmr_treepair_t vmmr_empty_tp;
1387c8c0b82SPatrick Mooney 
1397c8c0b82SPatrick Mooney /* Regions of memory allocated from the reservoir */
1407c8c0b82SPatrick Mooney static list_t vmmr_alloc_regions;
1417c8c0b82SPatrick Mooney 
1427c8c0b82SPatrick Mooney struct vmmr_span {
1437c8c0b82SPatrick Mooney 	uintptr_t	vs_addr;
1447c8c0b82SPatrick Mooney 	size_t		vs_size;
1457c8c0b82SPatrick Mooney 	avl_node_t	vs_by_addr;
1467c8c0b82SPatrick Mooney 	avl_node_t	vs_by_size;
1477c8c0b82SPatrick Mooney 	uintptr_t	vs_region_addr;
1487c8c0b82SPatrick Mooney };
1497c8c0b82SPatrick Mooney typedef struct vmmr_span vmmr_span_t;
1507c8c0b82SPatrick Mooney 
/*
 * A region of memory allocated from the reservoir by vmmr_alloc().  It is
 * assembled from one or more spans, which need not be contiguous with one
 * another.
 */
struct vmmr_region {
	/* Total size of the region, as requested from vmmr_alloc() */
	size_t		vr_size;
	/* Spans making up the region, ordered by vs_region_addr */
	avl_tree_t	vr_spans;
	/* Linkage on vmmr_alloc_regions */
	list_node_t	vr_node;
	/* Set when the region was allocated in transient mode */
	bool		vr_transient;
};
1577c8c0b82SPatrick Mooney 
158*6bba8b59SPatrick Mooney typedef struct vmmr_kstats {
159*6bba8b59SPatrick Mooney 	kstat_named_t	vmrks_bytes_free;
160*6bba8b59SPatrick Mooney 	kstat_named_t	vmrks_bytes_alloc;
161*6bba8b59SPatrick Mooney 	kstat_named_t	vmrks_bytes_transient;
162*6bba8b59SPatrick Mooney 	kstat_named_t	vmrks_bytes_limit;
163*6bba8b59SPatrick Mooney } vmmr_kstats_t;
164*6bba8b59SPatrick Mooney 
165*6bba8b59SPatrick Mooney 
/*
 * Forward declarations for helpers which are referenced before their
 * definitions.
 *
 * NOTE(review): the parameter names mirror how vmmr_alloc() invokes
 * vmmr_add(sz, true) on its transient path; vmmr_remove() is presumed to take
 * the same arguments -- confirm against the definitions.
 */
static int vmmr_add(size_t sz, bool transient);
static int vmmr_remove(size_t sz, bool transient);
168*6bba8b59SPatrick Mooney 
1697c8c0b82SPatrick Mooney static int
1707c8c0b82SPatrick Mooney vmmr_cmp_addr(const void *a, const void *b)
1717c8c0b82SPatrick Mooney {
1727c8c0b82SPatrick Mooney 	const vmmr_span_t *sa = a;
1737c8c0b82SPatrick Mooney 	const vmmr_span_t *sb = b;
1747c8c0b82SPatrick Mooney 
1757c8c0b82SPatrick Mooney 	if (sa->vs_addr == sb->vs_addr) {
1767c8c0b82SPatrick Mooney 		return (0);
1777c8c0b82SPatrick Mooney 	} else if (sa->vs_addr < sb->vs_addr) {
1787c8c0b82SPatrick Mooney 		return (-1);
1797c8c0b82SPatrick Mooney 	} else {
1807c8c0b82SPatrick Mooney 		return (1);
1817c8c0b82SPatrick Mooney 	}
1827c8c0b82SPatrick Mooney }
1837c8c0b82SPatrick Mooney 
1847c8c0b82SPatrick Mooney static int
1857c8c0b82SPatrick Mooney vmmr_cmp_size(const void *a, const void *b)
1867c8c0b82SPatrick Mooney {
1877c8c0b82SPatrick Mooney 	const vmmr_span_t *sa = a;
1887c8c0b82SPatrick Mooney 	const vmmr_span_t *sb = b;
1897c8c0b82SPatrick Mooney 
1907c8c0b82SPatrick Mooney 	if (sa->vs_size == sb->vs_size) {
1917c8c0b82SPatrick Mooney 		/*
1927c8c0b82SPatrick Mooney 		 * Since discontiguous spans could have the same size in a
1937c8c0b82SPatrick Mooney 		 * by-size tree, differentiate them (as required by AVL) by
1947c8c0b82SPatrick Mooney 		 * address so they can safely coexist while remaining sorted.
1957c8c0b82SPatrick Mooney 		 */
1967c8c0b82SPatrick Mooney 		return (vmmr_cmp_addr(a, b));
1977c8c0b82SPatrick Mooney 	} else if (sa->vs_size < sb->vs_size) {
1987c8c0b82SPatrick Mooney 		return (-1);
1997c8c0b82SPatrick Mooney 	} else {
2007c8c0b82SPatrick Mooney 		return (1);
2017c8c0b82SPatrick Mooney 	}
2027c8c0b82SPatrick Mooney }
2037c8c0b82SPatrick Mooney 
2047c8c0b82SPatrick Mooney static int
2057c8c0b82SPatrick Mooney vmmr_cmp_region_addr(const void *a, const void *b)
2067c8c0b82SPatrick Mooney {
2077c8c0b82SPatrick Mooney 	const vmmr_span_t *sa = a;
2087c8c0b82SPatrick Mooney 	const vmmr_span_t *sb = b;
2097c8c0b82SPatrick Mooney 
2107c8c0b82SPatrick Mooney 	if (sa->vs_region_addr == sb->vs_region_addr) {
2117c8c0b82SPatrick Mooney 		return (0);
2127c8c0b82SPatrick Mooney 	} else if (sa->vs_region_addr < sb->vs_region_addr) {
2137c8c0b82SPatrick Mooney 		return (-1);
2147c8c0b82SPatrick Mooney 	} else {
2157c8c0b82SPatrick Mooney 		return (1);
2167c8c0b82SPatrick Mooney 	}
2177c8c0b82SPatrick Mooney }
2187c8c0b82SPatrick Mooney 
2197c8c0b82SPatrick Mooney static void
2207c8c0b82SPatrick Mooney vmmr_tp_init(vmmr_treepair_t *tree)
2217c8c0b82SPatrick Mooney {
2227c8c0b82SPatrick Mooney 	avl_create(&tree->by_addr, vmmr_cmp_addr, sizeof (vmmr_span_t),
2237c8c0b82SPatrick Mooney 	    offsetof(vmmr_span_t, vs_by_addr));
2247c8c0b82SPatrick Mooney 	avl_create(&tree->by_size, vmmr_cmp_size, sizeof (vmmr_span_t),
2257c8c0b82SPatrick Mooney 	    offsetof(vmmr_span_t, vs_by_size));
2267c8c0b82SPatrick Mooney }
2277c8c0b82SPatrick Mooney 
2287c8c0b82SPatrick Mooney static void
2297c8c0b82SPatrick Mooney vmmr_tp_destroy(vmmr_treepair_t *tree)
2307c8c0b82SPatrick Mooney {
2317c8c0b82SPatrick Mooney 	void *vcp = NULL;
2327c8c0b82SPatrick Mooney 	vmmr_span_t *span;
2337c8c0b82SPatrick Mooney 
2347c8c0b82SPatrick Mooney 	while (avl_destroy_nodes(&tree->by_addr, &vcp) != NULL) {
2357c8c0b82SPatrick Mooney 		/* Freeing spans will be done when tearing down by-size tree */
2367c8c0b82SPatrick Mooney 	}
2377c8c0b82SPatrick Mooney 	while ((span = avl_destroy_nodes(&tree->by_size, &vcp)) != NULL) {
2387c8c0b82SPatrick Mooney 		kmem_free(span, sizeof (*span));
2397c8c0b82SPatrick Mooney 	}
2407c8c0b82SPatrick Mooney 	avl_destroy(&tree->by_addr);
2417c8c0b82SPatrick Mooney 	avl_destroy(&tree->by_size);
2427c8c0b82SPatrick Mooney }
2437c8c0b82SPatrick Mooney 
2447c8c0b82SPatrick Mooney /*
2457c8c0b82SPatrick Mooney  * Insert a vmmr_span_t into a treepair, concatenating if possible with adjacent
2467c8c0b82SPatrick Mooney  * span(s).  Such concatenation could result in the `to_add` span being freed,
2477c8c0b82SPatrick Mooney  * so the caller cannot use it after this returns.
2487c8c0b82SPatrick Mooney  */
2497c8c0b82SPatrick Mooney static void
2507c8c0b82SPatrick Mooney vmmr_tp_insert_concat(vmmr_span_t *to_add, vmmr_treepair_t *tree)
2517c8c0b82SPatrick Mooney {
2527c8c0b82SPatrick Mooney 	avl_tree_t *by_addr = &tree->by_addr;
2537c8c0b82SPatrick Mooney 	avl_tree_t *by_size = &tree->by_size;
2547c8c0b82SPatrick Mooney 	vmmr_span_t *node;
2557c8c0b82SPatrick Mooney 	avl_index_t where;
2567c8c0b82SPatrick Mooney 
2577c8c0b82SPatrick Mooney 	/* This addr should not already exist in the treepair */
2587c8c0b82SPatrick Mooney 	node = avl_find(by_addr, to_add, &where);
2597c8c0b82SPatrick Mooney 	ASSERT3P(node, ==, NULL);
2607c8c0b82SPatrick Mooney 
2617c8c0b82SPatrick Mooney 	node = avl_nearest(by_addr, where, AVL_BEFORE);
2627c8c0b82SPatrick Mooney 	if (node != NULL &&
2637c8c0b82SPatrick Mooney 	    (node->vs_addr + node->vs_size) == to_add->vs_addr) {
2647c8c0b82SPatrick Mooney 		/* concat with preceeding item */
2657c8c0b82SPatrick Mooney 		avl_remove(by_addr, node);
2667c8c0b82SPatrick Mooney 		avl_remove(by_size, node);
2677c8c0b82SPatrick Mooney 		node->vs_size += to_add->vs_size;
2687c8c0b82SPatrick Mooney 		kmem_free(to_add, sizeof (*to_add));
2697c8c0b82SPatrick Mooney 
2707c8c0b82SPatrick Mooney 		/*
2717c8c0b82SPatrick Mooney 		 * Since this now-concatenated span could be adjacent one
2727c8c0b82SPatrick Mooney 		 * trailing it, fall through to perform that check.
2737c8c0b82SPatrick Mooney 		 */
2747c8c0b82SPatrick Mooney 		to_add = node;
2757c8c0b82SPatrick Mooney 	}
2767c8c0b82SPatrick Mooney 
2777c8c0b82SPatrick Mooney 	node = avl_nearest(by_addr, where, AVL_AFTER);
2787c8c0b82SPatrick Mooney 	if (node != NULL &&
2797c8c0b82SPatrick Mooney 	    (to_add->vs_addr + to_add->vs_size) == node->vs_addr) {
2807c8c0b82SPatrick Mooney 		/* concat with trailing item */
2817c8c0b82SPatrick Mooney 		avl_remove(by_addr, node);
2827c8c0b82SPatrick Mooney 		avl_remove(by_size, node);
2837c8c0b82SPatrick Mooney 		node->vs_addr = to_add->vs_addr;
2847c8c0b82SPatrick Mooney 		node->vs_size += to_add->vs_size;
2857c8c0b82SPatrick Mooney 		avl_add(by_addr, node);
2867c8c0b82SPatrick Mooney 		avl_add(by_size, node);
2877c8c0b82SPatrick Mooney 
2887c8c0b82SPatrick Mooney 		kmem_free(to_add, sizeof (*to_add));
2897c8c0b82SPatrick Mooney 		return;
2907c8c0b82SPatrick Mooney 	}
2917c8c0b82SPatrick Mooney 
2927c8c0b82SPatrick Mooney 	/* simply insert */
2937c8c0b82SPatrick Mooney 	avl_add(by_addr, to_add);
2947c8c0b82SPatrick Mooney 	avl_add(by_size, to_add);
2957c8c0b82SPatrick Mooney }
2967c8c0b82SPatrick Mooney 
2977c8c0b82SPatrick Mooney /*
2987c8c0b82SPatrick Mooney  * Remove a vmmr_span_t from a treepair, splitting if necessary when a span of
2997c8c0b82SPatrick Mooney  * the exact target size is not present, but a larger one is.  May return a span
3007c8c0b82SPatrick Mooney  * with a size smaller than the target if splitting is not an option.
3017c8c0b82SPatrick Mooney  */
3027c8c0b82SPatrick Mooney static vmmr_span_t *
3037c8c0b82SPatrick Mooney vmmr_tp_remove_split(size_t target_sz, vmmr_treepair_t *tree)
3047c8c0b82SPatrick Mooney {
3057c8c0b82SPatrick Mooney 	avl_tree_t *by_addr = &tree->by_addr;
3067c8c0b82SPatrick Mooney 	avl_tree_t *by_size = &tree->by_size;
3077c8c0b82SPatrick Mooney 	vmmr_span_t *span;
3087c8c0b82SPatrick Mooney 	avl_index_t where;
3097c8c0b82SPatrick Mooney 
3107c8c0b82SPatrick Mooney 	ASSERT3U(target_sz, !=, 0);
3117c8c0b82SPatrick Mooney 	ASSERT(!avl_is_empty(by_addr));
3127c8c0b82SPatrick Mooney 	ASSERT(!avl_is_empty(by_size));
3137c8c0b82SPatrick Mooney 
3147c8c0b82SPatrick Mooney 	vmmr_span_t search = { .vs_size = target_sz };
3157c8c0b82SPatrick Mooney 	span = avl_find(by_size, &search, &where);
3167c8c0b82SPatrick Mooney 	if (span == NULL) {
3177c8c0b82SPatrick Mooney 		/* Try for a larger span (instead of exact match) */
3187c8c0b82SPatrick Mooney 		span = avl_nearest(by_size, where, AVL_AFTER);
3197c8c0b82SPatrick Mooney 		if (span == NULL) {
3207c8c0b82SPatrick Mooney 			/*
3217c8c0b82SPatrick Mooney 			 * Caller will need to collect several smaller spans in
3227c8c0b82SPatrick Mooney 			 * order to fulfill their request.
3237c8c0b82SPatrick Mooney 			 */
3247c8c0b82SPatrick Mooney 			span = avl_nearest(by_size, where, AVL_BEFORE);
3257c8c0b82SPatrick Mooney 			ASSERT3P(span, !=, NULL);
3267c8c0b82SPatrick Mooney 		}
3277c8c0b82SPatrick Mooney 	}
3287c8c0b82SPatrick Mooney 
3297c8c0b82SPatrick Mooney 	if (span->vs_size <= target_sz) {
3307c8c0b82SPatrick Mooney 		avl_remove(by_size, span);
3317c8c0b82SPatrick Mooney 		avl_remove(by_addr, span);
3327c8c0b82SPatrick Mooney 
3337c8c0b82SPatrick Mooney 		return (span);
3347c8c0b82SPatrick Mooney 	} else {
3357c8c0b82SPatrick Mooney 		/* Split off adequate chunk from larger span */
3367c8c0b82SPatrick Mooney 		uintptr_t start = span->vs_addr + span->vs_size - target_sz;
3377c8c0b82SPatrick Mooney 
3387c8c0b82SPatrick Mooney 		avl_remove(by_size, span);
3397c8c0b82SPatrick Mooney 		span->vs_size -= target_sz;
3407c8c0b82SPatrick Mooney 		avl_add(by_size, span);
3417c8c0b82SPatrick Mooney 
3427c8c0b82SPatrick Mooney 		vmmr_span_t *split_span =
3437c8c0b82SPatrick Mooney 		    kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP);
3447c8c0b82SPatrick Mooney 		split_span->vs_addr = start;
3457c8c0b82SPatrick Mooney 		split_span->vs_size = target_sz;
3467c8c0b82SPatrick Mooney 
3477c8c0b82SPatrick Mooney 		return (split_span);
3487c8c0b82SPatrick Mooney 	}
3497c8c0b82SPatrick Mooney }
3507c8c0b82SPatrick Mooney 
351*6bba8b59SPatrick Mooney static int
352*6bba8b59SPatrick Mooney vmmr_kstat_update(struct kstat *ksp, int rw)
353*6bba8b59SPatrick Mooney {
354*6bba8b59SPatrick Mooney 	vmmr_kstats_t *vkp = ksp->ks_data;
355*6bba8b59SPatrick Mooney 
356*6bba8b59SPatrick Mooney 	mutex_enter(&vmmr_lock);
357*6bba8b59SPatrick Mooney 	vkp->vmrks_bytes_free.value.ui64 = vmmr_free_sz;
358*6bba8b59SPatrick Mooney 	vkp->vmrks_bytes_alloc.value.ui64 = vmmr_alloc_sz;
359*6bba8b59SPatrick Mooney 	/*
360*6bba8b59SPatrick Mooney 	 * In addition to the memory which is actually actually allocated to
361*6bba8b59SPatrick Mooney 	 * transient consumers, memory which is considered free-for-transient is
362*6bba8b59SPatrick Mooney 	 * also included in the sizing.
363*6bba8b59SPatrick Mooney 	 */
364*6bba8b59SPatrick Mooney 	vkp->vmrks_bytes_transient.value.ui64 =
365*6bba8b59SPatrick Mooney 	    vmmr_alloc_transient_sz + vmmr_free_transient_sz;
366*6bba8b59SPatrick Mooney 	vkp->vmrks_bytes_limit.value.ui64 = vmmr_total_limit;
367*6bba8b59SPatrick Mooney 	mutex_exit(&vmmr_lock);
368*6bba8b59SPatrick Mooney 
369*6bba8b59SPatrick Mooney 	return (0);
370*6bba8b59SPatrick Mooney }
371*6bba8b59SPatrick Mooney 
372*6bba8b59SPatrick Mooney int
3737c8c0b82SPatrick Mooney vmmr_init()
3747c8c0b82SPatrick Mooney {
3757c8c0b82SPatrick Mooney 	mutex_init(&vmmr_lock, NULL, MUTEX_DEFAULT, NULL);
3767c8c0b82SPatrick Mooney 
3777c8c0b82SPatrick Mooney 	/*
3787c8c0b82SPatrick Mooney 	 * `vmm_total_limit` represents the absolute maximum size of the VMM
3797c8c0b82SPatrick Mooney 	 * memory reservoir.  It is meant to provide some measure of protection
3807c8c0b82SPatrick Mooney 	 * against an operator pushing the system into unrecoverable memory
3817c8c0b82SPatrick Mooney 	 * starvation through explicit or transient additions to the reservoir.
3827c8c0b82SPatrick Mooney 	 *
3837c8c0b82SPatrick Mooney 	 * There will be many situations where this limit would be inadequate to
3847c8c0b82SPatrick Mooney 	 * prevent kernel memory starvation in the face of certain operator
3857c8c0b82SPatrick Mooney 	 * actions.  It is a balance to be struck between safety and allowing
3867c8c0b82SPatrick Mooney 	 * large systems to reach high utilization.
3877c8c0b82SPatrick Mooney 	 *
3887c8c0b82SPatrick Mooney 	 * The value is based off of pages_pp_maximum: "Number of currently
3897c8c0b82SPatrick Mooney 	 * available pages that cannot be 'locked'".  It is sized as all of
3907c8c0b82SPatrick Mooney 	 * `physmem` less 120% of `pages_pp_maximum`.
3917c8c0b82SPatrick Mooney 	 */
3927c8c0b82SPatrick Mooney 	vmmr_total_limit =
3937c8c0b82SPatrick Mooney 	    (((physmem * 10)  - (pages_pp_maximum * 12)) * PAGESIZE) / 10;
3947c8c0b82SPatrick Mooney 
3957c8c0b82SPatrick Mooney 	vmmr_empty_last = 0;
3967c8c0b82SPatrick Mooney 	vmmr_free_sz = 0;
3977c8c0b82SPatrick Mooney 	vmmr_alloc_sz = 0;
3987c8c0b82SPatrick Mooney 	vmmr_empty_sz = 0;
3997c8c0b82SPatrick Mooney 	vmmr_adding_sz = 0;
4007c8c0b82SPatrick Mooney 	vmmr_free_transient_sz = 0;
4017c8c0b82SPatrick Mooney 	vmmr_alloc_transient_sz = 0;
402*6bba8b59SPatrick Mooney 	vmmr_target_sz = VMMR_TARGET_INACTIVE;
403*6bba8b59SPatrick Mooney 
404*6bba8b59SPatrick Mooney 	/*
405*6bba8b59SPatrick Mooney 	 * Attempt kstat allocation early, since it is the only part of
406*6bba8b59SPatrick Mooney 	 * reservoir initialization which is fallible.
407*6bba8b59SPatrick Mooney 	 */
408*6bba8b59SPatrick Mooney 	kstat_t *ksp = kstat_create_zone(VMM_MODULE_NAME, 0, "vmm_reservoir",
409*6bba8b59SPatrick Mooney 	    VMM_KSTAT_CLASS, KSTAT_TYPE_NAMED,
410*6bba8b59SPatrick Mooney 	    sizeof (vmmr_kstats_t) / sizeof (kstat_named_t), 0, GLOBAL_ZONEID);
411*6bba8b59SPatrick Mooney 	if (ksp == NULL) {
412*6bba8b59SPatrick Mooney 		mutex_destroy(&vmmr_lock);
413*6bba8b59SPatrick Mooney 		return (ENOMEM);
414*6bba8b59SPatrick Mooney 	}
415*6bba8b59SPatrick Mooney 
416*6bba8b59SPatrick Mooney 	vmmr_kstats_t *vkp = ksp->ks_data;
417*6bba8b59SPatrick Mooney 
418*6bba8b59SPatrick Mooney 	kstat_named_init(&vkp->vmrks_bytes_free, "bytes_free",
419*6bba8b59SPatrick Mooney 	    KSTAT_DATA_UINT64);
420*6bba8b59SPatrick Mooney 	kstat_named_init(&vkp->vmrks_bytes_alloc, "bytes_alloc",
421*6bba8b59SPatrick Mooney 	    KSTAT_DATA_UINT64);
422*6bba8b59SPatrick Mooney 	kstat_named_init(&vkp->vmrks_bytes_transient, "bytes_transient_alloc",
423*6bba8b59SPatrick Mooney 	    KSTAT_DATA_UINT64);
424*6bba8b59SPatrick Mooney 	kstat_named_init(&vkp->vmrks_bytes_limit, "bytes_limit",
425*6bba8b59SPatrick Mooney 	    KSTAT_DATA_UINT64);
426*6bba8b59SPatrick Mooney 	ksp->ks_private = NULL;
427*6bba8b59SPatrick Mooney 	ksp->ks_update = vmmr_kstat_update;
428*6bba8b59SPatrick Mooney 	vmmr_kstat = ksp;
4297c8c0b82SPatrick Mooney 
4307c8c0b82SPatrick Mooney 	vmmr_tp_init(&vmmr_free_tp);
4317c8c0b82SPatrick Mooney 	vmmr_tp_init(&vmmr_empty_tp);
4327c8c0b82SPatrick Mooney 
4337c8c0b82SPatrick Mooney 	list_create(&vmmr_alloc_regions, sizeof (vmmr_region_t),
4347c8c0b82SPatrick Mooney 	    offsetof(vmmr_region_t, vr_node));
4357c8c0b82SPatrick Mooney 
4367c8c0b82SPatrick Mooney 	/* Grab a chunk of VA for the reservoir */
4377c8c0b82SPatrick Mooney 	vmmr_va_sz = physmem * PAGESIZE;
4387c8c0b82SPatrick Mooney 	vmmr_va = (uintptr_t)vmem_alloc(kvmm_arena, vmmr_va_sz, VM_SLEEP);
439*6bba8b59SPatrick Mooney 
440*6bba8b59SPatrick Mooney 	kstat_install(vmmr_kstat);
441*6bba8b59SPatrick Mooney 
442*6bba8b59SPatrick Mooney 	return (0);
4437c8c0b82SPatrick Mooney }
4447c8c0b82SPatrick Mooney 
4457c8c0b82SPatrick Mooney void
4467c8c0b82SPatrick Mooney vmmr_fini()
4477c8c0b82SPatrick Mooney {
4487c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
4497c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_alloc_sz, ==, 0);
4507c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_free_sz, ==, 0);
4517c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_adding_sz, ==, 0);
4527c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_alloc_transient_sz, ==, 0);
4537c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_free_transient_sz, ==, 0);
4547c8c0b82SPatrick Mooney 	VERIFY(avl_is_empty(&vmmr_free_tp.by_addr));
4557c8c0b82SPatrick Mooney 	VERIFY(avl_is_empty(&vmmr_free_tp.by_size));
4567c8c0b82SPatrick Mooney 	VERIFY(list_is_empty(&vmmr_alloc_regions));
4577c8c0b82SPatrick Mooney 
458*6bba8b59SPatrick Mooney 	kstat_delete(vmmr_kstat);
459*6bba8b59SPatrick Mooney 	vmmr_kstat = NULL;
460*6bba8b59SPatrick Mooney 
4617c8c0b82SPatrick Mooney 	vmmr_tp_destroy(&vmmr_free_tp);
4627c8c0b82SPatrick Mooney 	vmmr_tp_destroy(&vmmr_empty_tp);
4637c8c0b82SPatrick Mooney 	list_destroy(&vmmr_alloc_regions);
4647c8c0b82SPatrick Mooney 
4657c8c0b82SPatrick Mooney 	/* Release reservoir VA chunk */
4667c8c0b82SPatrick Mooney 	vmem_free(kvmm_arena, (void *)vmmr_va, vmmr_va_sz);
4677c8c0b82SPatrick Mooney 	vmmr_va = 0;
4687c8c0b82SPatrick Mooney 	vmmr_va_sz = 0;
4697c8c0b82SPatrick Mooney 	vmmr_total_limit = 0;
4707c8c0b82SPatrick Mooney 	vmmr_empty_last = 0;
4717c8c0b82SPatrick Mooney 
4727c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
4737c8c0b82SPatrick Mooney 	mutex_destroy(&vmmr_lock);
4747c8c0b82SPatrick Mooney }
4757c8c0b82SPatrick Mooney 
4767c8c0b82SPatrick Mooney bool
4777c8c0b82SPatrick Mooney vmmr_is_empty()
4787c8c0b82SPatrick Mooney {
4797c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
4807c8c0b82SPatrick Mooney 	bool res = (vmmr_alloc_sz == 0 && vmmr_alloc_transient_sz == 0 &&
4817c8c0b82SPatrick Mooney 	    vmmr_free_sz == 0 && vmmr_free_transient_sz == 0);
4827c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
4837c8c0b82SPatrick Mooney 	return (res);
4847c8c0b82SPatrick Mooney }
4857c8c0b82SPatrick Mooney 
4867c8c0b82SPatrick Mooney int
4877c8c0b82SPatrick Mooney vmmr_alloc(size_t sz, bool transient, vmmr_region_t **resp)
4887c8c0b82SPatrick Mooney {
4897c8c0b82SPatrick Mooney 	VERIFY3U(sz & PAGEOFFSET, ==, 0);
4907c8c0b82SPatrick Mooney 
4917c8c0b82SPatrick Mooney 	if (!transient) {
4927c8c0b82SPatrick Mooney 		mutex_enter(&vmmr_lock);
4937c8c0b82SPatrick Mooney 		if (sz > vmmr_free_sz) {
4947c8c0b82SPatrick Mooney 			mutex_exit(&vmmr_lock);
4957c8c0b82SPatrick Mooney 			return (ENOSPC);
4967c8c0b82SPatrick Mooney 		}
4977c8c0b82SPatrick Mooney 	} else {
4987c8c0b82SPatrick Mooney 		int err;
4997c8c0b82SPatrick Mooney 
500*6bba8b59SPatrick Mooney 		mutex_enter(&vmmr_lock);
5017c8c0b82SPatrick Mooney 		err = vmmr_add(sz, true);
5027c8c0b82SPatrick Mooney 		if (err != 0) {
503*6bba8b59SPatrick Mooney 			mutex_exit(&vmmr_lock);
5047c8c0b82SPatrick Mooney 			return (err);
5057c8c0b82SPatrick Mooney 		}
5067c8c0b82SPatrick Mooney 		VERIFY3U(vmmr_free_transient_sz, >=, sz);
5077c8c0b82SPatrick Mooney 	}
5087c8c0b82SPatrick Mooney 
5097c8c0b82SPatrick Mooney 	vmmr_region_t *region;
5107c8c0b82SPatrick Mooney 	region = kmem_zalloc(sizeof (vmmr_region_t), KM_SLEEP);
5117c8c0b82SPatrick Mooney 	avl_create(&region->vr_spans, vmmr_cmp_region_addr,
5127c8c0b82SPatrick Mooney 	    sizeof (vmmr_span_t), offsetof(vmmr_span_t, vs_by_addr));
5137c8c0b82SPatrick Mooney 	region->vr_size = sz;
5147c8c0b82SPatrick Mooney 
5157c8c0b82SPatrick Mooney 	size_t remain = sz;
5167c8c0b82SPatrick Mooney 	uintptr_t map_at = 0;
5177c8c0b82SPatrick Mooney 	while (remain > 0) {
5187c8c0b82SPatrick Mooney 		vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp);
5197c8c0b82SPatrick Mooney 
5207c8c0b82SPatrick Mooney 		/*
5217c8c0b82SPatrick Mooney 		 * We have already ensured that adequate free memory is present
5227c8c0b82SPatrick Mooney 		 * in the reservoir for this allocation.
5237c8c0b82SPatrick Mooney 		 */
5247c8c0b82SPatrick Mooney 		VERIFY3P(span, !=, NULL);
5257c8c0b82SPatrick Mooney 		ASSERT3U(span->vs_size, <=, remain);
5267c8c0b82SPatrick Mooney 
5277c8c0b82SPatrick Mooney 		span->vs_region_addr = map_at;
5287c8c0b82SPatrick Mooney 		avl_add(&region->vr_spans, span);
5297c8c0b82SPatrick Mooney 		map_at += span->vs_size;
5307c8c0b82SPatrick Mooney 		remain -= span->vs_size;
5317c8c0b82SPatrick Mooney 	}
5327c8c0b82SPatrick Mooney 
5337c8c0b82SPatrick Mooney 	if (!transient) {
5347c8c0b82SPatrick Mooney 		vmmr_free_sz -= sz;
5357c8c0b82SPatrick Mooney 		vmmr_alloc_sz += sz;
5367c8c0b82SPatrick Mooney 	} else {
5377c8c0b82SPatrick Mooney 		vmmr_free_transient_sz -= sz;
5387c8c0b82SPatrick Mooney 		vmmr_alloc_transient_sz += sz;
5397c8c0b82SPatrick Mooney 		region->vr_transient = true;
5407c8c0b82SPatrick Mooney 	}
5417c8c0b82SPatrick Mooney 	list_insert_tail(&vmmr_alloc_regions, region);
5427c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
5437c8c0b82SPatrick Mooney 
5447c8c0b82SPatrick Mooney 	*resp = region;
5457c8c0b82SPatrick Mooney 	return (0);
5467c8c0b82SPatrick Mooney }
5477c8c0b82SPatrick Mooney 
5487c8c0b82SPatrick Mooney void *
5497c8c0b82SPatrick Mooney vmmr_region_mem_at(vmmr_region_t *region, uintptr_t off)
5507c8c0b82SPatrick Mooney {
5517c8c0b82SPatrick Mooney 	/* just use KPM region for now */
5527c8c0b82SPatrick Mooney 	return (hat_kpm_pfn2va(vmmr_region_pfn_at(region, off)));
5537c8c0b82SPatrick Mooney }
5547c8c0b82SPatrick Mooney 
5557c8c0b82SPatrick Mooney pfn_t
5567c8c0b82SPatrick Mooney vmmr_region_pfn_at(vmmr_region_t *region, uintptr_t off)
5577c8c0b82SPatrick Mooney {
5587c8c0b82SPatrick Mooney 	VERIFY3U(off & PAGEOFFSET, ==, 0);
5597c8c0b82SPatrick Mooney 	VERIFY3U(off, <, region->vr_size);
5607c8c0b82SPatrick Mooney 
5617c8c0b82SPatrick Mooney 	vmmr_span_t search = {
5627c8c0b82SPatrick Mooney 		.vs_region_addr = off
5637c8c0b82SPatrick Mooney 	};
5647c8c0b82SPatrick Mooney 	avl_index_t where;
5657c8c0b82SPatrick Mooney 	vmmr_span_t *span = avl_find(&region->vr_spans, &search, &where);
5667c8c0b82SPatrick Mooney 
5677c8c0b82SPatrick Mooney 	if (span == NULL) {
5687c8c0b82SPatrick Mooney 		span = avl_nearest(&region->vr_spans, where, AVL_BEFORE);
5697c8c0b82SPatrick Mooney 		ASSERT3P(span, !=, NULL);
5707c8c0b82SPatrick Mooney 	}
5717c8c0b82SPatrick Mooney 	uintptr_t span_off = off - span->vs_region_addr + span->vs_addr;
5727c8c0b82SPatrick Mooney 	page_t *pp = page_find(&kvps[KV_VVP], (u_offset_t)span_off);
5737c8c0b82SPatrick Mooney 	VERIFY(pp != NULL);
5747c8c0b82SPatrick Mooney 	return (pp->p_pagenum);
5757c8c0b82SPatrick Mooney }
5767c8c0b82SPatrick Mooney 
5777c8c0b82SPatrick Mooney void
5787c8c0b82SPatrick Mooney vmmr_free(vmmr_region_t *region)
5797c8c0b82SPatrick Mooney {
5807c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
5817c8c0b82SPatrick Mooney 	if (!region->vr_transient) {
5827c8c0b82SPatrick Mooney 		VERIFY3U(region->vr_size, <=, vmmr_alloc_sz);
5837c8c0b82SPatrick Mooney 	} else {
5847c8c0b82SPatrick Mooney 		VERIFY3U(region->vr_size, <=, vmmr_alloc_transient_sz);
5857c8c0b82SPatrick Mooney 	}
5867c8c0b82SPatrick Mooney 	list_remove(&vmmr_alloc_regions, region);
5877c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
5887c8c0b82SPatrick Mooney 
589*6bba8b59SPatrick Mooney 	/* Zero the contents (while not monopolizing vmmr_lock) */
5907c8c0b82SPatrick Mooney 	for (uintptr_t off = 0; off < region->vr_size; off += PAGESIZE) {
5917c8c0b82SPatrick Mooney 		bzero(vmmr_region_mem_at(region, off), PAGESIZE);
5927c8c0b82SPatrick Mooney 	}
5937c8c0b82SPatrick Mooney 
5947c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
5957c8c0b82SPatrick Mooney 
5967c8c0b82SPatrick Mooney 	/* Put the contained span(s) back in the free pool */
5977c8c0b82SPatrick Mooney 	void *cookie = NULL;
5987c8c0b82SPatrick Mooney 	vmmr_span_t *span;
5997c8c0b82SPatrick Mooney 	while ((span = avl_destroy_nodes(&region->vr_spans, &cookie)) != NULL) {
6007c8c0b82SPatrick Mooney 		span->vs_region_addr = 0;
6017c8c0b82SPatrick Mooney 		vmmr_tp_insert_concat(span, &vmmr_free_tp);
6027c8c0b82SPatrick Mooney 	}
6037c8c0b82SPatrick Mooney 	avl_destroy(&region->vr_spans);
6047c8c0b82SPatrick Mooney 	if (!region->vr_transient) {
6057c8c0b82SPatrick Mooney 		vmmr_free_sz += region->vr_size;
6067c8c0b82SPatrick Mooney 		vmmr_alloc_sz -= region->vr_size;
6077c8c0b82SPatrick Mooney 	} else {
6087c8c0b82SPatrick Mooney 		vmmr_free_transient_sz += region->vr_size;
6097c8c0b82SPatrick Mooney 		vmmr_alloc_transient_sz -= region->vr_size;
6107c8c0b82SPatrick Mooney 	}
6117c8c0b82SPatrick Mooney 
6127c8c0b82SPatrick Mooney 	if (region->vr_transient) {
613e0994bd2SPatrick Mooney 		/*
614e0994bd2SPatrick Mooney 		 * Since the transient capacity was previously allocated for
615e0994bd2SPatrick Mooney 		 * this region, its removal should not fail.
616e0994bd2SPatrick Mooney 		 */
617e0994bd2SPatrick Mooney 		VERIFY0(vmmr_remove(region->vr_size, true));
6187c8c0b82SPatrick Mooney 	}
6197c8c0b82SPatrick Mooney 	kmem_free(region, sizeof (*region));
620*6bba8b59SPatrick Mooney 	mutex_exit(&vmmr_lock);
6217c8c0b82SPatrick Mooney }
6227c8c0b82SPatrick Mooney 
6237c8c0b82SPatrick Mooney static void
6247c8c0b82SPatrick Mooney vmmr_destroy_pages(vmmr_span_t *span)
6257c8c0b82SPatrick Mooney {
6267c8c0b82SPatrick Mooney 	const uintptr_t end = span->vs_addr + span->vs_size;
6277c8c0b82SPatrick Mooney 	struct vnode *vp = &kvps[KV_VVP];
6287c8c0b82SPatrick Mooney 	for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) {
6297c8c0b82SPatrick Mooney 		page_t *pp;
6307c8c0b82SPatrick Mooney 
6317c8c0b82SPatrick Mooney 		/* Page-free logic cribbed from segkmem_xfree(): */
6327c8c0b82SPatrick Mooney 		pp = page_find(vp, (u_offset_t)pos);
6337c8c0b82SPatrick Mooney 		VERIFY(pp != NULL);
6347c8c0b82SPatrick Mooney 		if (!page_tryupgrade(pp)) {
6357c8c0b82SPatrick Mooney 			/*
6367c8c0b82SPatrick Mooney 			 * Some other thread has a sharelock. Wait for
6377c8c0b82SPatrick Mooney 			 * it to drop the lock so we can free this page.
6387c8c0b82SPatrick Mooney 			 */
6397c8c0b82SPatrick Mooney 			page_unlock(pp);
6407c8c0b82SPatrick Mooney 			pp = page_lookup(vp, (u_offset_t)pos, SE_EXCL);
6417c8c0b82SPatrick Mooney 		}
6427c8c0b82SPatrick Mooney 
6437c8c0b82SPatrick Mooney 		/*
6447c8c0b82SPatrick Mooney 		 * Clear p_lckcnt so page_destroy() doesn't update availrmem.
6457c8c0b82SPatrick Mooney 		 * That will be taken care of later via page_unresv().
6467c8c0b82SPatrick Mooney 		 */
6477c8c0b82SPatrick Mooney 		pp->p_lckcnt = 0;
6487c8c0b82SPatrick Mooney 		page_destroy(pp, 0);
6497c8c0b82SPatrick Mooney 	}
6507c8c0b82SPatrick Mooney }
6517c8c0b82SPatrick Mooney 
6527c8c0b82SPatrick Mooney static int
6537c8c0b82SPatrick Mooney vmmr_alloc_pages(const vmmr_span_t *span)
6547c8c0b82SPatrick Mooney {
6557c8c0b82SPatrick Mooney 	struct seg kseg = {
6567c8c0b82SPatrick Mooney 		.s_as = &kas
6577c8c0b82SPatrick Mooney 	};
6587c8c0b82SPatrick Mooney 	struct vnode *vp = &kvps[KV_VVP];
6597c8c0b82SPatrick Mooney 
6607c8c0b82SPatrick Mooney 	const uintptr_t end = span->vs_addr + span->vs_size;
6617c8c0b82SPatrick Mooney 	for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) {
6627c8c0b82SPatrick Mooney 		page_t *pp;
6637c8c0b82SPatrick Mooney 
6647c8c0b82SPatrick Mooney 		pp = page_create_va(vp, (u_offset_t)pos, PAGESIZE,
6657c8c0b82SPatrick Mooney 		    PG_EXCL | PG_NORELOC, &kseg, (void *)(vmmr_va + pos));
6667c8c0b82SPatrick Mooney 
6677c8c0b82SPatrick Mooney 		if (pp == NULL) {
6687c8c0b82SPatrick Mooney 			/* Destroy any already-created pages */
6697c8c0b82SPatrick Mooney 			if (pos != span->vs_addr) {
6707c8c0b82SPatrick Mooney 				vmmr_span_t destroy_span = {
6717c8c0b82SPatrick Mooney 					.vs_addr = span->vs_addr,
6727c8c0b82SPatrick Mooney 					.vs_size = pos - span->vs_addr,
6737c8c0b82SPatrick Mooney 				};
6747c8c0b82SPatrick Mooney 
6757c8c0b82SPatrick Mooney 				vmmr_destroy_pages(&destroy_span);
6767c8c0b82SPatrick Mooney 			}
6777c8c0b82SPatrick Mooney 			return (ENOMEM);
6787c8c0b82SPatrick Mooney 		}
6797c8c0b82SPatrick Mooney 
6807c8c0b82SPatrick Mooney 		/* mimic page state from segkmem */
6817c8c0b82SPatrick Mooney 		ASSERT(PAGE_EXCL(pp));
6827c8c0b82SPatrick Mooney 		page_io_unlock(pp);
6837c8c0b82SPatrick Mooney 		pp->p_lckcnt = 1;
6847c8c0b82SPatrick Mooney 		page_downgrade(pp);
6857c8c0b82SPatrick Mooney 
6867c8c0b82SPatrick Mooney 		/* pre-zero the page */
6877c8c0b82SPatrick Mooney 		bzero(hat_kpm_pfn2va(pp->p_pagenum), PAGESIZE);
6887c8c0b82SPatrick Mooney 	}
6897c8c0b82SPatrick Mooney 
6907c8c0b82SPatrick Mooney 	return (0);
6917c8c0b82SPatrick Mooney }
6927c8c0b82SPatrick Mooney 
6937c8c0b82SPatrick Mooney static int
6947c8c0b82SPatrick Mooney vmmr_resv_wait()
6957c8c0b82SPatrick Mooney {
6967c8c0b82SPatrick Mooney 	if (delay_sig(hz >> 2) != 0) {
6977c8c0b82SPatrick Mooney 		/* bail due to interruption */
6987c8c0b82SPatrick Mooney 		return (0);
6997c8c0b82SPatrick Mooney 	}
7007c8c0b82SPatrick Mooney 	return (1);
7017c8c0b82SPatrick Mooney }
7027c8c0b82SPatrick Mooney 
7037c8c0b82SPatrick Mooney static void
7047c8c0b82SPatrick Mooney vmmr_remove_raw(size_t sz)
7057c8c0b82SPatrick Mooney {
7067c8c0b82SPatrick Mooney 	VERIFY3U(sz & PAGEOFFSET, ==, 0);
7077c8c0b82SPatrick Mooney 	VERIFY(MUTEX_HELD(&vmmr_lock));
7087c8c0b82SPatrick Mooney 
7097c8c0b82SPatrick Mooney 	size_t remain = sz;
7107c8c0b82SPatrick Mooney 	while (remain > 0) {
7117c8c0b82SPatrick Mooney 		vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp);
7127c8c0b82SPatrick Mooney 
7137c8c0b82SPatrick Mooney 		/*
7147c8c0b82SPatrick Mooney 		 * The caller must ensure that at least `sz` amount is present
7157c8c0b82SPatrick Mooney 		 * in the free treepair.
7167c8c0b82SPatrick Mooney 		 */
7177c8c0b82SPatrick Mooney 		VERIFY3P(span, !=, NULL);
7187c8c0b82SPatrick Mooney 		ASSERT3U(span->vs_size, <=, remain);
7197c8c0b82SPatrick Mooney 
7207c8c0b82SPatrick Mooney 		/* TODO: perhaps arrange to destroy pages outside the lock? */
7217c8c0b82SPatrick Mooney 		vmmr_destroy_pages(span);
7227c8c0b82SPatrick Mooney 
7237c8c0b82SPatrick Mooney 		remain -= span->vs_size;
7247c8c0b82SPatrick Mooney 		vmmr_tp_insert_concat(span, &vmmr_empty_tp);
7257c8c0b82SPatrick Mooney 	}
7267c8c0b82SPatrick Mooney 
7277c8c0b82SPatrick Mooney 	vmmr_empty_sz += sz;
7287c8c0b82SPatrick Mooney }
7297c8c0b82SPatrick Mooney 
730*6bba8b59SPatrick Mooney /*
731*6bba8b59SPatrick Mooney  * Add memory to vmm reservoir.  Memory may be marked for transient use, where
732*6bba8b59SPatrick Mooney  * the addition is part of a transient allocation from the reservoir.  Otherwise
733*6bba8b59SPatrick Mooney  * it is placed in the reservoir to be available for non-transient allocations.
734*6bba8b59SPatrick Mooney  *
735*6bba8b59SPatrick Mooney  * Expects vmmr_lock to be held when called, and will return with it held, but
736*6bba8b59SPatrick Mooney  * will drop it during portions of the addition.
737*6bba8b59SPatrick Mooney  */
738*6bba8b59SPatrick Mooney static int
7397c8c0b82SPatrick Mooney vmmr_add(size_t sz, bool transient)
7407c8c0b82SPatrick Mooney {
7417c8c0b82SPatrick Mooney 	VERIFY3U(sz & PAGEOFFSET, ==, 0);
742*6bba8b59SPatrick Mooney 	VERIFY3U(sz, >, 0);
743*6bba8b59SPatrick Mooney 	VERIFY(MUTEX_HELD(&vmmr_lock));
7447c8c0b82SPatrick Mooney 
7457c8c0b82SPatrick Mooney 	/*
7467c8c0b82SPatrick Mooney 	 * Make sure that the amount added is not going to breach the limits
7477c8c0b82SPatrick Mooney 	 * we've chosen
7487c8c0b82SPatrick Mooney 	 */
7497c8c0b82SPatrick Mooney 	const size_t current_total =
7507c8c0b82SPatrick Mooney 	    vmmr_alloc_sz + vmmr_free_sz + vmmr_adding_sz +
7517c8c0b82SPatrick Mooney 	    vmmr_alloc_transient_sz + vmmr_free_transient_sz;
7527c8c0b82SPatrick Mooney 	if ((current_total + sz) < current_total) {
7537c8c0b82SPatrick Mooney 		return (EOVERFLOW);
7547c8c0b82SPatrick Mooney 	}
7557c8c0b82SPatrick Mooney 	if ((current_total + sz) > vmmr_total_limit) {
7567c8c0b82SPatrick Mooney 		return (ENOSPC);
7577c8c0b82SPatrick Mooney 	}
7587c8c0b82SPatrick Mooney 	vmmr_adding_sz += sz;
7597c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
7607c8c0b82SPatrick Mooney 
7617c8c0b82SPatrick Mooney 	/* Wait for enough pages to become available */
7627c8c0b82SPatrick Mooney 	if (page_xresv(sz >> PAGESHIFT, KM_SLEEP, vmmr_resv_wait) == 0) {
7637c8c0b82SPatrick Mooney 		mutex_enter(&vmmr_lock);
7647c8c0b82SPatrick Mooney 		vmmr_adding_sz -= sz;
7657c8c0b82SPatrick Mooney 		return (EINTR);
7667c8c0b82SPatrick Mooney 	}
7677c8c0b82SPatrick Mooney 
7687c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
7697c8c0b82SPatrick Mooney 	size_t added = 0;
7707c8c0b82SPatrick Mooney 	size_t remain = sz;
7717c8c0b82SPatrick Mooney 	while (added < sz) {
7727c8c0b82SPatrick Mooney 		vmmr_span_t *span = NULL;
7737c8c0b82SPatrick Mooney 
7747c8c0b82SPatrick Mooney 		if (vmmr_empty_sz > 0) {
7757c8c0b82SPatrick Mooney 			span = vmmr_tp_remove_split(remain, &vmmr_empty_tp);
7767c8c0b82SPatrick Mooney 
7777c8c0b82SPatrick Mooney 			vmmr_empty_sz -= span->vs_size;
7787c8c0b82SPatrick Mooney 		} else {
7797c8c0b82SPatrick Mooney 			/*
7807c8c0b82SPatrick Mooney 			 * No empty space to fill with new pages, so just tack
7817c8c0b82SPatrick Mooney 			 * it on at the end instead.
7827c8c0b82SPatrick Mooney 			 */
7837c8c0b82SPatrick Mooney 			span = kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP);
7847c8c0b82SPatrick Mooney 			span->vs_addr = vmmr_empty_last;
7857c8c0b82SPatrick Mooney 			span->vs_size = remain;
7867c8c0b82SPatrick Mooney 			vmmr_empty_last += remain;
7877c8c0b82SPatrick Mooney 		}
7887c8c0b82SPatrick Mooney 		VERIFY3P(span, !=, NULL);
7897c8c0b82SPatrick Mooney 
7907c8c0b82SPatrick Mooney 
7917c8c0b82SPatrick Mooney 		/* Allocate the actual pages to back this span */
7927c8c0b82SPatrick Mooney 		mutex_exit(&vmmr_lock);
7937c8c0b82SPatrick Mooney 		int err = vmmr_alloc_pages(span);
7947c8c0b82SPatrick Mooney 		mutex_enter(&vmmr_lock);
7957c8c0b82SPatrick Mooney 
7967c8c0b82SPatrick Mooney 		/*
7977c8c0b82SPatrick Mooney 		 * If an error is encountered during page allocation for the
7987c8c0b82SPatrick Mooney 		 * span, unwind any progress made by the addition request.
7997c8c0b82SPatrick Mooney 		 */
8007c8c0b82SPatrick Mooney 		if (err != 0) {
8017c8c0b82SPatrick Mooney 			/*
8027c8c0b82SPatrick Mooney 			 * Without pages allocated to this span, it is now
8037c8c0b82SPatrick Mooney 			 * tracked as empty.
8047c8c0b82SPatrick Mooney 			 */
8057c8c0b82SPatrick Mooney 			vmmr_empty_sz += span->vs_size;
8067c8c0b82SPatrick Mooney 			vmmr_tp_insert_concat(span, &vmmr_empty_tp);
8077c8c0b82SPatrick Mooney 
8087c8c0b82SPatrick Mooney 			if (added != 0) {
8097c8c0b82SPatrick Mooney 				vmmr_remove_raw(added);
8107c8c0b82SPatrick Mooney 			}
8117c8c0b82SPatrick Mooney 
8127c8c0b82SPatrick Mooney 			vmmr_adding_sz -= sz;
8137c8c0b82SPatrick Mooney 
8147c8c0b82SPatrick Mooney 			page_unresv(sz >> PAGESHIFT);
8157c8c0b82SPatrick Mooney 			return (err);
8167c8c0b82SPatrick Mooney 		}
8177c8c0b82SPatrick Mooney 
8187c8c0b82SPatrick Mooney 		/*
8197c8c0b82SPatrick Mooney 		 * The allocated-page-bearing span is placed in the "free"
8207c8c0b82SPatrick Mooney 		 * treepair now, but is not officially exposed for consumption
8217c8c0b82SPatrick Mooney 		 * until `vmm_free_sz` or `vmm_free_transient_sz` are updated.
8227c8c0b82SPatrick Mooney 		 *
8237c8c0b82SPatrick Mooney 		 * This allows us to unwind the allocation in case of a failure
8247c8c0b82SPatrick Mooney 		 * without the risk of the freshly added span(s) being snapped
8257c8c0b82SPatrick Mooney 		 * up by a consumer already.
8267c8c0b82SPatrick Mooney 		 */
8277c8c0b82SPatrick Mooney 		added += span->vs_size;
8287c8c0b82SPatrick Mooney 		remain -= span->vs_size;
8297c8c0b82SPatrick Mooney 		vmmr_tp_insert_concat(span, &vmmr_free_tp);
8307c8c0b82SPatrick Mooney 	}
8317c8c0b82SPatrick Mooney 
8327c8c0b82SPatrick Mooney 	/* Make the added memory usable by exposing it to the size accounting */
8337c8c0b82SPatrick Mooney 	if (!transient) {
8347c8c0b82SPatrick Mooney 		vmmr_free_sz += added;
8357c8c0b82SPatrick Mooney 	} else {
8367c8c0b82SPatrick Mooney 		vmmr_free_transient_sz += added;
8377c8c0b82SPatrick Mooney 	}
8387c8c0b82SPatrick Mooney 	ASSERT3U(added, ==, sz);
8397c8c0b82SPatrick Mooney 	vmmr_adding_sz -= added;
8407c8c0b82SPatrick Mooney 
8417c8c0b82SPatrick Mooney 	return (0);
8427c8c0b82SPatrick Mooney }
8437c8c0b82SPatrick Mooney 
844*6bba8b59SPatrick Mooney /*
845*6bba8b59SPatrick Mooney  * Remove memory from vmm reservoir.  Normally this will remove memory from the
846*6bba8b59SPatrick Mooney  * reservoir which was available for non-transient allocations.  If the removal
847*6bba8b59SPatrick Mooney  * is part of a vmmr_free() of a transient allocation, it will act on only that
848*6bba8b59SPatrick Mooney  * transient region being freed, not the available memory in the reservoir.
849*6bba8b59SPatrick Mooney  *
850*6bba8b59SPatrick Mooney  * Expects vmmr_lock to be held when called, and will return with it held, but
851*6bba8b59SPatrick Mooney  * may drop it during portions of the removal.
852*6bba8b59SPatrick Mooney  */
853*6bba8b59SPatrick Mooney static int
8547c8c0b82SPatrick Mooney vmmr_remove(size_t sz, bool transient)
8557c8c0b82SPatrick Mooney {
8567c8c0b82SPatrick Mooney 	VERIFY3U(sz & PAGEOFFSET, ==, 0);
857*6bba8b59SPatrick Mooney 	VERIFY(sz);
858*6bba8b59SPatrick Mooney 	VERIFY(MUTEX_HELD(&vmmr_lock));
8597c8c0b82SPatrick Mooney 
8607c8c0b82SPatrick Mooney 	if ((!transient && sz > vmmr_free_sz) ||
8617c8c0b82SPatrick Mooney 	    (transient && sz > vmmr_free_transient_sz)) {
8627c8c0b82SPatrick Mooney 		return (ENOSPC);
8637c8c0b82SPatrick Mooney 	}
8647c8c0b82SPatrick Mooney 
8657c8c0b82SPatrick Mooney 	vmmr_remove_raw(sz);
8667c8c0b82SPatrick Mooney 
8677c8c0b82SPatrick Mooney 	if (!transient) {
8687c8c0b82SPatrick Mooney 		vmmr_free_sz -= sz;
8697c8c0b82SPatrick Mooney 	} else {
8707c8c0b82SPatrick Mooney 		vmmr_free_transient_sz -= sz;
8717c8c0b82SPatrick Mooney 	}
8727c8c0b82SPatrick Mooney 	page_unresv(sz >> PAGESHIFT);
8737c8c0b82SPatrick Mooney 	return (0);
8747c8c0b82SPatrick Mooney }
8757c8c0b82SPatrick Mooney 
876*6bba8b59SPatrick Mooney static int
877*6bba8b59SPatrick Mooney vmmr_set_target(size_t target_sz, size_t chunk_sz, size_t *resp)
878*6bba8b59SPatrick Mooney {
879*6bba8b59SPatrick Mooney 	VERIFY(resp != NULL);
880*6bba8b59SPatrick Mooney 
881*6bba8b59SPatrick Mooney 	mutex_enter(&vmmr_lock);
882*6bba8b59SPatrick Mooney 
883*6bba8b59SPatrick Mooney 	size_t current_sz = vmmr_alloc_sz + vmmr_free_sz;
884*6bba8b59SPatrick Mooney 
885*6bba8b59SPatrick Mooney 	/* Be sure to communicate current size in case of an early bail-out */
886*6bba8b59SPatrick Mooney 	*resp = current_sz;
887*6bba8b59SPatrick Mooney 
888*6bba8b59SPatrick Mooney 	if ((target_sz & PAGEOFFSET) != 0 ||
889*6bba8b59SPatrick Mooney 	    (chunk_sz & PAGEOFFSET) != 0) {
890*6bba8b59SPatrick Mooney 		mutex_exit(&vmmr_lock);
891*6bba8b59SPatrick Mooney 		return (EINVAL);
892*6bba8b59SPatrick Mooney 	}
893*6bba8b59SPatrick Mooney 	/* Reject sentinel value */
894*6bba8b59SPatrick Mooney 	if (target_sz == VMMR_TARGET_INACTIVE) {
895*6bba8b59SPatrick Mooney 		mutex_exit(&vmmr_lock);
896*6bba8b59SPatrick Mooney 		return (EINVAL);
897*6bba8b59SPatrick Mooney 	}
898*6bba8b59SPatrick Mooney 
899*6bba8b59SPatrick Mooney 	/* Already at target size */
900*6bba8b59SPatrick Mooney 	if (target_sz == current_sz) {
901*6bba8b59SPatrick Mooney 		mutex_exit(&vmmr_lock);
902*6bba8b59SPatrick Mooney 		return (0);
903*6bba8b59SPatrick Mooney 	}
904*6bba8b59SPatrick Mooney 
905*6bba8b59SPatrick Mooney 	/* Reject racing requests size */
906*6bba8b59SPatrick Mooney 	if (vmmr_target_sz != VMMR_TARGET_INACTIVE) {
907*6bba8b59SPatrick Mooney 		mutex_exit(&vmmr_lock);
908*6bba8b59SPatrick Mooney 		return (EALREADY);
909*6bba8b59SPatrick Mooney 	}
910*6bba8b59SPatrick Mooney 	/* Record the target now to excluding a racing request */
911*6bba8b59SPatrick Mooney 	vmmr_target_sz = target_sz;
912*6bba8b59SPatrick Mooney 
913*6bba8b59SPatrick Mooney 	int err = 0;
914*6bba8b59SPatrick Mooney 	do {
915*6bba8b59SPatrick Mooney 		/* Be sensitive to signal interruption */
916*6bba8b59SPatrick Mooney 		if (issig(JUSTLOOKING) != 0) {
917*6bba8b59SPatrick Mooney 			mutex_exit(&vmmr_lock);
918*6bba8b59SPatrick Mooney 			const bool sig_bail = issig(FORREAL) != 0;
919*6bba8b59SPatrick Mooney 			mutex_enter(&vmmr_lock);
920*6bba8b59SPatrick Mooney 			if (sig_bail) {
921*6bba8b59SPatrick Mooney 				err = EINTR;
922*6bba8b59SPatrick Mooney 				break;
923*6bba8b59SPatrick Mooney 			}
924*6bba8b59SPatrick Mooney 		}
925*6bba8b59SPatrick Mooney 
926*6bba8b59SPatrick Mooney 		if (current_sz > target_sz) {
927*6bba8b59SPatrick Mooney 			/* Shrinking reservoir */
928*6bba8b59SPatrick Mooney 
929*6bba8b59SPatrick Mooney 			size_t req_sz = current_sz - target_sz;
930*6bba8b59SPatrick Mooney 			if (chunk_sz != 0) {
931*6bba8b59SPatrick Mooney 				req_sz = MIN(req_sz, chunk_sz);
932*6bba8b59SPatrick Mooney 			}
933*6bba8b59SPatrick Mooney 			err = vmmr_remove(req_sz, false);
934*6bba8b59SPatrick Mooney 		} else {
935*6bba8b59SPatrick Mooney 			/* Growing reservoir */
936*6bba8b59SPatrick Mooney 			ASSERT(current_sz < target_sz);
937*6bba8b59SPatrick Mooney 
938*6bba8b59SPatrick Mooney 			size_t req_sz = target_sz - current_sz;
939*6bba8b59SPatrick Mooney 			if (chunk_sz != 0) {
940*6bba8b59SPatrick Mooney 				req_sz = MIN(req_sz, chunk_sz);
941*6bba8b59SPatrick Mooney 			}
942*6bba8b59SPatrick Mooney 			err = vmmr_add(req_sz, false);
943*6bba8b59SPatrick Mooney 		}
944*6bba8b59SPatrick Mooney 
945*6bba8b59SPatrick Mooney 		current_sz = vmmr_alloc_sz + vmmr_free_sz;
946*6bba8b59SPatrick Mooney 	} while (err == 0 && current_sz != target_sz);
947*6bba8b59SPatrick Mooney 
948*6bba8b59SPatrick Mooney 	/* Clear the target now that we are done (success or not) */
949*6bba8b59SPatrick Mooney 	vmmr_target_sz = VMMR_TARGET_INACTIVE;
950*6bba8b59SPatrick Mooney 	mutex_exit(&vmmr_lock);
951*6bba8b59SPatrick Mooney 	*resp = current_sz;
952*6bba8b59SPatrick Mooney 	return (err);
953*6bba8b59SPatrick Mooney }
954*6bba8b59SPatrick Mooney 
9557c8c0b82SPatrick Mooney int
9567c8c0b82SPatrick Mooney vmmr_ioctl(int cmd, intptr_t arg, int md, cred_t *cr, int *rvalp)
9577c8c0b82SPatrick Mooney {
958*6bba8b59SPatrick Mooney 	/*
959*6bba8b59SPatrick Mooney 	 * Since an LP64 datamodel is enforced by our caller (vmm_ioctl()), we
960*6bba8b59SPatrick Mooney 	 * do not need to duplicate such checks here.
961*6bba8b59SPatrick Mooney 	 */
962*6bba8b59SPatrick Mooney 
9637c8c0b82SPatrick Mooney 	switch (cmd) {
9647c8c0b82SPatrick Mooney 	case VMM_RESV_QUERY: {
9657c8c0b82SPatrick Mooney 		struct vmm_resv_query res;
9667c8c0b82SPatrick Mooney 		void *datap = (void *)(uintptr_t)arg;
9677c8c0b82SPatrick Mooney 
9687c8c0b82SPatrick Mooney 		/* For now, anyone in GZ can query */
9697c8c0b82SPatrick Mooney 		if (crgetzoneid(cr) != GLOBAL_ZONEID) {
9707c8c0b82SPatrick Mooney 			return (EPERM);
9717c8c0b82SPatrick Mooney 		}
9727c8c0b82SPatrick Mooney 		mutex_enter(&vmmr_lock);
9737c8c0b82SPatrick Mooney 		res.vrq_free_sz = vmmr_free_sz;
9747c8c0b82SPatrick Mooney 		res.vrq_alloc_sz = vmmr_alloc_sz;
9757c8c0b82SPatrick Mooney 		res.vrq_alloc_transient_sz = vmmr_alloc_transient_sz;
9767c8c0b82SPatrick Mooney 		res.vrq_limit = vmmr_total_limit;
9777c8c0b82SPatrick Mooney 		mutex_exit(&vmmr_lock);
9787c8c0b82SPatrick Mooney 		if (ddi_copyout(&res, datap, sizeof (res), md) != 0) {
9797c8c0b82SPatrick Mooney 			return (EFAULT);
9807c8c0b82SPatrick Mooney 		}
9817c8c0b82SPatrick Mooney 		break;
9827c8c0b82SPatrick Mooney 	}
983*6bba8b59SPatrick Mooney 	case VMM_RESV_SET_TARGET: {
9847c8c0b82SPatrick Mooney 		if (secpolicy_sys_config(cr, B_FALSE) != 0) {
9857c8c0b82SPatrick Mooney 			return (EPERM);
9867c8c0b82SPatrick Mooney 		}
987*6bba8b59SPatrick Mooney 
988*6bba8b59SPatrick Mooney 		struct vmm_resv_target tgt;
989*6bba8b59SPatrick Mooney 		void *datap = (void *)(uintptr_t)arg;
990*6bba8b59SPatrick Mooney 
991*6bba8b59SPatrick Mooney 		if (ddi_copyin(datap, &tgt, sizeof (tgt), md) != 0) {
992*6bba8b59SPatrick Mooney 			return (EFAULT);
9937c8c0b82SPatrick Mooney 		}
994*6bba8b59SPatrick Mooney 
995*6bba8b59SPatrick Mooney 		int err = vmmr_set_target(tgt.vrt_target_sz, tgt.vrt_chunk_sz,
996*6bba8b59SPatrick Mooney 		    &tgt.vrt_result_sz);
997*6bba8b59SPatrick Mooney 
998*6bba8b59SPatrick Mooney 		/*
999*6bba8b59SPatrick Mooney 		 * Attempt to communicate the resultant size of the reservoir if
1000*6bba8b59SPatrick Mooney 		 * setting it to the target was a success, or if we were
1001*6bba8b59SPatrick Mooney 		 * interrupted (by a signal) while doing so.
1002*6bba8b59SPatrick Mooney 		 */
1003*6bba8b59SPatrick Mooney 		if (err == 0 || err == EINTR) {
1004*6bba8b59SPatrick Mooney 			if (ddi_copyout(&tgt, datap, sizeof (tgt), md) != 0) {
1005*6bba8b59SPatrick Mooney 				err = EFAULT;
10067c8c0b82SPatrick Mooney 			}
1007*6bba8b59SPatrick Mooney 		}
1008*6bba8b59SPatrick Mooney 
1009*6bba8b59SPatrick Mooney 		return (err);
10107c8c0b82SPatrick Mooney 	}
10117c8c0b82SPatrick Mooney 	default:
10127c8c0b82SPatrick Mooney 		return (ENOTTY);
10137c8c0b82SPatrick Mooney 	}
10147c8c0b82SPatrick Mooney 	return (0);
10157c8c0b82SPatrick Mooney }
1016