/*
 * xref: /illumos-gate/usr/src/uts/intel/io/vmm/vmm_reservoir.c
 * (revision 40fc84a2910bb99408fbe8d10a06d5d5f1eaf953)
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
117c8c0b82SPatrick Mooney 
/*
 * Copyright 2023 Oxide Computer Company
 */
157c8c0b82SPatrick Mooney 
/*
 * VMM Memory Reservoir
 *
 *
 * In order to make the allocation of large (multi-GiB) chunks of memory
 * for bhyve VMs easier, we introduce the "VMM Reservoir", where system
 * operators can set aside a substantial portion of system memory exclusively
 * for VMs.  This memory is unavailable for general use by the rest of the
 * system.  Rather than having to scour the freelist, reap kmem caches, or put
 * pressure on the ARC, bhyve guest memory allocations can quickly determine if
 * there is adequate reservoir memory available.  Since the pages stored in the
 * reservoir are pre-zeroed, it can be immediately used when allocated to a
 * guest.  When the memory is returned to the reservoir, it is zeroed once more
 * to avoid leaking any sensitive data from that guest.
 *
 *
 * Transient Allocations
 *
 * While the explicit reservoir model may work well for some applications,
 * others may want a more traditional model, where pages for guest memory
 * objects are allocated on demand, rather than from a pool set aside from the
 * system.  In this case, the allocation can be made in "transient" mode, where
 * the memory is allocated normally, even if there is free capacity in the
 * reservoir.  When use of the transient allocation is complete (the guest is
 * halted and destroyed), the pages will be freed back to the system, rather
 * than added back to the reservoir.
 *
 * From an implementation standpoint, transient allocations follow the same
 * code paths as ones using the reservoir normally.  Those allocations have a
 * tag which marks them as transient, and used/free size tallies are maintained
 * separately for normal and transient operations.  When performing a transient
 * allocation, that amount of memory is immediately added to the reservoir,
 * from which the allocation can be made.  When freeing a transient allocation,
 * a matching amount of memory is removed from the reservoir as part of the
 * operation.  This allows both allocation types to coexist without too much
 * additional machinery.
 *
 *
 * Administration
 *
 * Operators may attempt to alter the amount of memory allocated to the
 * reservoir via an ioctl against the vmmctl device.  The total amount of memory
 * in the reservoir (free, or allocated to VMs) is limited by
 * `vmmr_total_limit` (see its definition for how this limit is calculated).
 *
 * The limit is in place to prevent the reservoir from inadvertently growing
 * to a size where the system has inadequate memory to make forward progress.
 * Shrinking the reservoir is only possible when it contains free (not
 * allocated by any guest VMs) memory.
 *
 *
 * Page Tracking
 *
 * The reservoir currently uses vnode association to keep track of pages under
 * its control (either designated to the reservoir and free, or allocated to a
 * guest VM object).  This means using the existing VM system primitives for
 * page_t instances being associated with a given (vnode, offset) tuple.  It
 * means that spans of pages, either free or allocated, need only to store a
 * length (of the span) and an offset (into the vnode) in order to gain access
 * to all of the underlying pages associated with that span.  Associating the
 * pages against `kvps[KV_VVP]` (the VMM kernel vnode) means they will be
 * properly tracked as KAS pages, but be excluded from normal dumps (unless the
 * operator has chosen to dump all of RAM).
 */
807c8c0b82SPatrick Mooney 
#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/avl.h>
#include <sys/list.h>
#include <sys/machparam.h>
#include <sys/kmem.h>
#include <sys/stddef.h>
#include <sys/null.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/sunddi.h>
#include <sys/policy.h>
#include <vm/seg_kmem.h>
#include <vm/hat_i86.h>
#include <sys/kstat.h>

#include <sys/vmm_reservoir.h>
#include <sys/vmm_dev.h>
#include <sys/vmm_impl.h>
1006bba8b59SPatrick Mooney 
/*
 * Sentinel stored in vmmr_target_sz while no reservoir resize is in progress.
 */
#define	VMMR_TARGET_INACTIVE	SIZE_MAX
1027c8c0b82SPatrick Mooney 
1037c8c0b82SPatrick Mooney static kmutex_t vmmr_lock;
1047c8c0b82SPatrick Mooney 
1057c8c0b82SPatrick Mooney static size_t vmmr_free_sz;
1067c8c0b82SPatrick Mooney static size_t vmmr_free_transient_sz;
1077c8c0b82SPatrick Mooney static size_t vmmr_adding_sz;
1087c8c0b82SPatrick Mooney static size_t vmmr_alloc_sz;
1097c8c0b82SPatrick Mooney static size_t vmmr_alloc_transient_sz;
1107c8c0b82SPatrick Mooney static size_t vmmr_empty_sz;
1117c8c0b82SPatrick Mooney 
1126bba8b59SPatrick Mooney /*
1136bba8b59SPatrick Mooney  * Target size of the reservoir during active vmmr_set_target() operation.
1146bba8b59SPatrick Mooney  * It holds the sentinel value of VMMR_TARGET_INACTIVE when no resize is active.
1156bba8b59SPatrick Mooney  */
1166bba8b59SPatrick Mooney static size_t vmmr_target_sz;
1176bba8b59SPatrick Mooney 
1187c8c0b82SPatrick Mooney static uintptr_t vmmr_empty_last;
1197c8c0b82SPatrick Mooney /* Upper limit for the size (free + allocated) of the reservoir */
1207c8c0b82SPatrick Mooney static size_t vmmr_total_limit;
1217c8c0b82SPatrick Mooney 
1227c8c0b82SPatrick Mooney /* VA range allocated from the VMM arena for the mappings */
1237c8c0b82SPatrick Mooney static uintptr_t vmmr_va;
1247c8c0b82SPatrick Mooney static uintptr_t vmmr_va_sz;
1257c8c0b82SPatrick Mooney 
1266bba8b59SPatrick Mooney static kstat_t *vmmr_kstat;
1276bba8b59SPatrick Mooney 
1287c8c0b82SPatrick Mooney /* Pair of AVL trees to store set of spans ordered by addr and size */
1297c8c0b82SPatrick Mooney typedef struct vmmr_treepair {
1307c8c0b82SPatrick Mooney 	avl_tree_t by_addr;
1317c8c0b82SPatrick Mooney 	avl_tree_t by_size;
1327c8c0b82SPatrick Mooney } vmmr_treepair_t;
1337c8c0b82SPatrick Mooney 
1347c8c0b82SPatrick Mooney /* Spans of free memory in the reservoir */
1357c8c0b82SPatrick Mooney static vmmr_treepair_t vmmr_free_tp;
1367c8c0b82SPatrick Mooney 
1377c8c0b82SPatrick Mooney /* Spans of empty (not backed by memory) space in the reservoir */
1387c8c0b82SPatrick Mooney static vmmr_treepair_t vmmr_empty_tp;
1397c8c0b82SPatrick Mooney 
1407c8c0b82SPatrick Mooney /* Regions of memory allocated from the reservoir */
1417c8c0b82SPatrick Mooney static list_t vmmr_alloc_regions;
1427c8c0b82SPatrick Mooney 
1437c8c0b82SPatrick Mooney struct vmmr_span {
1447c8c0b82SPatrick Mooney 	uintptr_t	vs_addr;
1457c8c0b82SPatrick Mooney 	size_t		vs_size;
1467c8c0b82SPatrick Mooney 	avl_node_t	vs_by_addr;
1477c8c0b82SPatrick Mooney 	avl_node_t	vs_by_size;
1487c8c0b82SPatrick Mooney 	uintptr_t	vs_region_addr;
1497c8c0b82SPatrick Mooney };
1507c8c0b82SPatrick Mooney typedef struct vmmr_span vmmr_span_t;
1517c8c0b82SPatrick Mooney 
1527c8c0b82SPatrick Mooney struct vmmr_region {
1537c8c0b82SPatrick Mooney 	size_t		vr_size;
1547c8c0b82SPatrick Mooney 	avl_tree_t	vr_spans;
1557c8c0b82SPatrick Mooney 	list_node_t	vr_node;
1567c8c0b82SPatrick Mooney 	bool		vr_transient;
1577c8c0b82SPatrick Mooney };
1587c8c0b82SPatrick Mooney 
1596bba8b59SPatrick Mooney typedef struct vmmr_kstats {
1606bba8b59SPatrick Mooney 	kstat_named_t	vmrks_bytes_free;
1616bba8b59SPatrick Mooney 	kstat_named_t	vmrks_bytes_alloc;
1626bba8b59SPatrick Mooney 	kstat_named_t	vmrks_bytes_transient;
1636bba8b59SPatrick Mooney 	kstat_named_t	vmrks_bytes_limit;
1646bba8b59SPatrick Mooney } vmmr_kstats_t;
1656bba8b59SPatrick Mooney 
1666bba8b59SPatrick Mooney 
/*
 * Forward declarations; both are defined later in this file.  The first
 * argument is a size in bytes and the second is the transient flag (as passed
 * by vmmr_alloc()).
 */
static int vmmr_add(size_t, bool);
static int vmmr_remove(size_t, bool);
1696bba8b59SPatrick Mooney 
1707c8c0b82SPatrick Mooney static int
vmmr_cmp_addr(const void * a,const void * b)1717c8c0b82SPatrick Mooney vmmr_cmp_addr(const void *a, const void *b)
1727c8c0b82SPatrick Mooney {
1737c8c0b82SPatrick Mooney 	const vmmr_span_t *sa = a;
1747c8c0b82SPatrick Mooney 	const vmmr_span_t *sb = b;
1757c8c0b82SPatrick Mooney 
1767c8c0b82SPatrick Mooney 	if (sa->vs_addr == sb->vs_addr) {
1777c8c0b82SPatrick Mooney 		return (0);
1787c8c0b82SPatrick Mooney 	} else if (sa->vs_addr < sb->vs_addr) {
1797c8c0b82SPatrick Mooney 		return (-1);
1807c8c0b82SPatrick Mooney 	} else {
1817c8c0b82SPatrick Mooney 		return (1);
1827c8c0b82SPatrick Mooney 	}
1837c8c0b82SPatrick Mooney }
1847c8c0b82SPatrick Mooney 
1857c8c0b82SPatrick Mooney static int
vmmr_cmp_size(const void * a,const void * b)1867c8c0b82SPatrick Mooney vmmr_cmp_size(const void *a, const void *b)
1877c8c0b82SPatrick Mooney {
1887c8c0b82SPatrick Mooney 	const vmmr_span_t *sa = a;
1897c8c0b82SPatrick Mooney 	const vmmr_span_t *sb = b;
1907c8c0b82SPatrick Mooney 
1917c8c0b82SPatrick Mooney 	if (sa->vs_size == sb->vs_size) {
1927c8c0b82SPatrick Mooney 		/*
1937c8c0b82SPatrick Mooney 		 * Since discontiguous spans could have the same size in a
1947c8c0b82SPatrick Mooney 		 * by-size tree, differentiate them (as required by AVL) by
1957c8c0b82SPatrick Mooney 		 * address so they can safely coexist while remaining sorted.
1967c8c0b82SPatrick Mooney 		 */
1977c8c0b82SPatrick Mooney 		return (vmmr_cmp_addr(a, b));
1987c8c0b82SPatrick Mooney 	} else if (sa->vs_size < sb->vs_size) {
1997c8c0b82SPatrick Mooney 		return (-1);
2007c8c0b82SPatrick Mooney 	} else {
2017c8c0b82SPatrick Mooney 		return (1);
2027c8c0b82SPatrick Mooney 	}
2037c8c0b82SPatrick Mooney }
2047c8c0b82SPatrick Mooney 
2057c8c0b82SPatrick Mooney static int
vmmr_cmp_region_addr(const void * a,const void * b)2067c8c0b82SPatrick Mooney vmmr_cmp_region_addr(const void *a, const void *b)
2077c8c0b82SPatrick Mooney {
2087c8c0b82SPatrick Mooney 	const vmmr_span_t *sa = a;
2097c8c0b82SPatrick Mooney 	const vmmr_span_t *sb = b;
2107c8c0b82SPatrick Mooney 
2117c8c0b82SPatrick Mooney 	if (sa->vs_region_addr == sb->vs_region_addr) {
2127c8c0b82SPatrick Mooney 		return (0);
2137c8c0b82SPatrick Mooney 	} else if (sa->vs_region_addr < sb->vs_region_addr) {
2147c8c0b82SPatrick Mooney 		return (-1);
2157c8c0b82SPatrick Mooney 	} else {
2167c8c0b82SPatrick Mooney 		return (1);
2177c8c0b82SPatrick Mooney 	}
2187c8c0b82SPatrick Mooney }
2197c8c0b82SPatrick Mooney 
2207c8c0b82SPatrick Mooney static void
vmmr_tp_init(vmmr_treepair_t * tree)2217c8c0b82SPatrick Mooney vmmr_tp_init(vmmr_treepair_t *tree)
2227c8c0b82SPatrick Mooney {
2237c8c0b82SPatrick Mooney 	avl_create(&tree->by_addr, vmmr_cmp_addr, sizeof (vmmr_span_t),
2247c8c0b82SPatrick Mooney 	    offsetof(vmmr_span_t, vs_by_addr));
2257c8c0b82SPatrick Mooney 	avl_create(&tree->by_size, vmmr_cmp_size, sizeof (vmmr_span_t),
2267c8c0b82SPatrick Mooney 	    offsetof(vmmr_span_t, vs_by_size));
2277c8c0b82SPatrick Mooney }
2287c8c0b82SPatrick Mooney 
2297c8c0b82SPatrick Mooney static void
vmmr_tp_destroy(vmmr_treepair_t * tree)2307c8c0b82SPatrick Mooney vmmr_tp_destroy(vmmr_treepair_t *tree)
2317c8c0b82SPatrick Mooney {
2327c8c0b82SPatrick Mooney 	void *vcp = NULL;
2337c8c0b82SPatrick Mooney 	vmmr_span_t *span;
2347c8c0b82SPatrick Mooney 
2357c8c0b82SPatrick Mooney 	while (avl_destroy_nodes(&tree->by_addr, &vcp) != NULL) {
2367c8c0b82SPatrick Mooney 		/* Freeing spans will be done when tearing down by-size tree */
2377c8c0b82SPatrick Mooney 	}
2387c8c0b82SPatrick Mooney 	while ((span = avl_destroy_nodes(&tree->by_size, &vcp)) != NULL) {
2397c8c0b82SPatrick Mooney 		kmem_free(span, sizeof (*span));
2407c8c0b82SPatrick Mooney 	}
2417c8c0b82SPatrick Mooney 	avl_destroy(&tree->by_addr);
2427c8c0b82SPatrick Mooney 	avl_destroy(&tree->by_size);
2437c8c0b82SPatrick Mooney }
2447c8c0b82SPatrick Mooney 
2457c8c0b82SPatrick Mooney /*
2467c8c0b82SPatrick Mooney  * Insert a vmmr_span_t into a treepair, concatenating if possible with adjacent
2477c8c0b82SPatrick Mooney  * span(s).  Such concatenation could result in the `to_add` span being freed,
2487c8c0b82SPatrick Mooney  * so the caller cannot use it after this returns.
2497c8c0b82SPatrick Mooney  */
2507c8c0b82SPatrick Mooney static void
vmmr_tp_insert_concat(vmmr_span_t * to_add,vmmr_treepair_t * tree)2517c8c0b82SPatrick Mooney vmmr_tp_insert_concat(vmmr_span_t *to_add, vmmr_treepair_t *tree)
2527c8c0b82SPatrick Mooney {
2537c8c0b82SPatrick Mooney 	avl_tree_t *by_addr = &tree->by_addr;
2547c8c0b82SPatrick Mooney 	avl_tree_t *by_size = &tree->by_size;
2557c8c0b82SPatrick Mooney 	vmmr_span_t *node;
2567c8c0b82SPatrick Mooney 	avl_index_t where;
2577c8c0b82SPatrick Mooney 
2587c8c0b82SPatrick Mooney 	/* This addr should not already exist in the treepair */
2597c8c0b82SPatrick Mooney 	node = avl_find(by_addr, to_add, &where);
2607c8c0b82SPatrick Mooney 	ASSERT3P(node, ==, NULL);
2617c8c0b82SPatrick Mooney 
2627c8c0b82SPatrick Mooney 	node = avl_nearest(by_addr, where, AVL_BEFORE);
2637c8c0b82SPatrick Mooney 	if (node != NULL &&
2647c8c0b82SPatrick Mooney 	    (node->vs_addr + node->vs_size) == to_add->vs_addr) {
2657c8c0b82SPatrick Mooney 		/* concat with preceeding item */
2667c8c0b82SPatrick Mooney 		avl_remove(by_addr, node);
2677c8c0b82SPatrick Mooney 		avl_remove(by_size, node);
2687c8c0b82SPatrick Mooney 		node->vs_size += to_add->vs_size;
2697c8c0b82SPatrick Mooney 		kmem_free(to_add, sizeof (*to_add));
2707c8c0b82SPatrick Mooney 
2717c8c0b82SPatrick Mooney 		/*
2727c8c0b82SPatrick Mooney 		 * Since this now-concatenated span could be adjacent one
2737c8c0b82SPatrick Mooney 		 * trailing it, fall through to perform that check.
2747c8c0b82SPatrick Mooney 		 */
2757c8c0b82SPatrick Mooney 		to_add = node;
2767c8c0b82SPatrick Mooney 	}
2777c8c0b82SPatrick Mooney 
2787c8c0b82SPatrick Mooney 	node = avl_nearest(by_addr, where, AVL_AFTER);
2797c8c0b82SPatrick Mooney 	if (node != NULL &&
2807c8c0b82SPatrick Mooney 	    (to_add->vs_addr + to_add->vs_size) == node->vs_addr) {
2817c8c0b82SPatrick Mooney 		/* concat with trailing item */
2827c8c0b82SPatrick Mooney 		avl_remove(by_addr, node);
2837c8c0b82SPatrick Mooney 		avl_remove(by_size, node);
2847c8c0b82SPatrick Mooney 		node->vs_addr = to_add->vs_addr;
2857c8c0b82SPatrick Mooney 		node->vs_size += to_add->vs_size;
2867c8c0b82SPatrick Mooney 		avl_add(by_addr, node);
2877c8c0b82SPatrick Mooney 		avl_add(by_size, node);
2887c8c0b82SPatrick Mooney 
2897c8c0b82SPatrick Mooney 		kmem_free(to_add, sizeof (*to_add));
2907c8c0b82SPatrick Mooney 		return;
2917c8c0b82SPatrick Mooney 	}
2927c8c0b82SPatrick Mooney 
2937c8c0b82SPatrick Mooney 	/* simply insert */
2947c8c0b82SPatrick Mooney 	avl_add(by_addr, to_add);
2957c8c0b82SPatrick Mooney 	avl_add(by_size, to_add);
2967c8c0b82SPatrick Mooney }
2977c8c0b82SPatrick Mooney 
2987c8c0b82SPatrick Mooney /*
2997c8c0b82SPatrick Mooney  * Remove a vmmr_span_t from a treepair, splitting if necessary when a span of
3007c8c0b82SPatrick Mooney  * the exact target size is not present, but a larger one is.  May return a span
3017c8c0b82SPatrick Mooney  * with a size smaller than the target if splitting is not an option.
3027c8c0b82SPatrick Mooney  */
3037c8c0b82SPatrick Mooney static vmmr_span_t *
vmmr_tp_remove_split(size_t target_sz,vmmr_treepair_t * tree)3047c8c0b82SPatrick Mooney vmmr_tp_remove_split(size_t target_sz, vmmr_treepair_t *tree)
3057c8c0b82SPatrick Mooney {
3067c8c0b82SPatrick Mooney 	avl_tree_t *by_addr = &tree->by_addr;
3077c8c0b82SPatrick Mooney 	avl_tree_t *by_size = &tree->by_size;
3087c8c0b82SPatrick Mooney 	vmmr_span_t *span;
3097c8c0b82SPatrick Mooney 	avl_index_t where;
3107c8c0b82SPatrick Mooney 
3117c8c0b82SPatrick Mooney 	ASSERT3U(target_sz, !=, 0);
3127c8c0b82SPatrick Mooney 	ASSERT(!avl_is_empty(by_addr));
3137c8c0b82SPatrick Mooney 	ASSERT(!avl_is_empty(by_size));
3147c8c0b82SPatrick Mooney 
3157c8c0b82SPatrick Mooney 	vmmr_span_t search = { .vs_size = target_sz };
3167c8c0b82SPatrick Mooney 	span = avl_find(by_size, &search, &where);
3177c8c0b82SPatrick Mooney 	if (span == NULL) {
3187c8c0b82SPatrick Mooney 		/* Try for a larger span (instead of exact match) */
3197c8c0b82SPatrick Mooney 		span = avl_nearest(by_size, where, AVL_AFTER);
3207c8c0b82SPatrick Mooney 		if (span == NULL) {
3217c8c0b82SPatrick Mooney 			/*
3227c8c0b82SPatrick Mooney 			 * Caller will need to collect several smaller spans in
3237c8c0b82SPatrick Mooney 			 * order to fulfill their request.
3247c8c0b82SPatrick Mooney 			 */
3257c8c0b82SPatrick Mooney 			span = avl_nearest(by_size, where, AVL_BEFORE);
3267c8c0b82SPatrick Mooney 			ASSERT3P(span, !=, NULL);
3277c8c0b82SPatrick Mooney 		}
3287c8c0b82SPatrick Mooney 	}
3297c8c0b82SPatrick Mooney 
3307c8c0b82SPatrick Mooney 	if (span->vs_size <= target_sz) {
3317c8c0b82SPatrick Mooney 		avl_remove(by_size, span);
3327c8c0b82SPatrick Mooney 		avl_remove(by_addr, span);
3337c8c0b82SPatrick Mooney 
3347c8c0b82SPatrick Mooney 		return (span);
3357c8c0b82SPatrick Mooney 	} else {
3367c8c0b82SPatrick Mooney 		/* Split off adequate chunk from larger span */
3377c8c0b82SPatrick Mooney 		uintptr_t start = span->vs_addr + span->vs_size - target_sz;
3387c8c0b82SPatrick Mooney 
3397c8c0b82SPatrick Mooney 		avl_remove(by_size, span);
3407c8c0b82SPatrick Mooney 		span->vs_size -= target_sz;
3417c8c0b82SPatrick Mooney 		avl_add(by_size, span);
3427c8c0b82SPatrick Mooney 
3437c8c0b82SPatrick Mooney 		vmmr_span_t *split_span =
3447c8c0b82SPatrick Mooney 		    kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP);
3457c8c0b82SPatrick Mooney 		split_span->vs_addr = start;
3467c8c0b82SPatrick Mooney 		split_span->vs_size = target_sz;
3477c8c0b82SPatrick Mooney 
3487c8c0b82SPatrick Mooney 		return (split_span);
3497c8c0b82SPatrick Mooney 	}
3507c8c0b82SPatrick Mooney }
3517c8c0b82SPatrick Mooney 
3526bba8b59SPatrick Mooney static int
vmmr_kstat_update(struct kstat * ksp,int rw)3536bba8b59SPatrick Mooney vmmr_kstat_update(struct kstat *ksp, int rw)
3546bba8b59SPatrick Mooney {
3556bba8b59SPatrick Mooney 	vmmr_kstats_t *vkp = ksp->ks_data;
3566bba8b59SPatrick Mooney 
3576bba8b59SPatrick Mooney 	mutex_enter(&vmmr_lock);
3586bba8b59SPatrick Mooney 	vkp->vmrks_bytes_free.value.ui64 = vmmr_free_sz;
3596bba8b59SPatrick Mooney 	vkp->vmrks_bytes_alloc.value.ui64 = vmmr_alloc_sz;
3606bba8b59SPatrick Mooney 	/*
3616bba8b59SPatrick Mooney 	 * In addition to the memory which is actually actually allocated to
3626bba8b59SPatrick Mooney 	 * transient consumers, memory which is considered free-for-transient is
3636bba8b59SPatrick Mooney 	 * also included in the sizing.
3646bba8b59SPatrick Mooney 	 */
3656bba8b59SPatrick Mooney 	vkp->vmrks_bytes_transient.value.ui64 =
3666bba8b59SPatrick Mooney 	    vmmr_alloc_transient_sz + vmmr_free_transient_sz;
3676bba8b59SPatrick Mooney 	vkp->vmrks_bytes_limit.value.ui64 = vmmr_total_limit;
3686bba8b59SPatrick Mooney 	mutex_exit(&vmmr_lock);
3696bba8b59SPatrick Mooney 
3706bba8b59SPatrick Mooney 	return (0);
3716bba8b59SPatrick Mooney }
3726bba8b59SPatrick Mooney 
3736bba8b59SPatrick Mooney int
vmmr_init()3747c8c0b82SPatrick Mooney vmmr_init()
3757c8c0b82SPatrick Mooney {
3767c8c0b82SPatrick Mooney 	mutex_init(&vmmr_lock, NULL, MUTEX_DEFAULT, NULL);
3777c8c0b82SPatrick Mooney 
3787c8c0b82SPatrick Mooney 	/*
3797c8c0b82SPatrick Mooney 	 * `vmm_total_limit` represents the absolute maximum size of the VMM
3807c8c0b82SPatrick Mooney 	 * memory reservoir.  It is meant to provide some measure of protection
3817c8c0b82SPatrick Mooney 	 * against an operator pushing the system into unrecoverable memory
3827c8c0b82SPatrick Mooney 	 * starvation through explicit or transient additions to the reservoir.
3837c8c0b82SPatrick Mooney 	 *
3847c8c0b82SPatrick Mooney 	 * There will be many situations where this limit would be inadequate to
3857c8c0b82SPatrick Mooney 	 * prevent kernel memory starvation in the face of certain operator
3867c8c0b82SPatrick Mooney 	 * actions.  It is a balance to be struck between safety and allowing
3877c8c0b82SPatrick Mooney 	 * large systems to reach high utilization.
3887c8c0b82SPatrick Mooney 	 *
3897c8c0b82SPatrick Mooney 	 * The value is based off of pages_pp_maximum: "Number of currently
3907c8c0b82SPatrick Mooney 	 * available pages that cannot be 'locked'".  It is sized as all of
3917c8c0b82SPatrick Mooney 	 * `physmem` less 120% of `pages_pp_maximum`.
3927c8c0b82SPatrick Mooney 	 */
3937c8c0b82SPatrick Mooney 	vmmr_total_limit =
3947c8c0b82SPatrick Mooney 	    (((physmem * 10)  - (pages_pp_maximum * 12)) * PAGESIZE) / 10;
3957c8c0b82SPatrick Mooney 
3967c8c0b82SPatrick Mooney 	vmmr_empty_last = 0;
3977c8c0b82SPatrick Mooney 	vmmr_free_sz = 0;
3987c8c0b82SPatrick Mooney 	vmmr_alloc_sz = 0;
3997c8c0b82SPatrick Mooney 	vmmr_empty_sz = 0;
4007c8c0b82SPatrick Mooney 	vmmr_adding_sz = 0;
4017c8c0b82SPatrick Mooney 	vmmr_free_transient_sz = 0;
4027c8c0b82SPatrick Mooney 	vmmr_alloc_transient_sz = 0;
4036bba8b59SPatrick Mooney 	vmmr_target_sz = VMMR_TARGET_INACTIVE;
4046bba8b59SPatrick Mooney 
4056bba8b59SPatrick Mooney 	/*
4066bba8b59SPatrick Mooney 	 * Attempt kstat allocation early, since it is the only part of
4076bba8b59SPatrick Mooney 	 * reservoir initialization which is fallible.
4086bba8b59SPatrick Mooney 	 */
4096bba8b59SPatrick Mooney 	kstat_t *ksp = kstat_create_zone(VMM_MODULE_NAME, 0, "vmm_reservoir",
4106bba8b59SPatrick Mooney 	    VMM_KSTAT_CLASS, KSTAT_TYPE_NAMED,
4116bba8b59SPatrick Mooney 	    sizeof (vmmr_kstats_t) / sizeof (kstat_named_t), 0, GLOBAL_ZONEID);
4126bba8b59SPatrick Mooney 	if (ksp == NULL) {
4136bba8b59SPatrick Mooney 		mutex_destroy(&vmmr_lock);
4146bba8b59SPatrick Mooney 		return (ENOMEM);
4156bba8b59SPatrick Mooney 	}
4166bba8b59SPatrick Mooney 
4176bba8b59SPatrick Mooney 	vmmr_kstats_t *vkp = ksp->ks_data;
4186bba8b59SPatrick Mooney 
4196bba8b59SPatrick Mooney 	kstat_named_init(&vkp->vmrks_bytes_free, "bytes_free",
4206bba8b59SPatrick Mooney 	    KSTAT_DATA_UINT64);
4216bba8b59SPatrick Mooney 	kstat_named_init(&vkp->vmrks_bytes_alloc, "bytes_alloc",
4226bba8b59SPatrick Mooney 	    KSTAT_DATA_UINT64);
4236bba8b59SPatrick Mooney 	kstat_named_init(&vkp->vmrks_bytes_transient, "bytes_transient_alloc",
4246bba8b59SPatrick Mooney 	    KSTAT_DATA_UINT64);
4256bba8b59SPatrick Mooney 	kstat_named_init(&vkp->vmrks_bytes_limit, "bytes_limit",
4266bba8b59SPatrick Mooney 	    KSTAT_DATA_UINT64);
4276bba8b59SPatrick Mooney 	ksp->ks_private = NULL;
4286bba8b59SPatrick Mooney 	ksp->ks_update = vmmr_kstat_update;
4296bba8b59SPatrick Mooney 	vmmr_kstat = ksp;
4307c8c0b82SPatrick Mooney 
4317c8c0b82SPatrick Mooney 	vmmr_tp_init(&vmmr_free_tp);
4327c8c0b82SPatrick Mooney 	vmmr_tp_init(&vmmr_empty_tp);
4337c8c0b82SPatrick Mooney 
4347c8c0b82SPatrick Mooney 	list_create(&vmmr_alloc_regions, sizeof (vmmr_region_t),
4357c8c0b82SPatrick Mooney 	    offsetof(vmmr_region_t, vr_node));
4367c8c0b82SPatrick Mooney 
4377c8c0b82SPatrick Mooney 	/* Grab a chunk of VA for the reservoir */
4387c8c0b82SPatrick Mooney 	vmmr_va_sz = physmem * PAGESIZE;
4397c8c0b82SPatrick Mooney 	vmmr_va = (uintptr_t)vmem_alloc(kvmm_arena, vmmr_va_sz, VM_SLEEP);
4406bba8b59SPatrick Mooney 
4416bba8b59SPatrick Mooney 	kstat_install(vmmr_kstat);
4426bba8b59SPatrick Mooney 
4436bba8b59SPatrick Mooney 	return (0);
4447c8c0b82SPatrick Mooney }
4457c8c0b82SPatrick Mooney 
4467c8c0b82SPatrick Mooney void
vmmr_fini()4477c8c0b82SPatrick Mooney vmmr_fini()
4487c8c0b82SPatrick Mooney {
4497c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
4507c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_alloc_sz, ==, 0);
4517c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_free_sz, ==, 0);
4527c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_adding_sz, ==, 0);
4537c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_alloc_transient_sz, ==, 0);
4547c8c0b82SPatrick Mooney 	VERIFY3U(vmmr_free_transient_sz, ==, 0);
4557c8c0b82SPatrick Mooney 	VERIFY(avl_is_empty(&vmmr_free_tp.by_addr));
4567c8c0b82SPatrick Mooney 	VERIFY(avl_is_empty(&vmmr_free_tp.by_size));
4577c8c0b82SPatrick Mooney 	VERIFY(list_is_empty(&vmmr_alloc_regions));
4587c8c0b82SPatrick Mooney 
4596bba8b59SPatrick Mooney 	kstat_delete(vmmr_kstat);
4606bba8b59SPatrick Mooney 	vmmr_kstat = NULL;
4616bba8b59SPatrick Mooney 
4627c8c0b82SPatrick Mooney 	vmmr_tp_destroy(&vmmr_free_tp);
4637c8c0b82SPatrick Mooney 	vmmr_tp_destroy(&vmmr_empty_tp);
4647c8c0b82SPatrick Mooney 	list_destroy(&vmmr_alloc_regions);
4657c8c0b82SPatrick Mooney 
4667c8c0b82SPatrick Mooney 	/* Release reservoir VA chunk */
4677c8c0b82SPatrick Mooney 	vmem_free(kvmm_arena, (void *)vmmr_va, vmmr_va_sz);
4687c8c0b82SPatrick Mooney 	vmmr_va = 0;
4697c8c0b82SPatrick Mooney 	vmmr_va_sz = 0;
4707c8c0b82SPatrick Mooney 	vmmr_total_limit = 0;
4717c8c0b82SPatrick Mooney 	vmmr_empty_last = 0;
4727c8c0b82SPatrick Mooney 
4737c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
4747c8c0b82SPatrick Mooney 	mutex_destroy(&vmmr_lock);
4757c8c0b82SPatrick Mooney }
4767c8c0b82SPatrick Mooney 
4777c8c0b82SPatrick Mooney bool
vmmr_is_empty()4787c8c0b82SPatrick Mooney vmmr_is_empty()
4797c8c0b82SPatrick Mooney {
4807c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
4817c8c0b82SPatrick Mooney 	bool res = (vmmr_alloc_sz == 0 && vmmr_alloc_transient_sz == 0 &&
4827c8c0b82SPatrick Mooney 	    vmmr_free_sz == 0 && vmmr_free_transient_sz == 0);
4837c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
4847c8c0b82SPatrick Mooney 	return (res);
4857c8c0b82SPatrick Mooney }
4867c8c0b82SPatrick Mooney 
4877c8c0b82SPatrick Mooney int
vmmr_alloc(size_t sz,bool transient,vmmr_region_t ** resp)4887c8c0b82SPatrick Mooney vmmr_alloc(size_t sz, bool transient, vmmr_region_t **resp)
4897c8c0b82SPatrick Mooney {
4907c8c0b82SPatrick Mooney 	VERIFY3U(sz & PAGEOFFSET, ==, 0);
4917c8c0b82SPatrick Mooney 
4927c8c0b82SPatrick Mooney 	if (!transient) {
4937c8c0b82SPatrick Mooney 		mutex_enter(&vmmr_lock);
4947c8c0b82SPatrick Mooney 		if (sz > vmmr_free_sz) {
4957c8c0b82SPatrick Mooney 			mutex_exit(&vmmr_lock);
4967c8c0b82SPatrick Mooney 			return (ENOSPC);
4977c8c0b82SPatrick Mooney 		}
4987c8c0b82SPatrick Mooney 	} else {
4997c8c0b82SPatrick Mooney 		int err;
5007c8c0b82SPatrick Mooney 
5016bba8b59SPatrick Mooney 		mutex_enter(&vmmr_lock);
5027c8c0b82SPatrick Mooney 		err = vmmr_add(sz, true);
5037c8c0b82SPatrick Mooney 		if (err != 0) {
5046bba8b59SPatrick Mooney 			mutex_exit(&vmmr_lock);
5057c8c0b82SPatrick Mooney 			return (err);
5067c8c0b82SPatrick Mooney 		}
5077c8c0b82SPatrick Mooney 		VERIFY3U(vmmr_free_transient_sz, >=, sz);
5087c8c0b82SPatrick Mooney 	}
5097c8c0b82SPatrick Mooney 
5107c8c0b82SPatrick Mooney 	vmmr_region_t *region;
5117c8c0b82SPatrick Mooney 	region = kmem_zalloc(sizeof (vmmr_region_t), KM_SLEEP);
5127c8c0b82SPatrick Mooney 	avl_create(&region->vr_spans, vmmr_cmp_region_addr,
5137c8c0b82SPatrick Mooney 	    sizeof (vmmr_span_t), offsetof(vmmr_span_t, vs_by_addr));
5147c8c0b82SPatrick Mooney 	region->vr_size = sz;
5157c8c0b82SPatrick Mooney 
5167c8c0b82SPatrick Mooney 	size_t remain = sz;
5177c8c0b82SPatrick Mooney 	uintptr_t map_at = 0;
5187c8c0b82SPatrick Mooney 	while (remain > 0) {
5197c8c0b82SPatrick Mooney 		vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp);
5207c8c0b82SPatrick Mooney 
5217c8c0b82SPatrick Mooney 		/*
5227c8c0b82SPatrick Mooney 		 * We have already ensured that adequate free memory is present
5237c8c0b82SPatrick Mooney 		 * in the reservoir for this allocation.
5247c8c0b82SPatrick Mooney 		 */
5257c8c0b82SPatrick Mooney 		VERIFY3P(span, !=, NULL);
5267c8c0b82SPatrick Mooney 		ASSERT3U(span->vs_size, <=, remain);
5277c8c0b82SPatrick Mooney 
5287c8c0b82SPatrick Mooney 		span->vs_region_addr = map_at;
5297c8c0b82SPatrick Mooney 		avl_add(&region->vr_spans, span);
5307c8c0b82SPatrick Mooney 		map_at += span->vs_size;
5317c8c0b82SPatrick Mooney 		remain -= span->vs_size;
5327c8c0b82SPatrick Mooney 	}
5337c8c0b82SPatrick Mooney 
5347c8c0b82SPatrick Mooney 	if (!transient) {
5357c8c0b82SPatrick Mooney 		vmmr_free_sz -= sz;
5367c8c0b82SPatrick Mooney 		vmmr_alloc_sz += sz;
5377c8c0b82SPatrick Mooney 	} else {
5387c8c0b82SPatrick Mooney 		vmmr_free_transient_sz -= sz;
5397c8c0b82SPatrick Mooney 		vmmr_alloc_transient_sz += sz;
5407c8c0b82SPatrick Mooney 		region->vr_transient = true;
5417c8c0b82SPatrick Mooney 	}
5427c8c0b82SPatrick Mooney 	list_insert_tail(&vmmr_alloc_regions, region);
5437c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
5447c8c0b82SPatrick Mooney 
5457c8c0b82SPatrick Mooney 	*resp = region;
5467c8c0b82SPatrick Mooney 	return (0);
5477c8c0b82SPatrick Mooney }
5487c8c0b82SPatrick Mooney 
5497c8c0b82SPatrick Mooney void *
vmmr_region_mem_at(vmmr_region_t * region,uintptr_t off)5507c8c0b82SPatrick Mooney vmmr_region_mem_at(vmmr_region_t *region, uintptr_t off)
5517c8c0b82SPatrick Mooney {
5527c8c0b82SPatrick Mooney 	/* just use KPM region for now */
5537c8c0b82SPatrick Mooney 	return (hat_kpm_pfn2va(vmmr_region_pfn_at(region, off)));
5547c8c0b82SPatrick Mooney }
5557c8c0b82SPatrick Mooney 
5567c8c0b82SPatrick Mooney pfn_t
vmmr_region_pfn_at(vmmr_region_t * region,uintptr_t off)5577c8c0b82SPatrick Mooney vmmr_region_pfn_at(vmmr_region_t *region, uintptr_t off)
5587c8c0b82SPatrick Mooney {
5597c8c0b82SPatrick Mooney 	VERIFY3U(off & PAGEOFFSET, ==, 0);
5607c8c0b82SPatrick Mooney 	VERIFY3U(off, <, region->vr_size);
5617c8c0b82SPatrick Mooney 
5627c8c0b82SPatrick Mooney 	vmmr_span_t search = {
5637c8c0b82SPatrick Mooney 		.vs_region_addr = off
5647c8c0b82SPatrick Mooney 	};
5657c8c0b82SPatrick Mooney 	avl_index_t where;
5667c8c0b82SPatrick Mooney 	vmmr_span_t *span = avl_find(&region->vr_spans, &search, &where);
5677c8c0b82SPatrick Mooney 
5687c8c0b82SPatrick Mooney 	if (span == NULL) {
5697c8c0b82SPatrick Mooney 		span = avl_nearest(&region->vr_spans, where, AVL_BEFORE);
5707c8c0b82SPatrick Mooney 		ASSERT3P(span, !=, NULL);
5717c8c0b82SPatrick Mooney 	}
5727c8c0b82SPatrick Mooney 	uintptr_t span_off = off - span->vs_region_addr + span->vs_addr;
5737c8c0b82SPatrick Mooney 	page_t *pp = page_find(&kvps[KV_VVP], (u_offset_t)span_off);
5747c8c0b82SPatrick Mooney 	VERIFY(pp != NULL);
5757c8c0b82SPatrick Mooney 	return (pp->p_pagenum);
5767c8c0b82SPatrick Mooney }
5777c8c0b82SPatrick Mooney 
5787c8c0b82SPatrick Mooney void
vmmr_free(vmmr_region_t * region)5797c8c0b82SPatrick Mooney vmmr_free(vmmr_region_t *region)
5807c8c0b82SPatrick Mooney {
5817c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
5827c8c0b82SPatrick Mooney 	if (!region->vr_transient) {
5837c8c0b82SPatrick Mooney 		VERIFY3U(region->vr_size, <=, vmmr_alloc_sz);
5847c8c0b82SPatrick Mooney 	} else {
5857c8c0b82SPatrick Mooney 		VERIFY3U(region->vr_size, <=, vmmr_alloc_transient_sz);
5867c8c0b82SPatrick Mooney 	}
5877c8c0b82SPatrick Mooney 	list_remove(&vmmr_alloc_regions, region);
5887c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
5897c8c0b82SPatrick Mooney 
5906bba8b59SPatrick Mooney 	/* Zero the contents (while not monopolizing vmmr_lock) */
5917c8c0b82SPatrick Mooney 	for (uintptr_t off = 0; off < region->vr_size; off += PAGESIZE) {
5927c8c0b82SPatrick Mooney 		bzero(vmmr_region_mem_at(region, off), PAGESIZE);
5937c8c0b82SPatrick Mooney 	}
5947c8c0b82SPatrick Mooney 
5957c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
5967c8c0b82SPatrick Mooney 
5977c8c0b82SPatrick Mooney 	/* Put the contained span(s) back in the free pool */
5987c8c0b82SPatrick Mooney 	void *cookie = NULL;
5997c8c0b82SPatrick Mooney 	vmmr_span_t *span;
6007c8c0b82SPatrick Mooney 	while ((span = avl_destroy_nodes(&region->vr_spans, &cookie)) != NULL) {
6017c8c0b82SPatrick Mooney 		span->vs_region_addr = 0;
6027c8c0b82SPatrick Mooney 		vmmr_tp_insert_concat(span, &vmmr_free_tp);
6037c8c0b82SPatrick Mooney 	}
6047c8c0b82SPatrick Mooney 	avl_destroy(&region->vr_spans);
6057c8c0b82SPatrick Mooney 	if (!region->vr_transient) {
6067c8c0b82SPatrick Mooney 		vmmr_free_sz += region->vr_size;
6077c8c0b82SPatrick Mooney 		vmmr_alloc_sz -= region->vr_size;
6087c8c0b82SPatrick Mooney 	} else {
6097c8c0b82SPatrick Mooney 		vmmr_free_transient_sz += region->vr_size;
6107c8c0b82SPatrick Mooney 		vmmr_alloc_transient_sz -= region->vr_size;
6117c8c0b82SPatrick Mooney 	}
6127c8c0b82SPatrick Mooney 
6137c8c0b82SPatrick Mooney 	if (region->vr_transient) {
614e0994bd2SPatrick Mooney 		/*
615e0994bd2SPatrick Mooney 		 * Since the transient capacity was previously allocated for
616e0994bd2SPatrick Mooney 		 * this region, its removal should not fail.
617e0994bd2SPatrick Mooney 		 */
618e0994bd2SPatrick Mooney 		VERIFY0(vmmr_remove(region->vr_size, true));
6197c8c0b82SPatrick Mooney 	}
6207c8c0b82SPatrick Mooney 	kmem_free(region, sizeof (*region));
6216bba8b59SPatrick Mooney 	mutex_exit(&vmmr_lock);
6227c8c0b82SPatrick Mooney }
6237c8c0b82SPatrick Mooney 
6247c8c0b82SPatrick Mooney static void
vmmr_destroy_pages(vmmr_span_t * span)6257c8c0b82SPatrick Mooney vmmr_destroy_pages(vmmr_span_t *span)
6267c8c0b82SPatrick Mooney {
6277c8c0b82SPatrick Mooney 	const uintptr_t end = span->vs_addr + span->vs_size;
6287c8c0b82SPatrick Mooney 	struct vnode *vp = &kvps[KV_VVP];
6297c8c0b82SPatrick Mooney 	for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) {
6307c8c0b82SPatrick Mooney 		page_t *pp;
6317c8c0b82SPatrick Mooney 
6327c8c0b82SPatrick Mooney 		/* Page-free logic cribbed from segkmem_xfree(): */
6337c8c0b82SPatrick Mooney 		pp = page_find(vp, (u_offset_t)pos);
6347c8c0b82SPatrick Mooney 		VERIFY(pp != NULL);
6357c8c0b82SPatrick Mooney 		if (!page_tryupgrade(pp)) {
6367c8c0b82SPatrick Mooney 			/*
6377c8c0b82SPatrick Mooney 			 * Some other thread has a sharelock. Wait for
6387c8c0b82SPatrick Mooney 			 * it to drop the lock so we can free this page.
6397c8c0b82SPatrick Mooney 			 */
6407c8c0b82SPatrick Mooney 			page_unlock(pp);
6417c8c0b82SPatrick Mooney 			pp = page_lookup(vp, (u_offset_t)pos, SE_EXCL);
6427c8c0b82SPatrick Mooney 		}
6437c8c0b82SPatrick Mooney 
6447c8c0b82SPatrick Mooney 		/*
6457c8c0b82SPatrick Mooney 		 * Clear p_lckcnt so page_destroy() doesn't update availrmem.
6467c8c0b82SPatrick Mooney 		 * That will be taken care of later via page_unresv().
6477c8c0b82SPatrick Mooney 		 */
6487c8c0b82SPatrick Mooney 		pp->p_lckcnt = 0;
6497c8c0b82SPatrick Mooney 		page_destroy(pp, 0);
6507c8c0b82SPatrick Mooney 	}
6517c8c0b82SPatrick Mooney }
6527c8c0b82SPatrick Mooney 
6537c8c0b82SPatrick Mooney static int
vmmr_alloc_pages(const vmmr_span_t * span)6547c8c0b82SPatrick Mooney vmmr_alloc_pages(const vmmr_span_t *span)
6557c8c0b82SPatrick Mooney {
6567c8c0b82SPatrick Mooney 	struct seg kseg = {
6577c8c0b82SPatrick Mooney 		.s_as = &kas
6587c8c0b82SPatrick Mooney 	};
6597c8c0b82SPatrick Mooney 	struct vnode *vp = &kvps[KV_VVP];
6607c8c0b82SPatrick Mooney 
6617c8c0b82SPatrick Mooney 	const uintptr_t end = span->vs_addr + span->vs_size;
6627c8c0b82SPatrick Mooney 	for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) {
6637c8c0b82SPatrick Mooney 		page_t *pp;
6647c8c0b82SPatrick Mooney 
6657c8c0b82SPatrick Mooney 		pp = page_create_va(vp, (u_offset_t)pos, PAGESIZE,
6667c8c0b82SPatrick Mooney 		    PG_EXCL | PG_NORELOC, &kseg, (void *)(vmmr_va + pos));
6677c8c0b82SPatrick Mooney 
6687c8c0b82SPatrick Mooney 		if (pp == NULL) {
6697c8c0b82SPatrick Mooney 			/* Destroy any already-created pages */
6707c8c0b82SPatrick Mooney 			if (pos != span->vs_addr) {
6717c8c0b82SPatrick Mooney 				vmmr_span_t destroy_span = {
6727c8c0b82SPatrick Mooney 					.vs_addr = span->vs_addr,
6737c8c0b82SPatrick Mooney 					.vs_size = pos - span->vs_addr,
6747c8c0b82SPatrick Mooney 				};
6757c8c0b82SPatrick Mooney 
6767c8c0b82SPatrick Mooney 				vmmr_destroy_pages(&destroy_span);
6777c8c0b82SPatrick Mooney 			}
6787c8c0b82SPatrick Mooney 			return (ENOMEM);
6797c8c0b82SPatrick Mooney 		}
6807c8c0b82SPatrick Mooney 
6817c8c0b82SPatrick Mooney 		/* mimic page state from segkmem */
6827c8c0b82SPatrick Mooney 		ASSERT(PAGE_EXCL(pp));
6837c8c0b82SPatrick Mooney 		page_io_unlock(pp);
6847c8c0b82SPatrick Mooney 		pp->p_lckcnt = 1;
6857c8c0b82SPatrick Mooney 		page_downgrade(pp);
6867c8c0b82SPatrick Mooney 
6877c8c0b82SPatrick Mooney 		/* pre-zero the page */
6887c8c0b82SPatrick Mooney 		bzero(hat_kpm_pfn2va(pp->p_pagenum), PAGESIZE);
6897c8c0b82SPatrick Mooney 	}
6907c8c0b82SPatrick Mooney 
6917c8c0b82SPatrick Mooney 	return (0);
6927c8c0b82SPatrick Mooney }
6937c8c0b82SPatrick Mooney 
6947c8c0b82SPatrick Mooney static int
vmmr_resv_wait()6957c8c0b82SPatrick Mooney vmmr_resv_wait()
6967c8c0b82SPatrick Mooney {
6977c8c0b82SPatrick Mooney 	if (delay_sig(hz >> 2) != 0) {
6987c8c0b82SPatrick Mooney 		/* bail due to interruption */
6997c8c0b82SPatrick Mooney 		return (0);
7007c8c0b82SPatrick Mooney 	}
7017c8c0b82SPatrick Mooney 	return (1);
7027c8c0b82SPatrick Mooney }
7037c8c0b82SPatrick Mooney 
7047c8c0b82SPatrick Mooney static void
vmmr_remove_raw(size_t sz)7057c8c0b82SPatrick Mooney vmmr_remove_raw(size_t sz)
7067c8c0b82SPatrick Mooney {
7077c8c0b82SPatrick Mooney 	VERIFY3U(sz & PAGEOFFSET, ==, 0);
7087c8c0b82SPatrick Mooney 	VERIFY(MUTEX_HELD(&vmmr_lock));
7097c8c0b82SPatrick Mooney 
7107c8c0b82SPatrick Mooney 	size_t remain = sz;
7117c8c0b82SPatrick Mooney 	while (remain > 0) {
7127c8c0b82SPatrick Mooney 		vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp);
7137c8c0b82SPatrick Mooney 
7147c8c0b82SPatrick Mooney 		/*
7157c8c0b82SPatrick Mooney 		 * The caller must ensure that at least `sz` amount is present
7167c8c0b82SPatrick Mooney 		 * in the free treepair.
7177c8c0b82SPatrick Mooney 		 */
7187c8c0b82SPatrick Mooney 		VERIFY3P(span, !=, NULL);
7197c8c0b82SPatrick Mooney 		ASSERT3U(span->vs_size, <=, remain);
7207c8c0b82SPatrick Mooney 
7217c8c0b82SPatrick Mooney 		/* TODO: perhaps arrange to destroy pages outside the lock? */
7227c8c0b82SPatrick Mooney 		vmmr_destroy_pages(span);
7237c8c0b82SPatrick Mooney 
7247c8c0b82SPatrick Mooney 		remain -= span->vs_size;
7257c8c0b82SPatrick Mooney 		vmmr_tp_insert_concat(span, &vmmr_empty_tp);
7267c8c0b82SPatrick Mooney 	}
7277c8c0b82SPatrick Mooney 
7287c8c0b82SPatrick Mooney 	vmmr_empty_sz += sz;
7297c8c0b82SPatrick Mooney }
7307c8c0b82SPatrick Mooney 
7316bba8b59SPatrick Mooney /*
7326bba8b59SPatrick Mooney  * Add memory to vmm reservoir.  Memory may be marked for transient use, where
7336bba8b59SPatrick Mooney  * the addition is part of a transient allocation from the reservoir.  Otherwise
7346bba8b59SPatrick Mooney  * it is placed in the reservoir to be available for non-transient allocations.
7356bba8b59SPatrick Mooney  *
7366bba8b59SPatrick Mooney  * Expects vmmr_lock to be held when called, and will return with it held, but
7376bba8b59SPatrick Mooney  * will drop it during portions of the addition.
7386bba8b59SPatrick Mooney  */
7396bba8b59SPatrick Mooney static int
vmmr_add(size_t sz,bool transient)7407c8c0b82SPatrick Mooney vmmr_add(size_t sz, bool transient)
7417c8c0b82SPatrick Mooney {
7427c8c0b82SPatrick Mooney 	VERIFY3U(sz & PAGEOFFSET, ==, 0);
7436bba8b59SPatrick Mooney 	VERIFY3U(sz, >, 0);
7446bba8b59SPatrick Mooney 	VERIFY(MUTEX_HELD(&vmmr_lock));
7457c8c0b82SPatrick Mooney 
7467c8c0b82SPatrick Mooney 	/*
7477c8c0b82SPatrick Mooney 	 * Make sure that the amount added is not going to breach the limits
7487c8c0b82SPatrick Mooney 	 * we've chosen
7497c8c0b82SPatrick Mooney 	 */
7507c8c0b82SPatrick Mooney 	const size_t current_total =
7517c8c0b82SPatrick Mooney 	    vmmr_alloc_sz + vmmr_free_sz + vmmr_adding_sz +
7527c8c0b82SPatrick Mooney 	    vmmr_alloc_transient_sz + vmmr_free_transient_sz;
7537c8c0b82SPatrick Mooney 	if ((current_total + sz) < current_total) {
7547c8c0b82SPatrick Mooney 		return (EOVERFLOW);
7557c8c0b82SPatrick Mooney 	}
7567c8c0b82SPatrick Mooney 	if ((current_total + sz) > vmmr_total_limit) {
7577c8c0b82SPatrick Mooney 		return (ENOSPC);
7587c8c0b82SPatrick Mooney 	}
7597c8c0b82SPatrick Mooney 	vmmr_adding_sz += sz;
7607c8c0b82SPatrick Mooney 	mutex_exit(&vmmr_lock);
7617c8c0b82SPatrick Mooney 
7627c8c0b82SPatrick Mooney 	/* Wait for enough pages to become available */
7637c8c0b82SPatrick Mooney 	if (page_xresv(sz >> PAGESHIFT, KM_SLEEP, vmmr_resv_wait) == 0) {
7647c8c0b82SPatrick Mooney 		mutex_enter(&vmmr_lock);
7657c8c0b82SPatrick Mooney 		vmmr_adding_sz -= sz;
7667c8c0b82SPatrick Mooney 		return (EINTR);
7677c8c0b82SPatrick Mooney 	}
7687c8c0b82SPatrick Mooney 
7697c8c0b82SPatrick Mooney 	mutex_enter(&vmmr_lock);
7707c8c0b82SPatrick Mooney 	size_t added = 0;
7717c8c0b82SPatrick Mooney 	size_t remain = sz;
7727c8c0b82SPatrick Mooney 	while (added < sz) {
7737c8c0b82SPatrick Mooney 		vmmr_span_t *span = NULL;
7747c8c0b82SPatrick Mooney 
7757c8c0b82SPatrick Mooney 		if (vmmr_empty_sz > 0) {
7767c8c0b82SPatrick Mooney 			span = vmmr_tp_remove_split(remain, &vmmr_empty_tp);
7777c8c0b82SPatrick Mooney 
7787c8c0b82SPatrick Mooney 			vmmr_empty_sz -= span->vs_size;
7797c8c0b82SPatrick Mooney 		} else {
7807c8c0b82SPatrick Mooney 			/*
7817c8c0b82SPatrick Mooney 			 * No empty space to fill with new pages, so just tack
7827c8c0b82SPatrick Mooney 			 * it on at the end instead.
7837c8c0b82SPatrick Mooney 			 */
7847c8c0b82SPatrick Mooney 			span = kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP);
7857c8c0b82SPatrick Mooney 			span->vs_addr = vmmr_empty_last;
7867c8c0b82SPatrick Mooney 			span->vs_size = remain;
7877c8c0b82SPatrick Mooney 			vmmr_empty_last += remain;
7887c8c0b82SPatrick Mooney 		}
7897c8c0b82SPatrick Mooney 		VERIFY3P(span, !=, NULL);
7907c8c0b82SPatrick Mooney 
7917c8c0b82SPatrick Mooney 
7927c8c0b82SPatrick Mooney 		/* Allocate the actual pages to back this span */
7937c8c0b82SPatrick Mooney 		mutex_exit(&vmmr_lock);
7947c8c0b82SPatrick Mooney 		int err = vmmr_alloc_pages(span);
7957c8c0b82SPatrick Mooney 		mutex_enter(&vmmr_lock);
7967c8c0b82SPatrick Mooney 
7977c8c0b82SPatrick Mooney 		/*
7987c8c0b82SPatrick Mooney 		 * If an error is encountered during page allocation for the
7997c8c0b82SPatrick Mooney 		 * span, unwind any progress made by the addition request.
8007c8c0b82SPatrick Mooney 		 */
8017c8c0b82SPatrick Mooney 		if (err != 0) {
8027c8c0b82SPatrick Mooney 			/*
8037c8c0b82SPatrick Mooney 			 * Without pages allocated to this span, it is now
8047c8c0b82SPatrick Mooney 			 * tracked as empty.
8057c8c0b82SPatrick Mooney 			 */
8067c8c0b82SPatrick Mooney 			vmmr_empty_sz += span->vs_size;
8077c8c0b82SPatrick Mooney 			vmmr_tp_insert_concat(span, &vmmr_empty_tp);
8087c8c0b82SPatrick Mooney 
8097c8c0b82SPatrick Mooney 			if (added != 0) {
8107c8c0b82SPatrick Mooney 				vmmr_remove_raw(added);
8117c8c0b82SPatrick Mooney 			}
8127c8c0b82SPatrick Mooney 
8137c8c0b82SPatrick Mooney 			vmmr_adding_sz -= sz;
8147c8c0b82SPatrick Mooney 
8157c8c0b82SPatrick Mooney 			page_unresv(sz >> PAGESHIFT);
8167c8c0b82SPatrick Mooney 			return (err);
8177c8c0b82SPatrick Mooney 		}
8187c8c0b82SPatrick Mooney 
8197c8c0b82SPatrick Mooney 		/*
8207c8c0b82SPatrick Mooney 		 * The allocated-page-bearing span is placed in the "free"
8217c8c0b82SPatrick Mooney 		 * treepair now, but is not officially exposed for consumption
8227c8c0b82SPatrick Mooney 		 * until `vmm_free_sz` or `vmm_free_transient_sz` are updated.
8237c8c0b82SPatrick Mooney 		 *
8247c8c0b82SPatrick Mooney 		 * This allows us to unwind the allocation in case of a failure
8257c8c0b82SPatrick Mooney 		 * without the risk of the freshly added span(s) being snapped
8267c8c0b82SPatrick Mooney 		 * up by a consumer already.
8277c8c0b82SPatrick Mooney 		 */
8287c8c0b82SPatrick Mooney 		added += span->vs_size;
8297c8c0b82SPatrick Mooney 		remain -= span->vs_size;
8307c8c0b82SPatrick Mooney 		vmmr_tp_insert_concat(span, &vmmr_free_tp);
8317c8c0b82SPatrick Mooney 	}
8327c8c0b82SPatrick Mooney 
8337c8c0b82SPatrick Mooney 	/* Make the added memory usable by exposing it to the size accounting */
8347c8c0b82SPatrick Mooney 	if (!transient) {
8357c8c0b82SPatrick Mooney 		vmmr_free_sz += added;
8367c8c0b82SPatrick Mooney 	} else {
8377c8c0b82SPatrick Mooney 		vmmr_free_transient_sz += added;
8387c8c0b82SPatrick Mooney 	}
8397c8c0b82SPatrick Mooney 	ASSERT3U(added, ==, sz);
8407c8c0b82SPatrick Mooney 	vmmr_adding_sz -= added;
8417c8c0b82SPatrick Mooney 
8427c8c0b82SPatrick Mooney 	return (0);
8437c8c0b82SPatrick Mooney }
8447c8c0b82SPatrick Mooney 
8456bba8b59SPatrick Mooney /*
8466bba8b59SPatrick Mooney  * Remove memory from vmm reservoir.  Normally this will remove memory from the
8476bba8b59SPatrick Mooney  * reservoir which was available for non-transient allocations.  If the removal
8486bba8b59SPatrick Mooney  * is part of a vmmr_free() of a transient allocation, it will act on only that
8496bba8b59SPatrick Mooney  * transient region being freed, not the available memory in the reservoir.
8506bba8b59SPatrick Mooney  *
8516bba8b59SPatrick Mooney  * Expects vmmr_lock to be held when called, and will return with it held, but
8526bba8b59SPatrick Mooney  * may drop it during portions of the removal.
8536bba8b59SPatrick Mooney  */
8546bba8b59SPatrick Mooney static int
vmmr_remove(size_t sz,bool transient)8557c8c0b82SPatrick Mooney vmmr_remove(size_t sz, bool transient)
8567c8c0b82SPatrick Mooney {
8577c8c0b82SPatrick Mooney 	VERIFY3U(sz & PAGEOFFSET, ==, 0);
8586bba8b59SPatrick Mooney 	VERIFY(sz);
8596bba8b59SPatrick Mooney 	VERIFY(MUTEX_HELD(&vmmr_lock));
8607c8c0b82SPatrick Mooney 
8617c8c0b82SPatrick Mooney 	if ((!transient && sz > vmmr_free_sz) ||
8627c8c0b82SPatrick Mooney 	    (transient && sz > vmmr_free_transient_sz)) {
8637c8c0b82SPatrick Mooney 		return (ENOSPC);
8647c8c0b82SPatrick Mooney 	}
8657c8c0b82SPatrick Mooney 
8667c8c0b82SPatrick Mooney 	vmmr_remove_raw(sz);
8677c8c0b82SPatrick Mooney 
8687c8c0b82SPatrick Mooney 	if (!transient) {
8697c8c0b82SPatrick Mooney 		vmmr_free_sz -= sz;
8707c8c0b82SPatrick Mooney 	} else {
8717c8c0b82SPatrick Mooney 		vmmr_free_transient_sz -= sz;
8727c8c0b82SPatrick Mooney 	}
8737c8c0b82SPatrick Mooney 	page_unresv(sz >> PAGESHIFT);
8747c8c0b82SPatrick Mooney 	return (0);
8757c8c0b82SPatrick Mooney }
8767c8c0b82SPatrick Mooney 
8776bba8b59SPatrick Mooney static int
vmmr_set_target(size_t target_sz,size_t chunk_sz,size_t * resp)8786bba8b59SPatrick Mooney vmmr_set_target(size_t target_sz, size_t chunk_sz, size_t *resp)
8796bba8b59SPatrick Mooney {
8806bba8b59SPatrick Mooney 	VERIFY(resp != NULL);
8816bba8b59SPatrick Mooney 
8826bba8b59SPatrick Mooney 	mutex_enter(&vmmr_lock);
8836bba8b59SPatrick Mooney 
8846bba8b59SPatrick Mooney 	size_t current_sz = vmmr_alloc_sz + vmmr_free_sz;
8856bba8b59SPatrick Mooney 
8866bba8b59SPatrick Mooney 	/* Be sure to communicate current size in case of an early bail-out */
8876bba8b59SPatrick Mooney 	*resp = current_sz;
8886bba8b59SPatrick Mooney 
8896bba8b59SPatrick Mooney 	if ((target_sz & PAGEOFFSET) != 0 ||
8906bba8b59SPatrick Mooney 	    (chunk_sz & PAGEOFFSET) != 0) {
8916bba8b59SPatrick Mooney 		mutex_exit(&vmmr_lock);
8926bba8b59SPatrick Mooney 		return (EINVAL);
8936bba8b59SPatrick Mooney 	}
8946bba8b59SPatrick Mooney 	/* Reject sentinel value */
8956bba8b59SPatrick Mooney 	if (target_sz == VMMR_TARGET_INACTIVE) {
8966bba8b59SPatrick Mooney 		mutex_exit(&vmmr_lock);
8976bba8b59SPatrick Mooney 		return (EINVAL);
8986bba8b59SPatrick Mooney 	}
8996bba8b59SPatrick Mooney 
9006bba8b59SPatrick Mooney 	/* Already at target size */
9016bba8b59SPatrick Mooney 	if (target_sz == current_sz) {
9026bba8b59SPatrick Mooney 		mutex_exit(&vmmr_lock);
9036bba8b59SPatrick Mooney 		return (0);
9046bba8b59SPatrick Mooney 	}
9056bba8b59SPatrick Mooney 
9066bba8b59SPatrick Mooney 	/* Reject racing requests size */
9076bba8b59SPatrick Mooney 	if (vmmr_target_sz != VMMR_TARGET_INACTIVE) {
9086bba8b59SPatrick Mooney 		mutex_exit(&vmmr_lock);
9096bba8b59SPatrick Mooney 		return (EALREADY);
9106bba8b59SPatrick Mooney 	}
9116bba8b59SPatrick Mooney 	/* Record the target now to excluding a racing request */
9126bba8b59SPatrick Mooney 	vmmr_target_sz = target_sz;
9136bba8b59SPatrick Mooney 
9146bba8b59SPatrick Mooney 	int err = 0;
9156bba8b59SPatrick Mooney 	do {
9166bba8b59SPatrick Mooney 		/* Be sensitive to signal interruption */
9176bba8b59SPatrick Mooney 		if (issig(JUSTLOOKING) != 0) {
9186bba8b59SPatrick Mooney 			mutex_exit(&vmmr_lock);
9196bba8b59SPatrick Mooney 			const bool sig_bail = issig(FORREAL) != 0;
9206bba8b59SPatrick Mooney 			mutex_enter(&vmmr_lock);
9216bba8b59SPatrick Mooney 			if (sig_bail) {
9226bba8b59SPatrick Mooney 				err = EINTR;
9236bba8b59SPatrick Mooney 				break;
9246bba8b59SPatrick Mooney 			}
9256bba8b59SPatrick Mooney 		}
9266bba8b59SPatrick Mooney 
9276bba8b59SPatrick Mooney 		if (current_sz > target_sz) {
9286bba8b59SPatrick Mooney 			/* Shrinking reservoir */
9296bba8b59SPatrick Mooney 
9306bba8b59SPatrick Mooney 			size_t req_sz = current_sz - target_sz;
9316bba8b59SPatrick Mooney 			if (chunk_sz != 0) {
9326bba8b59SPatrick Mooney 				req_sz = MIN(req_sz, chunk_sz);
9336bba8b59SPatrick Mooney 			}
9346bba8b59SPatrick Mooney 			err = vmmr_remove(req_sz, false);
9356bba8b59SPatrick Mooney 		} else {
9366bba8b59SPatrick Mooney 			/* Growing reservoir */
9376bba8b59SPatrick Mooney 			ASSERT(current_sz < target_sz);
9386bba8b59SPatrick Mooney 
9396bba8b59SPatrick Mooney 			size_t req_sz = target_sz - current_sz;
9406bba8b59SPatrick Mooney 			if (chunk_sz != 0) {
9416bba8b59SPatrick Mooney 				req_sz = MIN(req_sz, chunk_sz);
9426bba8b59SPatrick Mooney 			}
9436bba8b59SPatrick Mooney 			err = vmmr_add(req_sz, false);
9446bba8b59SPatrick Mooney 		}
9456bba8b59SPatrick Mooney 
9466bba8b59SPatrick Mooney 		current_sz = vmmr_alloc_sz + vmmr_free_sz;
9476bba8b59SPatrick Mooney 	} while (err == 0 && current_sz != target_sz);
9486bba8b59SPatrick Mooney 
9496bba8b59SPatrick Mooney 	/* Clear the target now that we are done (success or not) */
9506bba8b59SPatrick Mooney 	vmmr_target_sz = VMMR_TARGET_INACTIVE;
9516bba8b59SPatrick Mooney 	mutex_exit(&vmmr_lock);
9526bba8b59SPatrick Mooney 	*resp = current_sz;
9536bba8b59SPatrick Mooney 	return (err);
9546bba8b59SPatrick Mooney }
9556bba8b59SPatrick Mooney 
9567c8c0b82SPatrick Mooney int
vmmr_ioctl(int cmd,intptr_t arg,int md,cred_t * cr,int * rvalp)9577c8c0b82SPatrick Mooney vmmr_ioctl(int cmd, intptr_t arg, int md, cred_t *cr, int *rvalp)
9587c8c0b82SPatrick Mooney {
9596bba8b59SPatrick Mooney 	/*
9606bba8b59SPatrick Mooney 	 * Since an LP64 datamodel is enforced by our caller (vmm_ioctl()), we
9616bba8b59SPatrick Mooney 	 * do not need to duplicate such checks here.
9626bba8b59SPatrick Mooney 	 */
9636bba8b59SPatrick Mooney 
9647c8c0b82SPatrick Mooney 	switch (cmd) {
9657c8c0b82SPatrick Mooney 	case VMM_RESV_QUERY: {
9667c8c0b82SPatrick Mooney 		struct vmm_resv_query res;
9677c8c0b82SPatrick Mooney 		void *datap = (void *)(uintptr_t)arg;
9687c8c0b82SPatrick Mooney 
969f4659490SPatrick Mooney 		/* For now, anyone with access to vmmctl device can query */
9707c8c0b82SPatrick Mooney 		mutex_enter(&vmmr_lock);
9717c8c0b82SPatrick Mooney 		res.vrq_free_sz = vmmr_free_sz;
9727c8c0b82SPatrick Mooney 		res.vrq_alloc_sz = vmmr_alloc_sz;
9737c8c0b82SPatrick Mooney 		res.vrq_alloc_transient_sz = vmmr_alloc_transient_sz;
9747c8c0b82SPatrick Mooney 		res.vrq_limit = vmmr_total_limit;
9757c8c0b82SPatrick Mooney 		mutex_exit(&vmmr_lock);
9767c8c0b82SPatrick Mooney 		if (ddi_copyout(&res, datap, sizeof (res), md) != 0) {
9777c8c0b82SPatrick Mooney 			return (EFAULT);
9787c8c0b82SPatrick Mooney 		}
9797c8c0b82SPatrick Mooney 		break;
9807c8c0b82SPatrick Mooney 	}
9816bba8b59SPatrick Mooney 	case VMM_RESV_SET_TARGET: {
9827c8c0b82SPatrick Mooney 		if (secpolicy_sys_config(cr, B_FALSE) != 0) {
9837c8c0b82SPatrick Mooney 			return (EPERM);
9847c8c0b82SPatrick Mooney 		}
9856bba8b59SPatrick Mooney 
9866bba8b59SPatrick Mooney 		struct vmm_resv_target tgt;
9876bba8b59SPatrick Mooney 		void *datap = (void *)(uintptr_t)arg;
9886bba8b59SPatrick Mooney 
9896bba8b59SPatrick Mooney 		if (ddi_copyin(datap, &tgt, sizeof (tgt), md) != 0) {
9906bba8b59SPatrick Mooney 			return (EFAULT);
9917c8c0b82SPatrick Mooney 		}
9926bba8b59SPatrick Mooney 
9936bba8b59SPatrick Mooney 		int err = vmmr_set_target(tgt.vrt_target_sz, tgt.vrt_chunk_sz,
9946bba8b59SPatrick Mooney 		    &tgt.vrt_result_sz);
9956bba8b59SPatrick Mooney 
9966bba8b59SPatrick Mooney 		/*
9976bba8b59SPatrick Mooney 		 * Attempt to communicate the resultant size of the reservoir if
9986bba8b59SPatrick Mooney 		 * setting it to the target was a success, or if we were
9996bba8b59SPatrick Mooney 		 * interrupted (by a signal) while doing so.
10006bba8b59SPatrick Mooney 		 */
10016bba8b59SPatrick Mooney 		if (err == 0 || err == EINTR) {
10026bba8b59SPatrick Mooney 			if (ddi_copyout(&tgt, datap, sizeof (tgt), md) != 0) {
10036bba8b59SPatrick Mooney 				err = EFAULT;
10047c8c0b82SPatrick Mooney 			}
10056bba8b59SPatrick Mooney 		}
10066bba8b59SPatrick Mooney 
10076bba8b59SPatrick Mooney 		return (err);
10087c8c0b82SPatrick Mooney 	}
10097c8c0b82SPatrick Mooney 	default:
10107c8c0b82SPatrick Mooney 		return (ENOTTY);
10117c8c0b82SPatrick Mooney 	}
10127c8c0b82SPatrick Mooney 	return (0);
10137c8c0b82SPatrick Mooney }
1014