/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2021 Oxide Computer Company
 */

/*
 * VMM Memory Reservoir
 *
 *
 * In order to make the allocation of large (multi-GiB) chunks of memory
 * for bhyve VMs easier, we introduce the "VMM Reservoir", where system
 * operators can set aside a substantial portion of system memory exclusively
 * for VMs. This memory is unavailable for general use by the rest of the
 * system. Rather than having to scour the freelist, reap kmem caches, or put
 * pressure on the ARC, bhyve guest memory allocations can quickly determine if
 * there is adequate reservoir memory available. Since the pages stored in the
 * reservoir are pre-zeroed, they can be used immediately when allocated to a
 * guest. When the memory is returned to the reservoir, it is zeroed once more
 * to avoid leaking any sensitive data from that guest.
 *
 *
 * Transient Allocations
 *
 * While the explicit reservoir model may work well for some applications,
 * others may want a more traditional model, where pages for guest memory
 * objects are allocated on demand, rather than from a pool set aside from the
 * system. In this case, the allocation can be made in "transient" mode, where
 * the memory is allocated normally, even if there is free capacity in the
 * reservoir. When use of the transient allocation is complete (the guest is
 * halted and destroyed), the pages will be freed back to the system, rather
 * than added back to the reservoir.
 *
 * From an implementation standpoint, transient allocations follow the same
 * code paths as ones using the reservoir normally. Those allocations have a
 * tag which marks them as transient, and used/free size tallies are maintained
 * separately for normal and transient operations. When performing a transient
 * allocation, that amount of memory is immediately added to the reservoir,
 * from which the allocation can be made. When freeing a transient allocation,
 * a matching amount of memory is removed from the reservoir as part of the
 * operation. This allows both allocation types to coexist without too much
 * additional machinery.
 *
 *
 * Administration
 *
 * Operators may increase, decrease, and query the amount of memory
 * allocated to the reservoir, and from it to VMs, via ioctls against the
 * vmmctl device. The total amount added to the reservoir is arbitrarily
 * limited at this time by `vmmr_total_limit`, which defaults to all of
 * `physmem` less 120% of `pages_pp_maximum` (see vmmr_init()). This is
 * done to prevent the reservoir from inadvertently growing to a size where the
 * system has inadequate memory to make forward progress. Memory may only be
 * removed from the reservoir when it is free (not allocated by any guest VMs).
 *
 *
 * Page Tracking
 *
 * The reservoir currently uses vnode association to keep track of pages under
 * its control (either designated to the reservoir and free, or allocated to a
 * guest VM object). This means using the existing VM system primitives for
 * page_t instances being associated with a given (vnode, offset) tuple. It
 * means that spans of pages, either free or allocated, need only to store a
 * length (of the span) and an offset (into the vnode) in order to gain access
 * to all of the underlying pages associated with that span. Associating the
 * pages against `kvps[KV_VVP]` (the VMM kernel vnode) means they will be
 * properly tracked as KAS pages, but be excluded from normal dumps (unless the
 * operator has chosen to dump all of RAM).
777c8c0b82SPatrick Mooney */ 787c8c0b82SPatrick Mooney 797c8c0b82SPatrick Mooney #include <sys/types.h> 807c8c0b82SPatrick Mooney #include <sys/mutex.h> 817c8c0b82SPatrick Mooney #include <sys/avl.h> 827c8c0b82SPatrick Mooney #include <sys/list.h> 837c8c0b82SPatrick Mooney #include <sys/machparam.h> 847c8c0b82SPatrick Mooney #include <sys/kmem.h> 857c8c0b82SPatrick Mooney #include <sys/stddef.h> 867c8c0b82SPatrick Mooney #include <sys/null.h> 877c8c0b82SPatrick Mooney #include <sys/errno.h> 887c8c0b82SPatrick Mooney #include <sys/systm.h> 897c8c0b82SPatrick Mooney #include <sys/sunddi.h> 907c8c0b82SPatrick Mooney #include <sys/policy.h> 917c8c0b82SPatrick Mooney #include <vm/seg_kmem.h> 927c8c0b82SPatrick Mooney #include <vm/hat_i86.h> 937c8c0b82SPatrick Mooney 947c8c0b82SPatrick Mooney #include <sys/vmm_reservoir.h> 957c8c0b82SPatrick Mooney #include <sys/vmm_dev.h> 967c8c0b82SPatrick Mooney 977c8c0b82SPatrick Mooney static kmutex_t vmmr_lock; 987c8c0b82SPatrick Mooney 997c8c0b82SPatrick Mooney static size_t vmmr_free_sz; 1007c8c0b82SPatrick Mooney static size_t vmmr_free_transient_sz; 1017c8c0b82SPatrick Mooney static size_t vmmr_adding_sz; 1027c8c0b82SPatrick Mooney static size_t vmmr_alloc_sz; 1037c8c0b82SPatrick Mooney static size_t vmmr_alloc_transient_sz; 1047c8c0b82SPatrick Mooney static size_t vmmr_empty_sz; 1057c8c0b82SPatrick Mooney 1067c8c0b82SPatrick Mooney static uintptr_t vmmr_empty_last; 1077c8c0b82SPatrick Mooney /* Upper limit for the size (free + allocated) of the reservoir */ 1087c8c0b82SPatrick Mooney static size_t vmmr_total_limit; 1097c8c0b82SPatrick Mooney 1107c8c0b82SPatrick Mooney /* VA range allocated from the VMM arena for the mappings */ 1117c8c0b82SPatrick Mooney static uintptr_t vmmr_va; 1127c8c0b82SPatrick Mooney static uintptr_t vmmr_va_sz; 1137c8c0b82SPatrick Mooney 1147c8c0b82SPatrick Mooney /* Pair of AVL trees to store set of spans ordered by addr and size */ 1157c8c0b82SPatrick Mooney typedef struct vmmr_treepair { 
1167c8c0b82SPatrick Mooney avl_tree_t by_addr; 1177c8c0b82SPatrick Mooney avl_tree_t by_size; 1187c8c0b82SPatrick Mooney } vmmr_treepair_t; 1197c8c0b82SPatrick Mooney 1207c8c0b82SPatrick Mooney /* Spans of free memory in the reservoir */ 1217c8c0b82SPatrick Mooney static vmmr_treepair_t vmmr_free_tp; 1227c8c0b82SPatrick Mooney 1237c8c0b82SPatrick Mooney /* Spans of empty (not backed by memory) space in the reservoir */ 1247c8c0b82SPatrick Mooney static vmmr_treepair_t vmmr_empty_tp; 1257c8c0b82SPatrick Mooney 1267c8c0b82SPatrick Mooney /* Regions of memory allocated from the reservoir */ 1277c8c0b82SPatrick Mooney static list_t vmmr_alloc_regions; 1287c8c0b82SPatrick Mooney 1297c8c0b82SPatrick Mooney struct vmmr_span { 1307c8c0b82SPatrick Mooney uintptr_t vs_addr; 1317c8c0b82SPatrick Mooney size_t vs_size; 1327c8c0b82SPatrick Mooney avl_node_t vs_by_addr; 1337c8c0b82SPatrick Mooney avl_node_t vs_by_size; 1347c8c0b82SPatrick Mooney uintptr_t vs_region_addr; 1357c8c0b82SPatrick Mooney }; 1367c8c0b82SPatrick Mooney typedef struct vmmr_span vmmr_span_t; 1377c8c0b82SPatrick Mooney 1387c8c0b82SPatrick Mooney struct vmmr_region { 1397c8c0b82SPatrick Mooney size_t vr_size; 1407c8c0b82SPatrick Mooney avl_tree_t vr_spans; 1417c8c0b82SPatrick Mooney list_node_t vr_node; 1427c8c0b82SPatrick Mooney bool vr_transient; 1437c8c0b82SPatrick Mooney }; 1447c8c0b82SPatrick Mooney 1457c8c0b82SPatrick Mooney static int 1467c8c0b82SPatrick Mooney vmmr_cmp_addr(const void *a, const void *b) 1477c8c0b82SPatrick Mooney { 1487c8c0b82SPatrick Mooney const vmmr_span_t *sa = a; 1497c8c0b82SPatrick Mooney const vmmr_span_t *sb = b; 1507c8c0b82SPatrick Mooney 1517c8c0b82SPatrick Mooney if (sa->vs_addr == sb->vs_addr) { 1527c8c0b82SPatrick Mooney return (0); 1537c8c0b82SPatrick Mooney } else if (sa->vs_addr < sb->vs_addr) { 1547c8c0b82SPatrick Mooney return (-1); 1557c8c0b82SPatrick Mooney } else { 1567c8c0b82SPatrick Mooney return (1); 1577c8c0b82SPatrick Mooney } 1587c8c0b82SPatrick Mooney } 
1597c8c0b82SPatrick Mooney 1607c8c0b82SPatrick Mooney static int 1617c8c0b82SPatrick Mooney vmmr_cmp_size(const void *a, const void *b) 1627c8c0b82SPatrick Mooney { 1637c8c0b82SPatrick Mooney const vmmr_span_t *sa = a; 1647c8c0b82SPatrick Mooney const vmmr_span_t *sb = b; 1657c8c0b82SPatrick Mooney 1667c8c0b82SPatrick Mooney if (sa->vs_size == sb->vs_size) { 1677c8c0b82SPatrick Mooney /* 1687c8c0b82SPatrick Mooney * Since discontiguous spans could have the same size in a 1697c8c0b82SPatrick Mooney * by-size tree, differentiate them (as required by AVL) by 1707c8c0b82SPatrick Mooney * address so they can safely coexist while remaining sorted. 1717c8c0b82SPatrick Mooney */ 1727c8c0b82SPatrick Mooney return (vmmr_cmp_addr(a, b)); 1737c8c0b82SPatrick Mooney } else if (sa->vs_size < sb->vs_size) { 1747c8c0b82SPatrick Mooney return (-1); 1757c8c0b82SPatrick Mooney } else { 1767c8c0b82SPatrick Mooney return (1); 1777c8c0b82SPatrick Mooney } 1787c8c0b82SPatrick Mooney } 1797c8c0b82SPatrick Mooney 1807c8c0b82SPatrick Mooney static int 1817c8c0b82SPatrick Mooney vmmr_cmp_region_addr(const void *a, const void *b) 1827c8c0b82SPatrick Mooney { 1837c8c0b82SPatrick Mooney const vmmr_span_t *sa = a; 1847c8c0b82SPatrick Mooney const vmmr_span_t *sb = b; 1857c8c0b82SPatrick Mooney 1867c8c0b82SPatrick Mooney if (sa->vs_region_addr == sb->vs_region_addr) { 1877c8c0b82SPatrick Mooney return (0); 1887c8c0b82SPatrick Mooney } else if (sa->vs_region_addr < sb->vs_region_addr) { 1897c8c0b82SPatrick Mooney return (-1); 1907c8c0b82SPatrick Mooney } else { 1917c8c0b82SPatrick Mooney return (1); 1927c8c0b82SPatrick Mooney } 1937c8c0b82SPatrick Mooney } 1947c8c0b82SPatrick Mooney 1957c8c0b82SPatrick Mooney static void 1967c8c0b82SPatrick Mooney vmmr_tp_init(vmmr_treepair_t *tree) 1977c8c0b82SPatrick Mooney { 1987c8c0b82SPatrick Mooney avl_create(&tree->by_addr, vmmr_cmp_addr, sizeof (vmmr_span_t), 1997c8c0b82SPatrick Mooney offsetof(vmmr_span_t, vs_by_addr)); 2007c8c0b82SPatrick Mooney 
avl_create(&tree->by_size, vmmr_cmp_size, sizeof (vmmr_span_t), 2017c8c0b82SPatrick Mooney offsetof(vmmr_span_t, vs_by_size)); 2027c8c0b82SPatrick Mooney } 2037c8c0b82SPatrick Mooney 2047c8c0b82SPatrick Mooney static void 2057c8c0b82SPatrick Mooney vmmr_tp_destroy(vmmr_treepair_t *tree) 2067c8c0b82SPatrick Mooney { 2077c8c0b82SPatrick Mooney void *vcp = NULL; 2087c8c0b82SPatrick Mooney vmmr_span_t *span; 2097c8c0b82SPatrick Mooney 2107c8c0b82SPatrick Mooney while (avl_destroy_nodes(&tree->by_addr, &vcp) != NULL) { 2117c8c0b82SPatrick Mooney /* Freeing spans will be done when tearing down by-size tree */ 2127c8c0b82SPatrick Mooney } 2137c8c0b82SPatrick Mooney while ((span = avl_destroy_nodes(&tree->by_size, &vcp)) != NULL) { 2147c8c0b82SPatrick Mooney kmem_free(span, sizeof (*span)); 2157c8c0b82SPatrick Mooney } 2167c8c0b82SPatrick Mooney avl_destroy(&tree->by_addr); 2177c8c0b82SPatrick Mooney avl_destroy(&tree->by_size); 2187c8c0b82SPatrick Mooney } 2197c8c0b82SPatrick Mooney 2207c8c0b82SPatrick Mooney /* 2217c8c0b82SPatrick Mooney * Insert a vmmr_span_t into a treepair, concatenating if possible with adjacent 2227c8c0b82SPatrick Mooney * span(s). Such concatenation could result in the `to_add` span being freed, 2237c8c0b82SPatrick Mooney * so the caller cannot use it after this returns. 
2247c8c0b82SPatrick Mooney */ 2257c8c0b82SPatrick Mooney static void 2267c8c0b82SPatrick Mooney vmmr_tp_insert_concat(vmmr_span_t *to_add, vmmr_treepair_t *tree) 2277c8c0b82SPatrick Mooney { 2287c8c0b82SPatrick Mooney avl_tree_t *by_addr = &tree->by_addr; 2297c8c0b82SPatrick Mooney avl_tree_t *by_size = &tree->by_size; 2307c8c0b82SPatrick Mooney vmmr_span_t *node; 2317c8c0b82SPatrick Mooney avl_index_t where; 2327c8c0b82SPatrick Mooney 2337c8c0b82SPatrick Mooney /* This addr should not already exist in the treepair */ 2347c8c0b82SPatrick Mooney node = avl_find(by_addr, to_add, &where); 2357c8c0b82SPatrick Mooney ASSERT3P(node, ==, NULL); 2367c8c0b82SPatrick Mooney 2377c8c0b82SPatrick Mooney node = avl_nearest(by_addr, where, AVL_BEFORE); 2387c8c0b82SPatrick Mooney if (node != NULL && 2397c8c0b82SPatrick Mooney (node->vs_addr + node->vs_size) == to_add->vs_addr) { 2407c8c0b82SPatrick Mooney /* concat with preceeding item */ 2417c8c0b82SPatrick Mooney avl_remove(by_addr, node); 2427c8c0b82SPatrick Mooney avl_remove(by_size, node); 2437c8c0b82SPatrick Mooney node->vs_size += to_add->vs_size; 2447c8c0b82SPatrick Mooney kmem_free(to_add, sizeof (*to_add)); 2457c8c0b82SPatrick Mooney 2467c8c0b82SPatrick Mooney /* 2477c8c0b82SPatrick Mooney * Since this now-concatenated span could be adjacent one 2487c8c0b82SPatrick Mooney * trailing it, fall through to perform that check. 
2497c8c0b82SPatrick Mooney */ 2507c8c0b82SPatrick Mooney to_add = node; 2517c8c0b82SPatrick Mooney } 2527c8c0b82SPatrick Mooney 2537c8c0b82SPatrick Mooney node = avl_nearest(by_addr, where, AVL_AFTER); 2547c8c0b82SPatrick Mooney if (node != NULL && 2557c8c0b82SPatrick Mooney (to_add->vs_addr + to_add->vs_size) == node->vs_addr) { 2567c8c0b82SPatrick Mooney /* concat with trailing item */ 2577c8c0b82SPatrick Mooney avl_remove(by_addr, node); 2587c8c0b82SPatrick Mooney avl_remove(by_size, node); 2597c8c0b82SPatrick Mooney node->vs_addr = to_add->vs_addr; 2607c8c0b82SPatrick Mooney node->vs_size += to_add->vs_size; 2617c8c0b82SPatrick Mooney avl_add(by_addr, node); 2627c8c0b82SPatrick Mooney avl_add(by_size, node); 2637c8c0b82SPatrick Mooney 2647c8c0b82SPatrick Mooney kmem_free(to_add, sizeof (*to_add)); 2657c8c0b82SPatrick Mooney return; 2667c8c0b82SPatrick Mooney } 2677c8c0b82SPatrick Mooney 2687c8c0b82SPatrick Mooney /* simply insert */ 2697c8c0b82SPatrick Mooney avl_add(by_addr, to_add); 2707c8c0b82SPatrick Mooney avl_add(by_size, to_add); 2717c8c0b82SPatrick Mooney } 2727c8c0b82SPatrick Mooney 2737c8c0b82SPatrick Mooney /* 2747c8c0b82SPatrick Mooney * Remove a vmmr_span_t from a treepair, splitting if necessary when a span of 2757c8c0b82SPatrick Mooney * the exact target size is not present, but a larger one is. May return a span 2767c8c0b82SPatrick Mooney * with a size smaller than the target if splitting is not an option. 
2777c8c0b82SPatrick Mooney */ 2787c8c0b82SPatrick Mooney static vmmr_span_t * 2797c8c0b82SPatrick Mooney vmmr_tp_remove_split(size_t target_sz, vmmr_treepair_t *tree) 2807c8c0b82SPatrick Mooney { 2817c8c0b82SPatrick Mooney avl_tree_t *by_addr = &tree->by_addr; 2827c8c0b82SPatrick Mooney avl_tree_t *by_size = &tree->by_size; 2837c8c0b82SPatrick Mooney vmmr_span_t *span; 2847c8c0b82SPatrick Mooney avl_index_t where; 2857c8c0b82SPatrick Mooney 2867c8c0b82SPatrick Mooney ASSERT3U(target_sz, !=, 0); 2877c8c0b82SPatrick Mooney ASSERT(!avl_is_empty(by_addr)); 2887c8c0b82SPatrick Mooney ASSERT(!avl_is_empty(by_size)); 2897c8c0b82SPatrick Mooney 2907c8c0b82SPatrick Mooney vmmr_span_t search = { .vs_size = target_sz }; 2917c8c0b82SPatrick Mooney span = avl_find(by_size, &search, &where); 2927c8c0b82SPatrick Mooney if (span == NULL) { 2937c8c0b82SPatrick Mooney /* Try for a larger span (instead of exact match) */ 2947c8c0b82SPatrick Mooney span = avl_nearest(by_size, where, AVL_AFTER); 2957c8c0b82SPatrick Mooney if (span == NULL) { 2967c8c0b82SPatrick Mooney /* 2977c8c0b82SPatrick Mooney * Caller will need to collect several smaller spans in 2987c8c0b82SPatrick Mooney * order to fulfill their request. 
2997c8c0b82SPatrick Mooney */ 3007c8c0b82SPatrick Mooney span = avl_nearest(by_size, where, AVL_BEFORE); 3017c8c0b82SPatrick Mooney ASSERT3P(span, !=, NULL); 3027c8c0b82SPatrick Mooney } 3037c8c0b82SPatrick Mooney } 3047c8c0b82SPatrick Mooney 3057c8c0b82SPatrick Mooney if (span->vs_size <= target_sz) { 3067c8c0b82SPatrick Mooney avl_remove(by_size, span); 3077c8c0b82SPatrick Mooney avl_remove(by_addr, span); 3087c8c0b82SPatrick Mooney 3097c8c0b82SPatrick Mooney return (span); 3107c8c0b82SPatrick Mooney } else { 3117c8c0b82SPatrick Mooney /* Split off adequate chunk from larger span */ 3127c8c0b82SPatrick Mooney uintptr_t start = span->vs_addr + span->vs_size - target_sz; 3137c8c0b82SPatrick Mooney 3147c8c0b82SPatrick Mooney avl_remove(by_size, span); 3157c8c0b82SPatrick Mooney span->vs_size -= target_sz; 3167c8c0b82SPatrick Mooney avl_add(by_size, span); 3177c8c0b82SPatrick Mooney 3187c8c0b82SPatrick Mooney vmmr_span_t *split_span = 3197c8c0b82SPatrick Mooney kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP); 3207c8c0b82SPatrick Mooney split_span->vs_addr = start; 3217c8c0b82SPatrick Mooney split_span->vs_size = target_sz; 3227c8c0b82SPatrick Mooney 3237c8c0b82SPatrick Mooney return (split_span); 3247c8c0b82SPatrick Mooney } 3257c8c0b82SPatrick Mooney } 3267c8c0b82SPatrick Mooney 3277c8c0b82SPatrick Mooney void 3287c8c0b82SPatrick Mooney vmmr_init() 3297c8c0b82SPatrick Mooney { 3307c8c0b82SPatrick Mooney mutex_init(&vmmr_lock, NULL, MUTEX_DEFAULT, NULL); 3317c8c0b82SPatrick Mooney 3327c8c0b82SPatrick Mooney /* 3337c8c0b82SPatrick Mooney * `vmm_total_limit` represents the absolute maximum size of the VMM 3347c8c0b82SPatrick Mooney * memory reservoir. It is meant to provide some measure of protection 3357c8c0b82SPatrick Mooney * against an operator pushing the system into unrecoverable memory 3367c8c0b82SPatrick Mooney * starvation through explicit or transient additions to the reservoir. 
3377c8c0b82SPatrick Mooney * 3387c8c0b82SPatrick Mooney * There will be many situations where this limit would be inadequate to 3397c8c0b82SPatrick Mooney * prevent kernel memory starvation in the face of certain operator 3407c8c0b82SPatrick Mooney * actions. It is a balance to be struck between safety and allowing 3417c8c0b82SPatrick Mooney * large systems to reach high utilization. 3427c8c0b82SPatrick Mooney * 3437c8c0b82SPatrick Mooney * The value is based off of pages_pp_maximum: "Number of currently 3447c8c0b82SPatrick Mooney * available pages that cannot be 'locked'". It is sized as all of 3457c8c0b82SPatrick Mooney * `physmem` less 120% of `pages_pp_maximum`. 3467c8c0b82SPatrick Mooney */ 3477c8c0b82SPatrick Mooney vmmr_total_limit = 3487c8c0b82SPatrick Mooney (((physmem * 10) - (pages_pp_maximum * 12)) * PAGESIZE) / 10; 3497c8c0b82SPatrick Mooney 3507c8c0b82SPatrick Mooney vmmr_empty_last = 0; 3517c8c0b82SPatrick Mooney vmmr_free_sz = 0; 3527c8c0b82SPatrick Mooney vmmr_alloc_sz = 0; 3537c8c0b82SPatrick Mooney vmmr_empty_sz = 0; 3547c8c0b82SPatrick Mooney vmmr_adding_sz = 0; 3557c8c0b82SPatrick Mooney vmmr_free_transient_sz = 0; 3567c8c0b82SPatrick Mooney vmmr_alloc_transient_sz = 0; 3577c8c0b82SPatrick Mooney 3587c8c0b82SPatrick Mooney vmmr_tp_init(&vmmr_free_tp); 3597c8c0b82SPatrick Mooney vmmr_tp_init(&vmmr_empty_tp); 3607c8c0b82SPatrick Mooney 3617c8c0b82SPatrick Mooney list_create(&vmmr_alloc_regions, sizeof (vmmr_region_t), 3627c8c0b82SPatrick Mooney offsetof(vmmr_region_t, vr_node)); 3637c8c0b82SPatrick Mooney 3647c8c0b82SPatrick Mooney /* Grab a chunk of VA for the reservoir */ 3657c8c0b82SPatrick Mooney vmmr_va_sz = physmem * PAGESIZE; 3667c8c0b82SPatrick Mooney vmmr_va = (uintptr_t)vmem_alloc(kvmm_arena, vmmr_va_sz, VM_SLEEP); 3677c8c0b82SPatrick Mooney } 3687c8c0b82SPatrick Mooney 3697c8c0b82SPatrick Mooney void 3707c8c0b82SPatrick Mooney vmmr_fini() 3717c8c0b82SPatrick Mooney { 3727c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 
3737c8c0b82SPatrick Mooney VERIFY3U(vmmr_alloc_sz, ==, 0); 3747c8c0b82SPatrick Mooney VERIFY3U(vmmr_free_sz, ==, 0); 3757c8c0b82SPatrick Mooney VERIFY3U(vmmr_adding_sz, ==, 0); 3767c8c0b82SPatrick Mooney VERIFY3U(vmmr_alloc_transient_sz, ==, 0); 3777c8c0b82SPatrick Mooney VERIFY3U(vmmr_free_transient_sz, ==, 0); 3787c8c0b82SPatrick Mooney VERIFY(avl_is_empty(&vmmr_free_tp.by_addr)); 3797c8c0b82SPatrick Mooney VERIFY(avl_is_empty(&vmmr_free_tp.by_size)); 3807c8c0b82SPatrick Mooney VERIFY(list_is_empty(&vmmr_alloc_regions)); 3817c8c0b82SPatrick Mooney 3827c8c0b82SPatrick Mooney vmmr_tp_destroy(&vmmr_free_tp); 3837c8c0b82SPatrick Mooney vmmr_tp_destroy(&vmmr_empty_tp); 3847c8c0b82SPatrick Mooney list_destroy(&vmmr_alloc_regions); 3857c8c0b82SPatrick Mooney 3867c8c0b82SPatrick Mooney /* Release reservoir VA chunk */ 3877c8c0b82SPatrick Mooney vmem_free(kvmm_arena, (void *)vmmr_va, vmmr_va_sz); 3887c8c0b82SPatrick Mooney vmmr_va = 0; 3897c8c0b82SPatrick Mooney vmmr_va_sz = 0; 3907c8c0b82SPatrick Mooney vmmr_total_limit = 0; 3917c8c0b82SPatrick Mooney vmmr_empty_last = 0; 3927c8c0b82SPatrick Mooney 3937c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 3947c8c0b82SPatrick Mooney mutex_destroy(&vmmr_lock); 3957c8c0b82SPatrick Mooney } 3967c8c0b82SPatrick Mooney 3977c8c0b82SPatrick Mooney bool 3987c8c0b82SPatrick Mooney vmmr_is_empty() 3997c8c0b82SPatrick Mooney { 4007c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 4017c8c0b82SPatrick Mooney bool res = (vmmr_alloc_sz == 0 && vmmr_alloc_transient_sz == 0 && 4027c8c0b82SPatrick Mooney vmmr_free_sz == 0 && vmmr_free_transient_sz == 0); 4037c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 4047c8c0b82SPatrick Mooney return (res); 4057c8c0b82SPatrick Mooney } 4067c8c0b82SPatrick Mooney 4077c8c0b82SPatrick Mooney int 4087c8c0b82SPatrick Mooney vmmr_alloc(size_t sz, bool transient, vmmr_region_t **resp) 4097c8c0b82SPatrick Mooney { 4107c8c0b82SPatrick Mooney VERIFY3U(sz & PAGEOFFSET, ==, 0); 4117c8c0b82SPatrick Mooney 
4127c8c0b82SPatrick Mooney if (!transient) { 4137c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 4147c8c0b82SPatrick Mooney if (sz > vmmr_free_sz) { 4157c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 4167c8c0b82SPatrick Mooney return (ENOSPC); 4177c8c0b82SPatrick Mooney } 4187c8c0b82SPatrick Mooney } else { 4197c8c0b82SPatrick Mooney int err; 4207c8c0b82SPatrick Mooney 4217c8c0b82SPatrick Mooney err = vmmr_add(sz, true); 4227c8c0b82SPatrick Mooney if (err != 0) { 4237c8c0b82SPatrick Mooney return (err); 4247c8c0b82SPatrick Mooney } 4257c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 4267c8c0b82SPatrick Mooney VERIFY3U(vmmr_free_transient_sz, >=, sz); 4277c8c0b82SPatrick Mooney } 4287c8c0b82SPatrick Mooney 4297c8c0b82SPatrick Mooney vmmr_region_t *region; 4307c8c0b82SPatrick Mooney region = kmem_zalloc(sizeof (vmmr_region_t), KM_SLEEP); 4317c8c0b82SPatrick Mooney avl_create(®ion->vr_spans, vmmr_cmp_region_addr, 4327c8c0b82SPatrick Mooney sizeof (vmmr_span_t), offsetof(vmmr_span_t, vs_by_addr)); 4337c8c0b82SPatrick Mooney region->vr_size = sz; 4347c8c0b82SPatrick Mooney 4357c8c0b82SPatrick Mooney size_t remain = sz; 4367c8c0b82SPatrick Mooney uintptr_t map_at = 0; 4377c8c0b82SPatrick Mooney while (remain > 0) { 4387c8c0b82SPatrick Mooney vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp); 4397c8c0b82SPatrick Mooney 4407c8c0b82SPatrick Mooney /* 4417c8c0b82SPatrick Mooney * We have already ensured that adequate free memory is present 4427c8c0b82SPatrick Mooney * in the reservoir for this allocation. 
4437c8c0b82SPatrick Mooney */ 4447c8c0b82SPatrick Mooney VERIFY3P(span, !=, NULL); 4457c8c0b82SPatrick Mooney ASSERT3U(span->vs_size, <=, remain); 4467c8c0b82SPatrick Mooney 4477c8c0b82SPatrick Mooney span->vs_region_addr = map_at; 4487c8c0b82SPatrick Mooney avl_add(®ion->vr_spans, span); 4497c8c0b82SPatrick Mooney map_at += span->vs_size; 4507c8c0b82SPatrick Mooney remain -= span->vs_size; 4517c8c0b82SPatrick Mooney } 4527c8c0b82SPatrick Mooney 4537c8c0b82SPatrick Mooney if (!transient) { 4547c8c0b82SPatrick Mooney vmmr_free_sz -= sz; 4557c8c0b82SPatrick Mooney vmmr_alloc_sz += sz; 4567c8c0b82SPatrick Mooney } else { 4577c8c0b82SPatrick Mooney vmmr_free_transient_sz -= sz; 4587c8c0b82SPatrick Mooney vmmr_alloc_transient_sz += sz; 4597c8c0b82SPatrick Mooney region->vr_transient = true; 4607c8c0b82SPatrick Mooney } 4617c8c0b82SPatrick Mooney list_insert_tail(&vmmr_alloc_regions, region); 4627c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 4637c8c0b82SPatrick Mooney 4647c8c0b82SPatrick Mooney *resp = region; 4657c8c0b82SPatrick Mooney return (0); 4667c8c0b82SPatrick Mooney } 4677c8c0b82SPatrick Mooney 4687c8c0b82SPatrick Mooney void * 4697c8c0b82SPatrick Mooney vmmr_region_mem_at(vmmr_region_t *region, uintptr_t off) 4707c8c0b82SPatrick Mooney { 4717c8c0b82SPatrick Mooney /* just use KPM region for now */ 4727c8c0b82SPatrick Mooney return (hat_kpm_pfn2va(vmmr_region_pfn_at(region, off))); 4737c8c0b82SPatrick Mooney } 4747c8c0b82SPatrick Mooney 4757c8c0b82SPatrick Mooney pfn_t 4767c8c0b82SPatrick Mooney vmmr_region_pfn_at(vmmr_region_t *region, uintptr_t off) 4777c8c0b82SPatrick Mooney { 4787c8c0b82SPatrick Mooney VERIFY3U(off & PAGEOFFSET, ==, 0); 4797c8c0b82SPatrick Mooney VERIFY3U(off, <, region->vr_size); 4807c8c0b82SPatrick Mooney 4817c8c0b82SPatrick Mooney vmmr_span_t search = { 4827c8c0b82SPatrick Mooney .vs_region_addr = off 4837c8c0b82SPatrick Mooney }; 4847c8c0b82SPatrick Mooney avl_index_t where; 4857c8c0b82SPatrick Mooney vmmr_span_t *span = 
avl_find(®ion->vr_spans, &search, &where); 4867c8c0b82SPatrick Mooney 4877c8c0b82SPatrick Mooney if (span == NULL) { 4887c8c0b82SPatrick Mooney span = avl_nearest(®ion->vr_spans, where, AVL_BEFORE); 4897c8c0b82SPatrick Mooney ASSERT3P(span, !=, NULL); 4907c8c0b82SPatrick Mooney } 4917c8c0b82SPatrick Mooney uintptr_t span_off = off - span->vs_region_addr + span->vs_addr; 4927c8c0b82SPatrick Mooney page_t *pp = page_find(&kvps[KV_VVP], (u_offset_t)span_off); 4937c8c0b82SPatrick Mooney VERIFY(pp != NULL); 4947c8c0b82SPatrick Mooney return (pp->p_pagenum); 4957c8c0b82SPatrick Mooney } 4967c8c0b82SPatrick Mooney 4977c8c0b82SPatrick Mooney void 4987c8c0b82SPatrick Mooney vmmr_free(vmmr_region_t *region) 4997c8c0b82SPatrick Mooney { 5007c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 5017c8c0b82SPatrick Mooney if (!region->vr_transient) { 5027c8c0b82SPatrick Mooney VERIFY3U(region->vr_size, <=, vmmr_alloc_sz); 5037c8c0b82SPatrick Mooney } else { 5047c8c0b82SPatrick Mooney VERIFY3U(region->vr_size, <=, vmmr_alloc_transient_sz); 5057c8c0b82SPatrick Mooney } 5067c8c0b82SPatrick Mooney list_remove(&vmmr_alloc_regions, region); 5077c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 5087c8c0b82SPatrick Mooney 5097c8c0b82SPatrick Mooney /* Zero the contents */ 5107c8c0b82SPatrick Mooney for (uintptr_t off = 0; off < region->vr_size; off += PAGESIZE) { 5117c8c0b82SPatrick Mooney bzero(vmmr_region_mem_at(region, off), PAGESIZE); 5127c8c0b82SPatrick Mooney } 5137c8c0b82SPatrick Mooney 5147c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 5157c8c0b82SPatrick Mooney 5167c8c0b82SPatrick Mooney /* Put the contained span(s) back in the free pool */ 5177c8c0b82SPatrick Mooney void *cookie = NULL; 5187c8c0b82SPatrick Mooney vmmr_span_t *span; 5197c8c0b82SPatrick Mooney while ((span = avl_destroy_nodes(®ion->vr_spans, &cookie)) != NULL) { 5207c8c0b82SPatrick Mooney span->vs_region_addr = 0; 5217c8c0b82SPatrick Mooney vmmr_tp_insert_concat(span, &vmmr_free_tp); 5227c8c0b82SPatrick Mooney } 
5237c8c0b82SPatrick Mooney avl_destroy(®ion->vr_spans); 5247c8c0b82SPatrick Mooney if (!region->vr_transient) { 5257c8c0b82SPatrick Mooney vmmr_free_sz += region->vr_size; 5267c8c0b82SPatrick Mooney vmmr_alloc_sz -= region->vr_size; 5277c8c0b82SPatrick Mooney } else { 5287c8c0b82SPatrick Mooney vmmr_free_transient_sz += region->vr_size; 5297c8c0b82SPatrick Mooney vmmr_alloc_transient_sz -= region->vr_size; 5307c8c0b82SPatrick Mooney } 5317c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 5327c8c0b82SPatrick Mooney 5337c8c0b82SPatrick Mooney if (region->vr_transient) { 534*e0994bd2SPatrick Mooney /* 535*e0994bd2SPatrick Mooney * Since the transient capacity was previously allocated for 536*e0994bd2SPatrick Mooney * this region, its removal should not fail. 537*e0994bd2SPatrick Mooney */ 538*e0994bd2SPatrick Mooney VERIFY0(vmmr_remove(region->vr_size, true)); 5397c8c0b82SPatrick Mooney } 5407c8c0b82SPatrick Mooney kmem_free(region, sizeof (*region)); 5417c8c0b82SPatrick Mooney } 5427c8c0b82SPatrick Mooney 5437c8c0b82SPatrick Mooney static void 5447c8c0b82SPatrick Mooney vmmr_destroy_pages(vmmr_span_t *span) 5457c8c0b82SPatrick Mooney { 5467c8c0b82SPatrick Mooney const uintptr_t end = span->vs_addr + span->vs_size; 5477c8c0b82SPatrick Mooney struct vnode *vp = &kvps[KV_VVP]; 5487c8c0b82SPatrick Mooney for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) { 5497c8c0b82SPatrick Mooney page_t *pp; 5507c8c0b82SPatrick Mooney 5517c8c0b82SPatrick Mooney /* Page-free logic cribbed from segkmem_xfree(): */ 5527c8c0b82SPatrick Mooney pp = page_find(vp, (u_offset_t)pos); 5537c8c0b82SPatrick Mooney VERIFY(pp != NULL); 5547c8c0b82SPatrick Mooney if (!page_tryupgrade(pp)) { 5557c8c0b82SPatrick Mooney /* 5567c8c0b82SPatrick Mooney * Some other thread has a sharelock. Wait for 5577c8c0b82SPatrick Mooney * it to drop the lock so we can free this page. 
5587c8c0b82SPatrick Mooney */ 5597c8c0b82SPatrick Mooney page_unlock(pp); 5607c8c0b82SPatrick Mooney pp = page_lookup(vp, (u_offset_t)pos, SE_EXCL); 5617c8c0b82SPatrick Mooney } 5627c8c0b82SPatrick Mooney 5637c8c0b82SPatrick Mooney /* 5647c8c0b82SPatrick Mooney * Clear p_lckcnt so page_destroy() doesn't update availrmem. 5657c8c0b82SPatrick Mooney * That will be taken care of later via page_unresv(). 5667c8c0b82SPatrick Mooney */ 5677c8c0b82SPatrick Mooney pp->p_lckcnt = 0; 5687c8c0b82SPatrick Mooney page_destroy(pp, 0); 5697c8c0b82SPatrick Mooney } 5707c8c0b82SPatrick Mooney } 5717c8c0b82SPatrick Mooney 5727c8c0b82SPatrick Mooney static int 5737c8c0b82SPatrick Mooney vmmr_alloc_pages(const vmmr_span_t *span) 5747c8c0b82SPatrick Mooney { 5757c8c0b82SPatrick Mooney struct seg kseg = { 5767c8c0b82SPatrick Mooney .s_as = &kas 5777c8c0b82SPatrick Mooney }; 5787c8c0b82SPatrick Mooney struct vnode *vp = &kvps[KV_VVP]; 5797c8c0b82SPatrick Mooney 5807c8c0b82SPatrick Mooney const uintptr_t end = span->vs_addr + span->vs_size; 5817c8c0b82SPatrick Mooney for (uintptr_t pos = span->vs_addr; pos < end; pos += PAGESIZE) { 5827c8c0b82SPatrick Mooney page_t *pp; 5837c8c0b82SPatrick Mooney 5847c8c0b82SPatrick Mooney pp = page_create_va(vp, (u_offset_t)pos, PAGESIZE, 5857c8c0b82SPatrick Mooney PG_EXCL | PG_NORELOC, &kseg, (void *)(vmmr_va + pos)); 5867c8c0b82SPatrick Mooney 5877c8c0b82SPatrick Mooney if (pp == NULL) { 5887c8c0b82SPatrick Mooney /* Destroy any already-created pages */ 5897c8c0b82SPatrick Mooney if (pos != span->vs_addr) { 5907c8c0b82SPatrick Mooney vmmr_span_t destroy_span = { 5917c8c0b82SPatrick Mooney .vs_addr = span->vs_addr, 5927c8c0b82SPatrick Mooney .vs_size = pos - span->vs_addr, 5937c8c0b82SPatrick Mooney }; 5947c8c0b82SPatrick Mooney 5957c8c0b82SPatrick Mooney vmmr_destroy_pages(&destroy_span); 5967c8c0b82SPatrick Mooney } 5977c8c0b82SPatrick Mooney return (ENOMEM); 5987c8c0b82SPatrick Mooney } 5997c8c0b82SPatrick Mooney 6007c8c0b82SPatrick Mooney /* mimic 
page state from segkmem */ 6017c8c0b82SPatrick Mooney ASSERT(PAGE_EXCL(pp)); 6027c8c0b82SPatrick Mooney page_io_unlock(pp); 6037c8c0b82SPatrick Mooney pp->p_lckcnt = 1; 6047c8c0b82SPatrick Mooney page_downgrade(pp); 6057c8c0b82SPatrick Mooney 6067c8c0b82SPatrick Mooney /* pre-zero the page */ 6077c8c0b82SPatrick Mooney bzero(hat_kpm_pfn2va(pp->p_pagenum), PAGESIZE); 6087c8c0b82SPatrick Mooney } 6097c8c0b82SPatrick Mooney 6107c8c0b82SPatrick Mooney return (0); 6117c8c0b82SPatrick Mooney } 6127c8c0b82SPatrick Mooney 6137c8c0b82SPatrick Mooney static int 6147c8c0b82SPatrick Mooney vmmr_resv_wait() 6157c8c0b82SPatrick Mooney { 6167c8c0b82SPatrick Mooney if (delay_sig(hz >> 2) != 0) { 6177c8c0b82SPatrick Mooney /* bail due to interruption */ 6187c8c0b82SPatrick Mooney return (0); 6197c8c0b82SPatrick Mooney } 6207c8c0b82SPatrick Mooney return (1); 6217c8c0b82SPatrick Mooney } 6227c8c0b82SPatrick Mooney 6237c8c0b82SPatrick Mooney static void 6247c8c0b82SPatrick Mooney vmmr_remove_raw(size_t sz) 6257c8c0b82SPatrick Mooney { 6267c8c0b82SPatrick Mooney VERIFY3U(sz & PAGEOFFSET, ==, 0); 6277c8c0b82SPatrick Mooney VERIFY(MUTEX_HELD(&vmmr_lock)); 6287c8c0b82SPatrick Mooney 6297c8c0b82SPatrick Mooney size_t remain = sz; 6307c8c0b82SPatrick Mooney while (remain > 0) { 6317c8c0b82SPatrick Mooney vmmr_span_t *span = vmmr_tp_remove_split(remain, &vmmr_free_tp); 6327c8c0b82SPatrick Mooney 6337c8c0b82SPatrick Mooney /* 6347c8c0b82SPatrick Mooney * The caller must ensure that at least `sz` amount is present 6357c8c0b82SPatrick Mooney * in the free treepair. 6367c8c0b82SPatrick Mooney */ 6377c8c0b82SPatrick Mooney VERIFY3P(span, !=, NULL); 6387c8c0b82SPatrick Mooney ASSERT3U(span->vs_size, <=, remain); 6397c8c0b82SPatrick Mooney 6407c8c0b82SPatrick Mooney /* TODO: perhaps arrange to destroy pages outside the lock? 
*/ 6417c8c0b82SPatrick Mooney vmmr_destroy_pages(span); 6427c8c0b82SPatrick Mooney 6437c8c0b82SPatrick Mooney remain -= span->vs_size; 6447c8c0b82SPatrick Mooney vmmr_tp_insert_concat(span, &vmmr_empty_tp); 6457c8c0b82SPatrick Mooney } 6467c8c0b82SPatrick Mooney 6477c8c0b82SPatrick Mooney vmmr_empty_sz += sz; 6487c8c0b82SPatrick Mooney } 6497c8c0b82SPatrick Mooney 6507c8c0b82SPatrick Mooney int 6517c8c0b82SPatrick Mooney vmmr_add(size_t sz, bool transient) 6527c8c0b82SPatrick Mooney { 6537c8c0b82SPatrick Mooney VERIFY3U(sz & PAGEOFFSET, ==, 0); 6547c8c0b82SPatrick Mooney 6557c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 6567c8c0b82SPatrick Mooney /* 6577c8c0b82SPatrick Mooney * Make sure that the amount added is not going to breach the limits 6587c8c0b82SPatrick Mooney * we've chosen 6597c8c0b82SPatrick Mooney */ 6607c8c0b82SPatrick Mooney const size_t current_total = 6617c8c0b82SPatrick Mooney vmmr_alloc_sz + vmmr_free_sz + vmmr_adding_sz + 6627c8c0b82SPatrick Mooney vmmr_alloc_transient_sz + vmmr_free_transient_sz; 6637c8c0b82SPatrick Mooney if ((current_total + sz) < current_total) { 6647c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 6657c8c0b82SPatrick Mooney return (EOVERFLOW); 6667c8c0b82SPatrick Mooney } 6677c8c0b82SPatrick Mooney if ((current_total + sz) > vmmr_total_limit) { 6687c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 6697c8c0b82SPatrick Mooney return (ENOSPC); 6707c8c0b82SPatrick Mooney } 6717c8c0b82SPatrick Mooney vmmr_adding_sz += sz; 6727c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 6737c8c0b82SPatrick Mooney 6747c8c0b82SPatrick Mooney /* Wait for enough pages to become available */ 6757c8c0b82SPatrick Mooney if (page_xresv(sz >> PAGESHIFT, KM_SLEEP, vmmr_resv_wait) == 0) { 6767c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 6777c8c0b82SPatrick Mooney vmmr_adding_sz -= sz; 6787c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 6797c8c0b82SPatrick Mooney 6807c8c0b82SPatrick Mooney return (EINTR); 6817c8c0b82SPatrick Mooney } 6827c8c0b82SPatrick 
Mooney 6837c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 6847c8c0b82SPatrick Mooney size_t added = 0; 6857c8c0b82SPatrick Mooney size_t remain = sz; 6867c8c0b82SPatrick Mooney while (added < sz) { 6877c8c0b82SPatrick Mooney vmmr_span_t *span = NULL; 6887c8c0b82SPatrick Mooney 6897c8c0b82SPatrick Mooney if (vmmr_empty_sz > 0) { 6907c8c0b82SPatrick Mooney span = vmmr_tp_remove_split(remain, &vmmr_empty_tp); 6917c8c0b82SPatrick Mooney 6927c8c0b82SPatrick Mooney vmmr_empty_sz -= span->vs_size; 6937c8c0b82SPatrick Mooney } else { 6947c8c0b82SPatrick Mooney /* 6957c8c0b82SPatrick Mooney * No empty space to fill with new pages, so just tack 6967c8c0b82SPatrick Mooney * it on at the end instead. 6977c8c0b82SPatrick Mooney */ 6987c8c0b82SPatrick Mooney span = kmem_zalloc(sizeof (vmmr_span_t), KM_SLEEP); 6997c8c0b82SPatrick Mooney span->vs_addr = vmmr_empty_last; 7007c8c0b82SPatrick Mooney span->vs_size = remain; 7017c8c0b82SPatrick Mooney vmmr_empty_last += remain; 7027c8c0b82SPatrick Mooney } 7037c8c0b82SPatrick Mooney VERIFY3P(span, !=, NULL); 7047c8c0b82SPatrick Mooney 7057c8c0b82SPatrick Mooney 7067c8c0b82SPatrick Mooney /* Allocate the actual pages to back this span */ 7077c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 7087c8c0b82SPatrick Mooney int err = vmmr_alloc_pages(span); 7097c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 7107c8c0b82SPatrick Mooney 7117c8c0b82SPatrick Mooney /* 7127c8c0b82SPatrick Mooney * If an error is encountered during page allocation for the 7137c8c0b82SPatrick Mooney * span, unwind any progress made by the addition request. 7147c8c0b82SPatrick Mooney */ 7157c8c0b82SPatrick Mooney if (err != 0) { 7167c8c0b82SPatrick Mooney /* 7177c8c0b82SPatrick Mooney * Without pages allocated to this span, it is now 7187c8c0b82SPatrick Mooney * tracked as empty. 
7197c8c0b82SPatrick Mooney */ 7207c8c0b82SPatrick Mooney vmmr_empty_sz += span->vs_size; 7217c8c0b82SPatrick Mooney vmmr_tp_insert_concat(span, &vmmr_empty_tp); 7227c8c0b82SPatrick Mooney 7237c8c0b82SPatrick Mooney if (added != 0) { 7247c8c0b82SPatrick Mooney vmmr_remove_raw(added); 7257c8c0b82SPatrick Mooney } 7267c8c0b82SPatrick Mooney 7277c8c0b82SPatrick Mooney vmmr_adding_sz -= sz; 7287c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 7297c8c0b82SPatrick Mooney 7307c8c0b82SPatrick Mooney page_unresv(sz >> PAGESHIFT); 7317c8c0b82SPatrick Mooney return (err); 7327c8c0b82SPatrick Mooney } 7337c8c0b82SPatrick Mooney 7347c8c0b82SPatrick Mooney /* 7357c8c0b82SPatrick Mooney * The allocated-page-bearing span is placed in the "free" 7367c8c0b82SPatrick Mooney * treepair now, but is not officially exposed for consumption 7377c8c0b82SPatrick Mooney * until `vmm_free_sz` or `vmm_free_transient_sz` are updated. 7387c8c0b82SPatrick Mooney * 7397c8c0b82SPatrick Mooney * This allows us to unwind the allocation in case of a failure 7407c8c0b82SPatrick Mooney * without the risk of the freshly added span(s) being snapped 7417c8c0b82SPatrick Mooney * up by a consumer already. 
7427c8c0b82SPatrick Mooney */ 7437c8c0b82SPatrick Mooney added += span->vs_size; 7447c8c0b82SPatrick Mooney remain -= span->vs_size; 7457c8c0b82SPatrick Mooney vmmr_tp_insert_concat(span, &vmmr_free_tp); 7467c8c0b82SPatrick Mooney } 7477c8c0b82SPatrick Mooney 7487c8c0b82SPatrick Mooney /* Make the added memory usable by exposing it to the size accounting */ 7497c8c0b82SPatrick Mooney if (!transient) { 7507c8c0b82SPatrick Mooney vmmr_free_sz += added; 7517c8c0b82SPatrick Mooney } else { 7527c8c0b82SPatrick Mooney vmmr_free_transient_sz += added; 7537c8c0b82SPatrick Mooney } 7547c8c0b82SPatrick Mooney ASSERT3U(added, ==, sz); 7557c8c0b82SPatrick Mooney vmmr_adding_sz -= added; 7567c8c0b82SPatrick Mooney 7577c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 7587c8c0b82SPatrick Mooney return (0); 7597c8c0b82SPatrick Mooney } 7607c8c0b82SPatrick Mooney 7617c8c0b82SPatrick Mooney int 7627c8c0b82SPatrick Mooney vmmr_remove(size_t sz, bool transient) 7637c8c0b82SPatrick Mooney { 7647c8c0b82SPatrick Mooney VERIFY3U(sz & PAGEOFFSET, ==, 0); 7657c8c0b82SPatrick Mooney 7667c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 7677c8c0b82SPatrick Mooney if ((!transient && sz > vmmr_free_sz) || 7687c8c0b82SPatrick Mooney (transient && sz > vmmr_free_transient_sz)) { 7697c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 7707c8c0b82SPatrick Mooney return (ENOSPC); 7717c8c0b82SPatrick Mooney } 7727c8c0b82SPatrick Mooney 7737c8c0b82SPatrick Mooney vmmr_remove_raw(sz); 7747c8c0b82SPatrick Mooney 7757c8c0b82SPatrick Mooney if (!transient) { 7767c8c0b82SPatrick Mooney vmmr_free_sz -= sz; 7777c8c0b82SPatrick Mooney } else { 7787c8c0b82SPatrick Mooney vmmr_free_transient_sz -= sz; 7797c8c0b82SPatrick Mooney } 7807c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 7817c8c0b82SPatrick Mooney page_unresv(sz >> PAGESHIFT); 7827c8c0b82SPatrick Mooney return (0); 7837c8c0b82SPatrick Mooney } 7847c8c0b82SPatrick Mooney 7857c8c0b82SPatrick Mooney int 7867c8c0b82SPatrick Mooney vmmr_ioctl(int cmd, intptr_t arg, 
int md, cred_t *cr, int *rvalp) 7877c8c0b82SPatrick Mooney { 7887c8c0b82SPatrick Mooney switch (cmd) { 7897c8c0b82SPatrick Mooney case VMM_RESV_QUERY: { 7907c8c0b82SPatrick Mooney struct vmm_resv_query res; 7917c8c0b82SPatrick Mooney void *datap = (void *)(uintptr_t)arg; 7927c8c0b82SPatrick Mooney 7937c8c0b82SPatrick Mooney /* For now, anyone in GZ can query */ 7947c8c0b82SPatrick Mooney if (crgetzoneid(cr) != GLOBAL_ZONEID) { 7957c8c0b82SPatrick Mooney return (EPERM); 7967c8c0b82SPatrick Mooney } 7977c8c0b82SPatrick Mooney mutex_enter(&vmmr_lock); 7987c8c0b82SPatrick Mooney res.vrq_free_sz = vmmr_free_sz; 7997c8c0b82SPatrick Mooney res.vrq_alloc_sz = vmmr_alloc_sz; 8007c8c0b82SPatrick Mooney res.vrq_alloc_transient_sz = vmmr_alloc_transient_sz; 8017c8c0b82SPatrick Mooney res.vrq_limit = vmmr_total_limit; 8027c8c0b82SPatrick Mooney mutex_exit(&vmmr_lock); 8037c8c0b82SPatrick Mooney if (ddi_copyout(&res, datap, sizeof (res), md) != 0) { 8047c8c0b82SPatrick Mooney return (EFAULT); 8057c8c0b82SPatrick Mooney } 8067c8c0b82SPatrick Mooney break; 8077c8c0b82SPatrick Mooney } 8087c8c0b82SPatrick Mooney case VMM_RESV_ADD: { 8097c8c0b82SPatrick Mooney if (secpolicy_sys_config(cr, B_FALSE) != 0) { 8107c8c0b82SPatrick Mooney return (EPERM); 8117c8c0b82SPatrick Mooney } 8127c8c0b82SPatrick Mooney return (vmmr_add((size_t)arg, false)); 8137c8c0b82SPatrick Mooney } 8147c8c0b82SPatrick Mooney case VMM_RESV_REMOVE: { 8157c8c0b82SPatrick Mooney if (secpolicy_sys_config(cr, B_FALSE) != 0) { 8167c8c0b82SPatrick Mooney return (EPERM); 8177c8c0b82SPatrick Mooney } 8187c8c0b82SPatrick Mooney return (vmmr_remove((size_t)arg, false)); 8197c8c0b82SPatrick Mooney } 8207c8c0b82SPatrick Mooney default: 8217c8c0b82SPatrick Mooney return (ENOTTY); 8227c8c0b82SPatrick Mooney } 8237c8c0b82SPatrick Mooney return (0); 8247c8c0b82SPatrick Mooney } 825