/*-
 * Copyright (c) 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)vm_kern.c     8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *      Kernel memory management.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>         /* for ticks and hz */
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

vm_map_t kernel_map=0;
vm_map_t kmem_map=0;
vm_map_t exec_map=0;
vm_map_t pipe_map;
vm_map_t buffer_map=0;

const void *zero_region;
CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);

/*
 *      kmem_alloc_nofault:
 *
 *      Allocate a virtual address range with no underlying object and
 *      no initial mapping to physical memory.  Any mapping from this
 *      range to physical memory must be explicitly created prior to
 *      its use, typically with pmap_qenter().  Any attempt to create
 *      a mapping on demand through vm_fault() will result in a panic.
 */
vm_offset_t
kmem_alloc_nofault(map, size)
        vm_map_t map;
        vm_size_t size;
{
        vm_offset_t addr;
        int result;

        size = round_page(size);
        addr = vm_map_min(map);
        result = vm_map_find(map, NULL, 0, &addr, size, VMFS_ANY_SPACE,
            VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
        if (result != KERN_SUCCESS) {
                return (0);
        }
        return (addr);
}
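/*
 * Usage sketch (illustrative only; the `pages' array and page count
 * `npages' stand for whatever the caller already owns): reserve KVA here,
 * back it explicitly before use, and tear the mapping down before the
 * range is released with kmem_free():
 *
 *      va = kmem_alloc_nofault(kernel_map, npages * PAGE_SIZE);
 *      if (va == 0)
 *              return (ENOMEM);
 *      pmap_qenter(va, pages, npages);
 *      ...
 *      pmap_qremove(va, npages);
 *      kmem_free(kernel_map, va, npages * PAGE_SIZE);
 */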
/*
 *      kmem_alloc_nofault_space:
 *
 *      Allocate a virtual address range with no underlying object and
 *      no initial mapping to physical memory within the specified
 *      address space.  Any mapping from this range to physical memory
 *      must be explicitly created prior to its use, typically with
 *      pmap_qenter().  Any attempt to create a mapping on demand
 *      through vm_fault() will result in a panic.
 */
vm_offset_t
kmem_alloc_nofault_space(map, size, find_space)
        vm_map_t map;
        vm_size_t size;
        int find_space;
{
        vm_offset_t addr;
        int result;

        size = round_page(size);
        addr = vm_map_min(map);
        result = vm_map_find(map, NULL, 0, &addr, size, find_space,
            VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
        if (result != KERN_SUCCESS) {
                return (0);
        }
        return (addr);
}

/*
 *      Allocate wired-down memory in the kernel's address map
 *      or a submap.
 */
vm_offset_t
kmem_alloc(map, size)
        vm_map_t map;
        vm_size_t size;
{
        vm_offset_t addr;
        vm_offset_t offset;
        vm_offset_t i;

        size = round_page(size);

        /*
         * Use the kernel object for wired-down kernel pages. Assume that no
         * region of the kernel object is referenced more than once.
         */

        /*
         * Locate sufficient space in the map.  This will give us the final
         * virtual address for the new memory, and thus will tell us the
         * offset within the kernel map.
         */
        vm_map_lock(map);
        if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
                vm_map_unlock(map);
                return (0);
        }
        offset = addr - VM_MIN_KERNEL_ADDRESS;
        vm_object_reference(kernel_object);
        vm_map_insert(map, kernel_object, offset, addr, addr + size,
            VM_PROT_ALL, VM_PROT_ALL, 0);
        vm_map_unlock(map);

        /*
         * Guarantee that there are pages already in this object before
         * calling vm_map_wire.  This is to prevent the following
         * scenario:
         *
         * 1) Threads have swapped out, so that there is a pager for the
         * kernel_object. 2) The kmsg zone is empty, and so we are
         * kmem_allocing a new page for it. 3) vm_map_wire calls vm_fault;
         * there is no page, but there is a pager, so we call
         * pager_data_request. But the kmsg zone is empty, so we must
         * kmem_alloc. 4) goto 1 5) Even if the kmsg zone is not empty: when
         * we get the data back from the pager, it will be (very stale)
         * non-zero data. kmem_alloc is defined to return zero-filled memory.
         *
         * We're intentionally not activating the pages we allocate to prevent
         * a race with page-out.  vm_map_wire will wire the pages.
         */
        VM_OBJECT_LOCK(kernel_object);
        for (i = 0; i < size; i += PAGE_SIZE) {
                vm_page_t mem;

                mem = vm_page_grab(kernel_object, OFF_TO_IDX(offset + i),
                    VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
                mem->valid = VM_PAGE_BITS_ALL;
                KASSERT((mem->flags & PG_UNMANAGED) != 0,
                    ("kmem_alloc: page %p is managed", mem));
        }
        VM_OBJECT_UNLOCK(kernel_object);

        /*
         * And finally, mark the data as non-pageable.
         */
        (void) vm_map_wire(map, addr, addr + size,
            VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);

        return (addr);
}
/*
 *      kmem_free:
 *
 *      Release a region of kernel virtual memory allocated
 *      with kmem_alloc, and return the physical pages
 *      associated with that region.
 *
 *      This routine may not block on kernel maps.
 */
void
kmem_free(map, addr, size)
        vm_map_t map;
        vm_offset_t addr;
        vm_size_t size;
{

        (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
}
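/*
 * Usage sketch (illustrative; `bytes' is any page-rounded size chosen by
 * the caller): wired, zero-filled kernel memory from the two routines
 * above is obtained and released as a pair:
 *
 *      buf = kmem_alloc(kernel_map, bytes);
 *      if (buf == 0)
 *              return (ENOMEM);
 *      ...
 *      kmem_free(kernel_map, buf, bytes);
 */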
/*
 *      kmem_suballoc:
 *
 *      Allocates a map to manage a subrange
 *      of the kernel virtual address space.
 *
 *      Arguments are as follows:
 *
 *      parent          Map to take range from
 *      min, max        Returned endpoints of map
 *      size            Size of range to find
 *      superpage_align Request that min is superpage aligned
 */
vm_map_t
kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
    vm_size_t size, boolean_t superpage_align)
{
        int ret;
        vm_map_t result;

        size = round_page(size);

        *min = vm_map_min(parent);
        ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ?
            VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
            MAP_ACC_NO_CHARGE);
        if (ret != KERN_SUCCESS)
                panic("kmem_suballoc: bad status return of %d", ret);
        *max = *min + size;
        result = vm_map_create(vm_map_pmap(parent), *min, *max);
        if (result == NULL)
                panic("kmem_suballoc: cannot create submap");
        if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
                panic("kmem_suballoc: unable to change range to submap");
        return (result);
}
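/*
 * Sketch of how a boot-time submap (such as the exec_map or pipe_map
 * declared at the top of this file) is typically carved out of the
 * kernel map; the 16MB size and the `submap_min'/`submap_max' variables
 * are illustrative placeholders:
 *
 *      vm_offset_t submap_min, submap_max;
 *      vm_map_t submap;
 *
 *      submap = kmem_suballoc(kernel_map, &submap_min, &submap_max,
 *          16 * 1024 * 1024, FALSE);
 */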
/*
 *      kmem_malloc:
 *
 *      Allocate wired-down memory in the kernel's address map for the higher
 *      level kernel memory allocator (kern/kern_malloc.c).  We cannot use
 *      kmem_alloc() because we may need to allocate memory at interrupt
 *      level where we cannot block (canwait == FALSE).
 *
 *      This routine has its own private kernel submap (kmem_map) and object
 *      (kmem_object).  This, combined with the fact that only malloc uses
 *      this routine, ensures that we will never block in map or object waits.
 *
 *      We don't worry about expanding the map (adding entries) since entries
 *      for wired maps are statically allocated.
 *
 *      `map' is ONLY allowed to be kmem_map or one of the mbuf submaps to
 *      which we never free.
 */
vm_offset_t
kmem_malloc(map, size, flags)
        vm_map_t map;
        vm_size_t size;
        int flags;
{
        vm_offset_t addr;
        int i, rv;

        size = round_page(size);
        addr = vm_map_min(map);

        /*
         * Locate sufficient space in the map.  This will give us the final
         * virtual address for the new memory, and thus will tell us the
         * offset within the kernel map.
         */
        vm_map_lock(map);
        if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
                vm_map_unlock(map);
                if ((flags & M_NOWAIT) == 0) {
                        for (i = 0; i < 8; i++) {
                                EVENTHANDLER_INVOKE(vm_lowmem, 0);
                                uma_reclaim();
                                vm_map_lock(map);
                                if (vm_map_findspace(map, vm_map_min(map),
                                    size, &addr) == 0) {
                                        break;
                                }
                                vm_map_unlock(map);
                                tsleep(&i, 0, "nokva", (hz / 4) * (i + 1));
                        }
                        if (i == 8) {
                                panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
                                    (long)size, (long)map->size);
                        }
                } else {
                        return (0);
                }
        }

        rv = kmem_back(map, addr, size, flags);
        vm_map_unlock(map);
        return (rv == KERN_SUCCESS ? addr : 0);
}
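/*
 * Sketch of the calling convention (illustrative; `bytes' stands for the
 * page-rounded request coming from the higher-level allocator):
 *
 *      va = kmem_malloc(kmem_map, bytes, M_NOWAIT);
 * or
 *      va = kmem_malloc(kmem_map, bytes, M_WAITOK | M_ZERO);
 *
 * Only the M_NOWAIT form can return 0; with M_WAITOK the routine reclaims
 * and sleeps for KVA, and ultimately panics rather than fail.
 */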
/*
 *      kmem_back:
 *
 *      Allocate physical pages for the specified virtual address range.
 */
int
kmem_back(vm_map_t map, vm_offset_t addr, vm_size_t size, int flags)
{
        vm_offset_t offset, i;
        vm_map_entry_t entry;
        vm_page_t m;
        int pflags;
        boolean_t found;

        KASSERT(vm_map_locked(map), ("kmem_back: map %p is not locked", map));
        offset = addr - VM_MIN_KERNEL_ADDRESS;
        vm_object_reference(kmem_object);
        vm_map_insert(map, kmem_object, offset, addr, addr + size,
            VM_PROT_ALL, VM_PROT_ALL, 0);

        /*
         * Assert: vm_map_insert() will never be able to extend the
         * previous entry so vm_map_lookup_entry() will find a new
         * entry exactly corresponding to this address range and it
         * will have wired_count == 0.
         */
        found = vm_map_lookup_entry(map, addr, &entry);
        KASSERT(found && entry->start == addr && entry->end == addr + size &&
            entry->wired_count == 0 &&
            (entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0,
            ("kmem_back: entry not found or misaligned"));

        if ((flags & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
                pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
        else
                pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;

        if (flags & M_ZERO)
                pflags |= VM_ALLOC_ZERO;

        VM_OBJECT_LOCK(kmem_object);
        for (i = 0; i < size; i += PAGE_SIZE) {
retry:
                m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), pflags);

                /*
                 * Ran out of space, free everything up and return. Don't need
                 * to lock page queues here as we know that the pages we got
                 * aren't on any queues.
                 */
                if (m == NULL) {
                        if ((flags & M_NOWAIT) == 0) {
                                VM_OBJECT_UNLOCK(kmem_object);
                                entry->eflags |= MAP_ENTRY_IN_TRANSITION;
                                vm_map_unlock(map);
                                VM_WAIT;
                                vm_map_lock(map);
                                KASSERT((entry->eflags &
                                    (MAP_ENTRY_IN_TRANSITION |
                                    MAP_ENTRY_NEEDS_WAKEUP)) ==
                                    MAP_ENTRY_IN_TRANSITION,
                                    ("kmem_back: volatile entry"));
                                entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
                                VM_OBJECT_LOCK(kmem_object);
                                goto retry;
                        }
                        /*
                         * Free the pages before removing the map entry.
                         * They are already marked busy.  Calling
                         * vm_map_delete before the pages have been freed or
                         * unbusied will cause a deadlock.
                         */
                        while (i != 0) {
                                i -= PAGE_SIZE;
                                m = vm_page_lookup(kmem_object,
                                    OFF_TO_IDX(offset + i));
                                vm_page_unwire(m, 0);
                                vm_page_free(m);
                        }
                        VM_OBJECT_UNLOCK(kmem_object);
                        vm_map_delete(map, addr, addr + size);
                        return (KERN_NO_SPACE);
                }
                if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
                        pmap_zero_page(m);
                m->valid = VM_PAGE_BITS_ALL;
                KASSERT((m->flags & PG_UNMANAGED) != 0,
                    ("kmem_malloc: page %p is managed", m));
        }
        VM_OBJECT_UNLOCK(kmem_object);

        /*
         * Mark map entry as non-pageable.  Repeat the assert.
         */
        KASSERT(entry->start == addr && entry->end == addr + size &&
            entry->wired_count == 0,
            ("kmem_back: entry not found or misaligned after allocation"));
        entry->wired_count = 1;

        /*
         * At this point, the kmem_object must be unlocked because
         * vm_map_simplify_entry() calls vm_object_deallocate(), which
         * locks the kmem_object.
         */
        vm_map_simplify_entry(map, entry);

        /*
         * Loop thru pages, entering them in the pmap.
         */
        VM_OBJECT_LOCK(kmem_object);
        for (i = 0; i < size; i += PAGE_SIZE) {
                m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
                /*
                 * Because this is kernel_pmap, this call will not block.
                 */
                pmap_enter(kernel_pmap, addr + i, VM_PROT_ALL, m, VM_PROT_ALL,
                    TRUE);
                vm_page_wakeup(m);
        }
        VM_OBJECT_UNLOCK(kmem_object);

        return (KERN_SUCCESS);
}

/*
 *      kmem_alloc_wait:
 *
 *      Allocates pageable memory from a sub-map of the kernel.  If the submap
 *      has no room, the caller sleeps waiting for more memory in the submap.
 *
 *      This routine may block.
 */
vm_offset_t
kmem_alloc_wait(map, size)
        vm_map_t map;
        vm_size_t size;
{
        vm_offset_t addr;

        size = round_page(size);
        if (!swap_reserve(size))
                return (0);

        for (;;) {
                /*
                 * To make this work for more than one map, use the map's lock
                 * to lock out sleepers/wakers.
                 */
                vm_map_lock(map);
                if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0)
                        break;
                /* no space now; see if we can ever get space */
                if (vm_map_max(map) - vm_map_min(map) < size) {
                        vm_map_unlock(map);
                        swap_release(size);
                        return (0);
                }
                map->needs_wakeup = TRUE;
                vm_map_unlock_and_wait(map, 0);
        }
        vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL,
            VM_PROT_ALL, MAP_ACC_CHARGED);
        vm_map_unlock(map);
        return (addr);
}
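/*
 * Usage sketch (illustrative; the submap and `len' are placeholders):
 * pageable-submap consumers pair kmem_alloc_wait() with kmem_free_wakeup()
 * below, sleeping here when the submap is full and waking any such sleeper
 * when space is returned:
 *
 *      addr = kmem_alloc_wait(exec_map, round_page(len));
 *      ... fill the pageable buffer at `addr' ...
 *      kmem_free_wakeup(exec_map, addr, round_page(len));
 */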
/*
 *      kmem_free_wakeup:
 *
 *      Returns memory to a submap of the kernel, and wakes up any processes
 *      waiting for memory in that map.
 */
void
kmem_free_wakeup(map, addr, size)
        vm_map_t map;
        vm_offset_t addr;
        vm_size_t size;
{

        vm_map_lock(map);
        (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
        if (map->needs_wakeup) {
                map->needs_wakeup = FALSE;
                vm_map_wakeup(map);
        }
        vm_map_unlock(map);
}

static void
kmem_init_zero_region(void)
{
        vm_offset_t addr, i;
        vm_page_t m;
        int error;

        /*
         * Map a single physical page of zeros to a larger virtual range.
         * This requires less looping in places that want large amounts of
         * zeros, while not using much more physical resources.
         */
        addr = kmem_alloc_nofault(kernel_map, ZERO_REGION_SIZE);
        m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
            VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
        if ((m->flags & PG_ZERO) == 0)
                pmap_zero_page(m);
        for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE)
                pmap_qenter(addr + i, &m, 1);
        error = vm_map_protect(kernel_map, addr, addr + ZERO_REGION_SIZE,
            VM_PROT_READ, TRUE);
        KASSERT(error == 0, ("error=%d", error));

        zero_region = (const void *)addr;
}
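/*
 * Usage sketch (illustrative; `uaddr', `len' and `n' are placeholders):
 * consumers read from the read-only zero_region in ZERO_REGION_SIZE
 * chunks instead of zeroing a private buffer, e.g. to copy a run of
 * zeros out to userspace:
 *
 *      while (len > 0) {
 *              n = MIN(len, ZERO_REGION_SIZE);
 *              error = copyout(zero_region, uaddr, n);
 *              if (error != 0)
 *                      break;
 *              uaddr += n;
 *              len -= n;
 *      }
 */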
/*
 *      kmem_init:
 *
 *      Create the kernel map; insert a mapping covering kernel text,
 *      data, bss, and all space allocated thus far (`bootstrap' data).  The
 *      new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
 *      `start' as allocated, and the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
        vm_offset_t start, end;
{
        vm_map_t m;

        m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
        m->system_map = 1;
        vm_map_lock(m);
        /* N.B.: cannot use kgdb to debug, starting with this assignment ... */
        kernel_map = m;
        (void) vm_map_insert(m, NULL, (vm_ooffset_t) 0,
#ifdef __amd64__
            KERNBASE,
#else
            VM_MIN_KERNEL_ADDRESS,
#endif
            start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
        /* ... and ending with the completion of the above `insert' */
        vm_map_unlock(m);

        kmem_init_zero_region();
}

#ifdef DIAGNOSTIC
/*
 * Allow userspace to directly trigger the VM drain routine for testing
 * purposes.
 */
static int
debug_vm_lowmem(SYSCTL_HANDLER_ARGS)
{
        int error, i;

        i = 0;
        error = sysctl_handle_int(oidp, &i, 0, req);
        if (error)
                return (error);
        if (i)
                EVENTHANDLER_INVOKE(vm_lowmem, 0);
        return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0,
    debug_vm_lowmem, "I", "set to trigger vm_lowmem event");
#endif
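/*
 * Usage sketch: on a kernel built with DIAGNOSTIC, the vm_lowmem drain
 * above can be triggered from userspace for testing with, e.g.,
 *
 *      sysctl debug.vm_lowmem=1
 */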