/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Kernel memory management.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>		/* for ticks and hz */
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/vmem.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

vm_map_t kernel_map;
vm_map_t exec_map;
vm_map_t pipe_map;

const void *zero_region;
CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0);

/* NB: Used by kernel debuggers. */
const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS;

SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD,
    SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address");

SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD,
#if defined(__arm__) || defined(__sparc64__)
    &vm_max_kernel_address, 0,
#else
    SYSCTL_NULL_ULONG_PTR, VM_MAX_KERNEL_ADDRESS,
#endif
    "Max kernel address");

/*
 *	kva_alloc:
 *
 *	Allocate a virtual address range with no underlying object and
 *	no initial mapping to physical memory.  Any mapping from this
 *	range to physical memory must be explicitly created prior to
 *	its use, typically with pmap_qenter().  Any attempt to create
 *	a mapping on demand through vm_fault() will result in a panic.
 */
vm_offset_t
kva_alloc(size)
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);
	if (vmem_alloc(kernel_arena, size, M_BESTFIT | M_NOWAIT, &addr))
		return (0);

	return (addr);
}

/*
 *	kva_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kva_alloc.  This routine does not free or unmap any physical
 *	pages that may still be mapped within the region; that is the
 *	caller's responsibility.
 *
 *	This routine may not block on kernel maps.
 */
void
kva_free(addr, size)
	vm_offset_t addr;
	vm_size_t size;
{

	size = round_page(size);
	vmem_free(kernel_arena, addr, size);
}
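
/*
 * Illustrative sketch only (not an interface defined in this file): a
 * typical consumer pairs kva_alloc() with pmap_qenter()/pmap_qremove() to
 * create and tear down a temporary wired mapping.  Here "pages" and
 * "npages" are hypothetical caller-supplied values.
 *
 *	vm_offset_t va;
 *
 *	va = kva_alloc(ptoa(npages));
 *	if (va == 0)
 *		return (ENOMEM);
 *	pmap_qenter(va, pages, npages);
 *	... use the mapping at va ...
 *	pmap_qremove(va, npages);
 *	kva_free(va, ptoa(npages));
 */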

/*
 * Allocates a region from the kernel address map and physical pages
 * within the specified address range to the kernel object.  Creates a
 * wired mapping from this region to these pages, and returns the
 * region's starting virtual address.  The allocated pages are not
 * necessarily physically contiguous.  If M_ZERO is specified through the
 * given flags, then the pages are zeroed before they are mapped.
 */
vm_offset_t
kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, vm_memattr_t memattr)
{
	vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
	vm_offset_t addr, i;
	vm_ooffset_t offset;
	vm_page_t m;
	int pflags, tries;

	size = round_page(size);
	if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
		return (0);
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
	VM_OBJECT_WLOCK(object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		tries = 0;
retry:
		m = vm_page_alloc_contig(object, OFF_TO_IDX(offset + i),
		    pflags, 1, low, high, PAGE_SIZE, 0, memattr);
		if (m == NULL) {
			VM_OBJECT_WUNLOCK(object);
			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
				if (!vm_page_reclaim_contig(pflags, 1,
				    low, high, PAGE_SIZE, 0) &&
				    (flags & M_WAITOK) != 0)
					VM_WAIT;
				VM_OBJECT_WLOCK(object);
				tries++;
				goto retry;
			}
			kmem_unback(object, addr, i);
			vmem_free(vmem, addr, size);
			return (0);
		}
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL,
		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
	}
	VM_OBJECT_WUNLOCK(object);
	return (addr);
}

/*
 * Allocates a region from the kernel address map and physically
 * contiguous pages within the specified address range to the kernel
 * object.  Creates a wired mapping from this region to these pages, and
 * returns the region's starting virtual address.  If M_ZERO is specified
 * through the given flags, then the pages are zeroed before they are
 * mapped.
 */
vm_offset_t
kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
    vm_memattr_t memattr)
{
	vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
	vm_offset_t addr, tmp;
	vm_ooffset_t offset;
	vm_page_t end_m, m;
	u_long npages;
	int pflags, tries;

	size = round_page(size);
	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
		return (0);
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
	npages = atop(size);
	VM_OBJECT_WLOCK(object);
	tries = 0;
retry:
	m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
	    npages, low, high, alignment, boundary, memattr);
	if (m == NULL) {
		VM_OBJECT_WUNLOCK(object);
		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
			if (!vm_page_reclaim_contig(pflags, npages, low, high,
			    alignment, boundary) && (flags & M_WAITOK) != 0)
				VM_WAIT;
			VM_OBJECT_WLOCK(object);
			tries++;
			goto retry;
		}
		vmem_free(vmem, addr, size);
		return (0);
	}
	end_m = m + npages;
	tmp = addr;
	for (; m < end_m; m++) {
		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		m->valid = VM_PAGE_BITS_ALL;
		pmap_enter(kernel_pmap, tmp, m, VM_PROT_ALL,
		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
		tmp += PAGE_SIZE;
	}
	VM_OBJECT_WUNLOCK(object);
	return (addr);
}
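
/*
 * Illustrative sketch only: a hypothetical caller that needs "size" bytes
 * of zeroed, physically contiguous, page-aligned memory below 4GB might
 * request it as follows (kernel_arena is used here as an example arena),
 * releasing it later with kmem_free() using the same arena and size.
 *
 *	vm_offset_t va;
 *
 *	va = kmem_alloc_contig(kernel_arena, size, M_WAITOK | M_ZERO,
 *	    0, 0xffffffffUL, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
 *	if (va == 0)
 *		return (ENOMEM);
 *	...
 *	kmem_free(kernel_arena, va, size);
 */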

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	min, max	Returned endpoints of map
 *	size		Size of range to find
 *	superpage_align	Request that min is superpage aligned
 */
vm_map_t
kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max,
    vm_size_t size, boolean_t superpage_align)
{
	int ret;
	vm_map_t result;

	size = round_page(size);

	*min = vm_map_min(parent);
	ret = vm_map_find(parent, NULL, 0, min, size, 0, superpage_align ?
	    VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL,
	    MAP_ACC_NO_CHARGE);
	if (ret != KERN_SUCCESS)
		panic("kmem_suballoc: bad status return of %d", ret);
	*max = *min + size;
	result = vm_map_create(vm_map_pmap(parent), *min, *max);
	if (result == NULL)
		panic("kmem_suballoc: cannot create submap");
	if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
		panic("kmem_suballoc: unable to change range to submap");
	return (result);
}

/*
 *	kmem_malloc:
 *
 *	Allocate wired-down pages in the kernel's address space.
 */
vm_offset_t
kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
{
	vm_offset_t addr;
	int rv;

	size = round_page(size);
	if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
		return (0);

	rv = kmem_back((vmem == kmem_arena) ? kmem_object : kernel_object,
	    addr, size, flags);
	if (rv != KERN_SUCCESS) {
		vmem_free(vmem, addr, size);
		return (0);
	}
	return (addr);
}

/*
 *	kmem_back:
 *
 *	Allocate physical pages for the specified virtual address range.
 */
int
kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
{
	vm_offset_t offset, i;
	vm_page_t m;
	int pflags;

	KASSERT(object == kmem_object || object == kernel_object,
	    ("kmem_back: only supports kernel objects."));

	offset = addr - VM_MIN_KERNEL_ADDRESS;
	pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;

	VM_OBJECT_WLOCK(object);
	for (i = 0; i < size; i += PAGE_SIZE) {
retry:
		m = vm_page_alloc(object, OFF_TO_IDX(offset + i), pflags);

		/*
		 * Ran out of space, free everything up and return.  Don't
		 * need to lock page queues here as we know that the pages
		 * we got aren't on any queues.
		 */
		if (m == NULL) {
			VM_OBJECT_WUNLOCK(object);
			if ((flags & M_NOWAIT) == 0) {
				VM_WAIT;
				VM_OBJECT_WLOCK(object);
				goto retry;
			}
			kmem_unback(object, addr, i);
			return (KERN_NO_SPACE);
		}
		if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
			pmap_zero_page(m);
		KASSERT((m->oflags & VPO_UNMANAGED) != 0,
		    ("kmem_malloc: page %p is managed", m));
		m->valid = VM_PAGE_BITS_ALL;
		pmap_enter(kernel_pmap, addr + i, m, VM_PROT_ALL,
		    VM_PROT_ALL | PMAP_ENTER_WIRED, 0);
	}
	VM_OBJECT_WUNLOCK(object);

	return (KERN_SUCCESS);
}

/*
 *	kmem_unback:
 *
 *	Unmap and free the physical pages underlying the specified virtual
 *	address range.
 *
 *	A physical page must exist within the specified object at each index
 *	that is being unmapped.
 */
void
kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
{
	vm_page_t m;
	vm_offset_t i, offset;

	KASSERT(object == kmem_object || object == kernel_object,
	    ("kmem_unback: only supports kernel objects."));

	pmap_remove(kernel_pmap, addr, addr + size);
	offset = addr - VM_MIN_KERNEL_ADDRESS;
	VM_OBJECT_WLOCK(object);
	for (i = 0; i < size; i += PAGE_SIZE) {
		m = vm_page_lookup(object, OFF_TO_IDX(offset + i));
		vm_page_unwire(m, PQ_NONE);
		vm_page_free(m);
	}
	VM_OBJECT_WUNLOCK(object);
}

/*
 *	kmem_free:
 *
 *	Free memory allocated with kmem_malloc.  The size must match the
 *	original allocation.
 */
void
kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
{

	size = round_page(size);
	kmem_unback((vmem == kmem_arena) ? kmem_object : kernel_object,
	    addr, size);
	vmem_free(vmem, addr, size);
}
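
/*
 * Illustrative sketch only: a multi-page wired allocation from kmem_arena
 * and its release.  As noted above, the size passed to kmem_free() must
 * match the original request; "sz" is a hypothetical example value.
 *
 *	vm_offset_t va;
 *	vm_size_t sz = 4 * PAGE_SIZE;
 *
 *	va = kmem_malloc(kmem_arena, sz, M_WAITOK | M_ZERO);
 *	if (va == 0)
 *		return (ENOMEM);
 *	...
 *	kmem_free(kmem_arena, va, sz);
 */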

/*
 *	kmap_alloc_wait:
 *
 *	Allocates pageable memory from a sub-map of the kernel.  If the submap
 *	has no room, the caller sleeps waiting for more memory in the submap.
 *
 *	This routine may block.
 */
vm_offset_t
kmap_alloc_wait(map, size)
	vm_map_t map;
	vm_size_t size;
{
	vm_offset_t addr;

	size = round_page(size);
	if (!swap_reserve(size))
		return (0);

	for (;;) {
		/*
		 * To make this work for more than one map, use the map's lock
		 * to lock out sleepers/wakers.
		 */
		vm_map_lock(map);
		if (vm_map_findspace(map, vm_map_min(map), size, &addr) == 0)
			break;
		/* no space now; see if we can ever get space */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			swap_release(size);
			return (0);
		}
		map->needs_wakeup = TRUE;
		vm_map_unlock_and_wait(map, 0);
	}
	vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, MAP_ACC_CHARGED);
	vm_map_unlock(map);
	return (addr);
}

/*
 *	kmap_free_wakeup:
 *
 *	Returns memory to a submap of the kernel, and wakes up any processes
 *	waiting for memory in that map.
 */
void
kmap_free_wakeup(map, addr, size)
	vm_map_t map;
	vm_offset_t addr;
	vm_size_t size;
{

	vm_map_lock(map);
	(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
	if (map->needs_wakeup) {
		map->needs_wakeup = FALSE;
		vm_map_wakeup(map);
	}
	vm_map_unlock(map);
}

void
kmem_init_zero_region(void)
{
	vm_offset_t addr, i;
	vm_page_t m;

	/*
	 * Map a single physical page of zeros to a larger virtual range.
	 * This requires less looping in places that want large amounts of
	 * zeros, while not using much more physical resources.
	 */
	addr = kva_alloc(ZERO_REGION_SIZE);
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);
	for (i = 0; i < ZERO_REGION_SIZE; i += PAGE_SIZE)
		pmap_qenter(addr + i, &m, 1);
	pmap_protect(kernel_pmap, addr, addr + ZERO_REGION_SIZE, VM_PROT_READ);

	zero_region = (const void *)addr;
}

/*
 *	kmem_init:
 *
 *	Create the kernel map; insert a mapping covering kernel text,
 *	data, bss, and all space allocated thus far (`bootstrap' data).  The
 *	new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
 *	`start' as allocated, and the range between `start' and `end' as free.
 */
void
kmem_init(start, end)
	vm_offset_t start, end;
{
	vm_map_t m;

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
	m->system_map = 1;
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0,
#ifdef __amd64__
	    KERNBASE,
#else
	    VM_MIN_KERNEL_ADDRESS,
#endif
	    start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
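
/*
 * Illustrative sketch only: consumers treat zero_region as a read-only
 * window of zeros at most ZERO_REGION_SIZE bytes long, e.g. when copying
 * zeros out in bounded chunks ("resid" and "uio" are hypothetical caller
 * state):
 *
 *	len = min(resid, ZERO_REGION_SIZE);
 *	error = uiomove(__DECONST(void *, zero_region), len, uio);
 */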

#ifdef DIAGNOSTIC
/*
 * Allow userspace to directly trigger the VM drain routine for testing
 * purposes.
 */
static int
debug_vm_lowmem(SYSCTL_HANDLER_ARGS)
{
	int error, i;

	i = 0;
	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error)
		return (error);
	if (i)
		EVENTHANDLER_INVOKE(vm_lowmem, 0);
	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, vm_lowmem, CTLTYPE_INT | CTLFLAG_RW, 0, 0,
    debug_vm_lowmem, "I", "set to trigger vm_lowmem event");
#endif
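
/*
 * Illustrative note: on a kernel built with "options DIAGNOSTIC", the
 * handler above can be exercised from userland with
 *
 *	sysctl debug.vm_lowmem=1
 *
 * which invokes the registered vm_lowmem event handlers once.
 */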