1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2005, Bosko Milekic <bmilekic@FreeBSD.org>. 5 * Copyright (c) 2010 Isilon Systems, Inc. (http://www.isilon.com/) 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice unmodified, this list of conditions, and the following 13 * disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 /* 34 * MemGuard is a simple replacement allocator for debugging only 35 * which provides ElectricFence-style memory barrier protection on 36 * objects being allocated, and is used to detect tampering-after-free 37 * scenarios. 38 * 39 * See the memguard(9) man page for more information on using MemGuard. 40 */ 41 42 #include "opt_vm.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/types.h> 48 #include <sys/queue.h> 49 #include <sys/lock.h> 50 #include <sys/mutex.h> 51 #include <sys/malloc.h> 52 #include <sys/sysctl.h> 53 #include <sys/vmem.h> 54 #include <sys/vmmeter.h> 55 56 #include <vm/vm.h> 57 #include <vm/uma.h> 58 #include <vm/vm_param.h> 59 #include <vm/vm_page.h> 60 #include <vm/vm_map.h> 61 #include <vm/vm_object.h> 62 #include <vm/vm_kern.h> 63 #include <vm/vm_extern.h> 64 #include <vm/uma_int.h> 65 #include <vm/memguard.h> 66 67 static SYSCTL_NODE(_vm, OID_AUTO, memguard, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 68 "MemGuard data"); 69 /* 70 * The vm_memguard_divisor variable controls how much of kernel_arena should be 71 * reserved for MemGuard. 72 */ 73 static u_int vm_memguard_divisor; 74 SYSCTL_UINT(_vm_memguard, OID_AUTO, divisor, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 75 &vm_memguard_divisor, 76 0, "(kmem_size/memguard_divisor) == memguard submap size"); 77 78 /* 79 * Short description (ks_shortdesc) of memory type to monitor. 80 */ 81 static char vm_memguard_desc[128] = ""; 82 static struct malloc_type *vm_memguard_mtype = NULL; 83 TUNABLE_STR("vm.memguard.desc", vm_memguard_desc, sizeof(vm_memguard_desc)); 84 static int 85 memguard_sysctl_desc(SYSCTL_HANDLER_ARGS) 86 { 87 char desc[sizeof(vm_memguard_desc)]; 88 int error; 89 90 strlcpy(desc, vm_memguard_desc, sizeof(desc)); 91 error = sysctl_handle_string(oidp, desc, sizeof(desc), req); 92 if (error != 0 || req->newptr == NULL) 93 return (error); 94 95 mtx_lock(&malloc_mtx); 96 /* If mtp is NULL, it will be initialized in memguard_cmp() */ 97 vm_memguard_mtype = malloc_desc2type(desc); 98 strlcpy(vm_memguard_desc, desc, sizeof(vm_memguard_desc)); 99 mtx_unlock(&malloc_mtx); 100 return (error); 101 } 102 SYSCTL_PROC(_vm_memguard, OID_AUTO, desc, 103 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 104 memguard_sysctl_desc, "A", "Short description of memory type to monitor"); 105 106 static int 107 memguard_sysctl_mapused(SYSCTL_HANDLER_ARGS) 108 { 109 vmem_size_t size; 110 111 size = vmem_size(memguard_arena, VMEM_ALLOC); 112 return (sysctl_handle_long(oidp, &size, sizeof(size), req)); 113 } 114 115 static vm_offset_t memguard_base; 116 static vm_size_t memguard_mapsize; 117 static vm_size_t memguard_physlimit; 118 static u_long memguard_wasted; 119 static u_long memguard_succ; 120 static u_long memguard_fail_kva; 121 static u_long memguard_fail_pgs; 122 123 SYSCTL_ULONG(_vm_memguard, OID_AUTO, mapsize, CTLFLAG_RD, 124 &memguard_mapsize, 0, "MemGuard private arena size"); 125 SYSCTL_ULONG(_vm_memguard, OID_AUTO, phys_limit, CTLFLAG_RD, 126 &memguard_physlimit, 0, "Limit on MemGuard memory consumption"); 127 SYSCTL_ULONG(_vm_memguard, OID_AUTO, wasted, CTLFLAG_RD, 128 &memguard_wasted, 0, "Excess memory used through page promotion"); 129 SYSCTL_ULONG(_vm_memguard, OID_AUTO, numalloc, CTLFLAG_RD, 130 &memguard_succ, 0, "Count of successful MemGuard allocations"); 131 SYSCTL_ULONG(_vm_memguard, OID_AUTO, fail_kva, CTLFLAG_RD, 132 &memguard_fail_kva, 0, "MemGuard failures due to lack of KVA"); 133 SYSCTL_ULONG(_vm_memguard, OID_AUTO, fail_pgs, CTLFLAG_RD, 134 &memguard_fail_pgs, 0, "MemGuard failures due to lack of pages"); 135 136 #define MG_GUARD_AROUND 0x001 137 #define MG_GUARD_ALLLARGE 0x002 138 #define MG_GUARD_NOFREE 0x004 139 static int memguard_options = MG_GUARD_AROUND; 140 SYSCTL_INT(_vm_memguard, OID_AUTO, options, CTLFLAG_RWTUN, 141 &memguard_options, 0, 142 "MemGuard options:\n" 143 "\t0x001 - add guard pages around each allocation\n" 144 "\t0x002 - always use MemGuard for allocations over a page\n" 145 "\t0x004 - guard uma(9) zones with UMA_ZONE_NOFREE flag"); 146 147 static u_int memguard_minsize; 148 static u_long memguard_minsize_reject; 149 SYSCTL_UINT(_vm_memguard, OID_AUTO, minsize, CTLFLAG_RW, 150 &memguard_minsize, 0, "Minimum size for page promotion"); 151 SYSCTL_ULONG(_vm_memguard, OID_AUTO, minsize_reject, CTLFLAG_RD, 152 &memguard_minsize_reject, 0, "# times rejected for size"); 153 154 static u_int memguard_frequency; 155 static u_long memguard_frequency_hits; 156 SYSCTL_UINT(_vm_memguard, OID_AUTO, frequency, CTLFLAG_RWTUN, 157 &memguard_frequency, 0, "Times in 100000 that MemGuard will randomly run"); 158 SYSCTL_ULONG(_vm_memguard, OID_AUTO, frequency_hits, CTLFLAG_RD, 159 &memguard_frequency_hits, 0, "# times MemGuard randomly chose"); 160 161 /* 162 * Return a fudged value to be used for vm_kmem_size for allocating 163 * the kernel_arena. 164 */ 165 unsigned long 166 memguard_fudge(unsigned long km_size, const struct vm_map *parent_map) 167 { 168 u_long mem_pgs, parent_size; 169 170 vm_memguard_divisor = 10; 171 /* CTFLAG_RDTUN doesn't work during the early boot process. */ 172 TUNABLE_INT_FETCH("vm.memguard.divisor", &vm_memguard_divisor); 173 174 parent_size = vm_map_max(parent_map) - vm_map_min(parent_map) + 175 PAGE_SIZE; 176 /* Pick a conservative value if provided value sucks. */ 177 if ((vm_memguard_divisor <= 0) || 178 ((parent_size / vm_memguard_divisor) == 0)) 179 vm_memguard_divisor = 10; 180 /* 181 * Limit consumption of physical pages to 182 * 1/vm_memguard_divisor of system memory. If the KVA is 183 * smaller than this then the KVA limit comes into play first. 184 * This prevents memguard's page promotions from completely 185 * using up memory, since most malloc(9) calls are sub-page. 186 */ 187 mem_pgs = vm_cnt.v_page_count; 188 memguard_physlimit = (mem_pgs / vm_memguard_divisor) * PAGE_SIZE; 189 /* 190 * We want as much KVA as we can take safely. Use at most our 191 * allotted fraction of the parent map's size. Limit this to 192 * twice the physical memory to avoid using too much memory as 193 * pagetable pages (size must be multiple of PAGE_SIZE). 194 */ 195 memguard_mapsize = round_page(parent_size / vm_memguard_divisor); 196 if (memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs) 197 memguard_mapsize = mem_pgs * 2 * PAGE_SIZE; 198 if (km_size + memguard_mapsize > parent_size) 199 memguard_mapsize = 0; 200 return (km_size + memguard_mapsize); 201 } 202 203 /* 204 * Initialize the MemGuard mock allocator. All objects from MemGuard come 205 * out of a single contiguous chunk of kernel address space that is managed 206 * by a vmem arena. 207 */ 208 void 209 memguard_init(vmem_t *parent) 210 { 211 vm_offset_t base; 212 213 vmem_alloc(parent, memguard_mapsize, M_BESTFIT | M_WAITOK, &base); 214 vmem_init(memguard_arena, "memguard arena", base, memguard_mapsize, 215 PAGE_SIZE, 0, M_WAITOK); 216 memguard_base = base; 217 218 printf("MEMGUARD DEBUGGING ALLOCATOR INITIALIZED:\n"); 219 printf("\tMEMGUARD map base: 0x%lx\n", (u_long)base); 220 printf("\tMEMGUARD map size: %jd KBytes\n", 221 (uintmax_t)memguard_mapsize >> 10); 222 } 223 224 /* 225 * Run things that can't be done as early as memguard_init(). 226 */ 227 static void 228 memguard_sysinit(void) 229 { 230 struct sysctl_oid_list *parent; 231 232 parent = SYSCTL_STATIC_CHILDREN(_vm_memguard); 233 SYSCTL_ADD_UAUTO(NULL, parent, OID_AUTO, "mapstart", 234 CTLFLAG_RD, &memguard_base, 235 "MemGuard KVA base"); 236 SYSCTL_ADD_UAUTO(NULL, parent, OID_AUTO, "maplimit", 237 CTLFLAG_RD, &memguard_mapsize, 238 "MemGuard KVA size"); 239 SYSCTL_ADD_PROC(NULL, parent, OID_AUTO, "mapused", 240 CTLFLAG_RD | CTLFLAG_MPSAFE | CTLTYPE_ULONG, NULL, 0, memguard_sysctl_mapused, "LU", 241 "MemGuard KVA used"); 242 } 243 SYSINIT(memguard, SI_SUB_KLD, SI_ORDER_ANY, memguard_sysinit, NULL); 244 245 /* 246 * v2sizep() converts a virtual address of the first page allocated for 247 * an item to a pointer to u_long recording the size of the original 248 * allocation request. 249 * 250 * This routine is very similar to those defined by UMA in uma_int.h. 251 * The difference is that this routine stores the originally allocated 252 * size in one of the page's fields that is unused when the page is 253 * wired rather than the object field, which is used. 254 */ 255 static u_long * 256 v2sizep(vm_offset_t va) 257 { 258 vm_paddr_t pa; 259 struct vm_page *p; 260 261 pa = pmap_kextract(va); 262 if (pa == 0) 263 panic("MemGuard detected double-free of %p", (void *)va); 264 p = PHYS_TO_VM_PAGE(pa); 265 KASSERT(vm_page_wired(p) && p->a.queue == PQ_NONE, 266 ("MEMGUARD: Expected wired page %p in vtomgfifo!", p)); 267 return (&p->plinks.memguard.p); 268 } 269 270 static u_long * 271 v2sizev(vm_offset_t va) 272 { 273 vm_paddr_t pa; 274 struct vm_page *p; 275 276 pa = pmap_kextract(va); 277 if (pa == 0) 278 panic("MemGuard detected double-free of %p", (void *)va); 279 p = PHYS_TO_VM_PAGE(pa); 280 KASSERT(vm_page_wired(p) && p->a.queue == PQ_NONE, 281 ("MEMGUARD: Expected wired page %p in vtomgfifo!", p)); 282 return (&p->plinks.memguard.v); 283 } 284 285 /* 286 * Allocate a single object of specified size with specified flags 287 * (either M_WAITOK or M_NOWAIT). 288 */ 289 void * 290 memguard_alloc(unsigned long req_size, int flags) 291 { 292 vm_offset_t addr, origaddr; 293 u_long size_p, size_v; 294 int do_guard, error, rv; 295 296 size_p = round_page(req_size); 297 if (size_p == 0) 298 return (NULL); 299 300 /* 301 * To ensure there are holes on both sides of the allocation, 302 * request 2 extra pages of KVA. Save the value of memguard_options 303 * so that we use a consistent value throughout this function. 304 */ 305 size_v = size_p; 306 do_guard = (memguard_options & MG_GUARD_AROUND) != 0; 307 if (do_guard) 308 size_v += 2 * PAGE_SIZE; 309 310 /* 311 * When we pass our memory limit, reject sub-page allocations. 312 * Page-size and larger allocations will use the same amount 313 * of physical memory whether we allocate or hand off to 314 * malloc_large(), so keep those. 315 */ 316 if (vmem_size(memguard_arena, VMEM_ALLOC) >= memguard_physlimit && 317 req_size < PAGE_SIZE) { 318 addr = (vm_offset_t)NULL; 319 memguard_fail_pgs++; 320 goto out; 321 } 322 323 /* 324 * Attempt to avoid address reuse for as long as possible, to increase 325 * the likelihood of catching a use-after-free. 326 */ 327 error = vmem_alloc(memguard_arena, size_v, M_NEXTFIT | M_NOWAIT, 328 &origaddr); 329 if (error != 0) { 330 memguard_fail_kva++; 331 addr = (vm_offset_t)NULL; 332 goto out; 333 } 334 addr = origaddr; 335 if (do_guard) 336 addr += PAGE_SIZE; 337 rv = kmem_back(kernel_object, addr, size_p, flags); 338 if (rv != KERN_SUCCESS) { 339 vmem_xfree(memguard_arena, origaddr, size_v); 340 memguard_fail_pgs++; 341 addr = (vm_offset_t)NULL; 342 goto out; 343 } 344 *v2sizep(trunc_page(addr)) = req_size; 345 *v2sizev(trunc_page(addr)) = size_v; 346 memguard_succ++; 347 if (req_size < PAGE_SIZE) { 348 memguard_wasted += (PAGE_SIZE - req_size); 349 if (do_guard) { 350 /* 351 * Align the request to 16 bytes, and return 352 * an address near the end of the page, to 353 * better detect array overrun. 354 */ 355 req_size = roundup2(req_size, 16); 356 addr += (PAGE_SIZE - req_size); 357 } 358 } 359 out: 360 return ((void *)addr); 361 } 362 363 int 364 is_memguard_addr(void *addr) 365 { 366 vm_offset_t a = (vm_offset_t)(uintptr_t)addr; 367 368 return (a >= memguard_base && a < memguard_base + memguard_mapsize); 369 } 370 371 /* 372 * Free specified single object. 373 */ 374 void 375 memguard_free(void *ptr) 376 { 377 vm_offset_t addr; 378 u_long req_size, size, sizev; 379 char *temp; 380 int i; 381 382 addr = trunc_page((uintptr_t)ptr); 383 req_size = *v2sizep(addr); 384 sizev = *v2sizev(addr); 385 size = round_page(req_size); 386 387 /* 388 * Page should not be guarded right now, so force a write. 389 * The purpose of this is to increase the likelihood of 390 * catching a double-free, but not necessarily a 391 * tamper-after-free (the second thread freeing might not 392 * write before freeing, so this forces it to and, 393 * subsequently, trigger a fault). 394 */ 395 temp = ptr; 396 for (i = 0; i < size; i += PAGE_SIZE) 397 temp[i] = 'M'; 398 399 /* 400 * This requires carnal knowledge of the implementation of 401 * kmem_free(), but since we've already replaced kmem_malloc() 402 * above, it's not really any worse. We want to use the 403 * vm_map lock to serialize updates to memguard_wasted, since 404 * we had the lock at increment. 405 */ 406 kmem_unback(kernel_object, addr, size); 407 if (sizev > size) 408 addr -= PAGE_SIZE; 409 vmem_xfree(memguard_arena, addr, sizev); 410 if (req_size < PAGE_SIZE) 411 memguard_wasted -= (PAGE_SIZE - req_size); 412 } 413 414 /* 415 * Re-allocate an allocation that was originally guarded. 416 */ 417 void * 418 memguard_realloc(void *addr, unsigned long size, struct malloc_type *mtp, 419 int flags) 420 { 421 void *newaddr; 422 u_long old_size; 423 424 /* 425 * Allocate the new block. Force the allocation to be guarded 426 * as the original may have been guarded through random 427 * chance, and that should be preserved. 428 */ 429 if ((newaddr = memguard_alloc(size, flags)) == NULL) 430 return (NULL); 431 432 /* Copy over original contents. */ 433 old_size = *v2sizep(trunc_page((uintptr_t)addr)); 434 bcopy(addr, newaddr, min(size, old_size)); 435 memguard_free(addr); 436 return (newaddr); 437 } 438 439 static int 440 memguard_cmp(unsigned long size) 441 { 442 443 if (size < memguard_minsize) { 444 memguard_minsize_reject++; 445 return (0); 446 } 447 if ((memguard_options & MG_GUARD_ALLLARGE) != 0 && size >= PAGE_SIZE) 448 return (1); 449 if (memguard_frequency > 0 && 450 (random() % 100000) < memguard_frequency) { 451 memguard_frequency_hits++; 452 return (1); 453 } 454 455 return (0); 456 } 457 458 int 459 memguard_cmp_mtp(struct malloc_type *mtp, unsigned long size) 460 { 461 462 if (memguard_cmp(size)) 463 return(1); 464 465 #if 1 466 /* 467 * The safest way of comparison is to always compare short description 468 * string of memory type, but it is also the slowest way. 469 */ 470 return (strcmp(mtp->ks_shortdesc, vm_memguard_desc) == 0); 471 #else 472 /* 473 * If we compare pointers, there are two possible problems: 474 * 1. Memory type was unloaded and new memory type was allocated at the 475 * same address. 476 * 2. Memory type was unloaded and loaded again, but allocated at a 477 * different address. 478 */ 479 if (vm_memguard_mtype != NULL) 480 return (mtp == vm_memguard_mtype); 481 if (strcmp(mtp->ks_shortdesc, vm_memguard_desc) == 0) { 482 vm_memguard_mtype = mtp; 483 return (1); 484 } 485 return (0); 486 #endif 487 } 488 489 int 490 memguard_cmp_zone(uma_zone_t zone) 491 { 492 493 if ((memguard_options & MG_GUARD_NOFREE) == 0 && 494 zone->uz_flags & UMA_ZONE_NOFREE) 495 return (0); 496 497 if (memguard_cmp(zone->uz_size)) 498 return (1); 499 500 /* 501 * The safest way of comparison is to always compare zone name, 502 * but it is also the slowest way. 503 */ 504 return (strcmp(zone->uz_name, vm_memguard_desc) == 0); 505 } 506 507 unsigned long 508 memguard_get_req_size(const void *addr) 509 { 510 return (*v2sizep(trunc_page((uintptr_t)addr))); 511 } 512