/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, size_t size)
{
	struct kmem_cache *s = NULL;

	if (!name || in_interrupt() || size < sizeof(void *) ||
	    size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	list_for_each_entry(s, &slab_caches, list) {
		char tmp;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module. Print a warning.
		 */
		res = probe_kernel_address(s->name, tmp);
		if (res) {
			pr_err("Slab cache with size %d has lost its name\n",
			       s->object_size);
			continue;
		}

#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON)
		if (!strcmp(s->name, name)) {
			pr_err("%s (%s): Cache name already exists.\n",
			       __func__, name);
			dump_stack();
			s = NULL;
			return -EINVAL;
		}
#endif
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, size_t size)
{
	return 0;
}
#endif

#ifdef CONFIG_MEMCG_KMEM
int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;
	mutex_lock(&slab_mutex);

	list_for_each_entry(s, &slab_caches, list) {
		if (!is_root_cache(s))
			continue;

		ret = memcg_update_cache_size(s, num_memcgs);
		/*
		 * See comment in memcontrol.c, memcg_update_cache_size:
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			goto out;
	}

	memcg_update_array_size(num_memcgs);
out:
	mutex_unlock(&slab_mutex);
	return ret;
}
#endif

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater, then use it.
	 */
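	/*
	 * Worked example (illustrative, not from the original source): with
	 * a 64-byte cache line and 24-byte objects, the loop below halves
	 * ralign from 64 to 32 so that two objects can share a cache line;
	 * a caller-supplied align of 16 would then be raised to 32.
	 */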
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}

static struct kmem_cache *
do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
		     unsigned long flags, void (*ctor)(void *),
		     struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	s->name = name;
	s->object_size = object_size;
	s->size = size;
	s->align = align;
	s->ctor = ctor;

	err = memcg_alloc_cache_params(memcg, s, root_cache);
	if (err)
		goto out_free_cache;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
	memcg_register_cache(s);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	memcg_free_cache_params(s);
	kfree(s);
	goto out;
}

/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s;
	char *cache_name;
	int err;

	get_online_cpus();
	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err)
		goto out_unlock;

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	cache_name = kstrdup(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = do_kmem_cache_create(cache_name, size, size,
				 calculate_alignment(flags, align, size),
				 flags, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);
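
/*
 * Typical usage (illustrative sketch with hypothetical names, not part of
 * this file): a subsystem creates a cache for its own object type once and
 * then allocates and frees objects from it:
 *
 *	static struct kmem_cache *foo_cache;
 *
 *	foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				      SLAB_HWCACHE_ALIGN, NULL);
 *	obj = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cache, obj);
 */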

#ifdef CONFIG_MEMCG_KMEM
/*
 * kmem_cache_create_memcg - Create a cache for a memory cgroup.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits properties
 * from its parent.
 */
void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	char *cache_name;

	get_online_cpus();
	mutex_lock(&slab_mutex);

	/*
	 * Since per-memcg caches are created asynchronously on first
	 * allocation (see memcg_kmem_get_cache()), several threads can try to
	 * create the same cache, but only one of them may succeed.
	 */
	if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg)))
		goto out_unlock;

	cache_name = memcg_create_cache_name(memcg, root_cache);
	if (!cache_name)
		goto out_unlock;

	s = do_kmem_cache_create(cache_name, root_cache->object_size,
				 root_cache->size, root_cache->align,
				 root_cache->flags, root_cache->ctor,
				 memcg, root_cache);
	if (IS_ERR(s)) {
		kfree(cache_name);
		goto out_unlock;
	}

	s->allocflags |= __GFP_KMEMCG;

out_unlock:
	mutex_unlock(&slab_mutex);
	put_online_cpus();
}

static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
{
	int rc;

	if (!s->memcg_params ||
	    !s->memcg_params->is_root_cache)
		return 0;

	mutex_unlock(&slab_mutex);
	rc = __kmem_cache_destroy_memcg_children(s);
	mutex_lock(&slab_mutex);

	return rc;
}
#else
static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
{
	return 0;
}
#endif /* CONFIG_MEMCG_KMEM */

void slab_kmem_cache_release(struct kmem_cache *s)
{
	kfree(s->name);
	kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
	get_online_cpus();
	mutex_lock(&slab_mutex);

	s->refcount--;
	if (s->refcount)
		goto out_unlock;

	if (kmem_cache_destroy_memcg_children(s) != 0)
		goto out_unlock;

	list_del(&s->list);
	memcg_unregister_cache(s);

	if (__kmem_cache_shutdown(s) != 0) {
		list_add(&s->list, &slab_caches);
		memcg_register_cache(s);
		printk(KERN_ERR "kmem_cache_destroy %s: "
		       "Slab cache still has objects\n", s->name);
		dump_stack();
		goto out_unlock;
	}

	mutex_unlock(&slab_mutex);
	if (s->flags & SLAB_DESTROY_BY_RCU)
		rcu_barrier();

	memcg_free_cache_params(s);
#ifdef SLAB_SUPPORTS_SYSFS
	sysfs_slab_remove(s);
#else
	slab_kmem_cache_release(s);
#endif
	goto out_put_cpus;

out_unlock:
	mutex_unlock(&slab_mutex);
out_put_cpus:
	put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);
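
/*
 * Illustrative note (hypothetical example, not part of this file): a module
 * that created a cache during init is expected to destroy it on exit:
 *
 *	static void __exit foo_exit(void)
 *	{
 *		kmem_cache_destroy(foo_cache);
 *	}
 *
 * If objects are still allocated, __kmem_cache_shutdown() above fails and the
 * cache is re-added to slab_caches with a "Slab cache still has objects"
 * warning instead of being released.
 */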

int slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags);
	list_add(&s->list, &slab_caches);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];

#endif
	return kmalloc_caches[index];
}
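
/*
 * Worked examples for the lookup above (illustrative, not from the original
 * source): a request for 30 bytes gives size_index_elem(30) == 3 and
 * size_index[3] == 5, selecting the 32-byte cache; 100 bytes maps to
 * size_index[12] == 7, the 128-byte cache; 500 bytes is above 192, so
 * fls(499) == 9 selects the 512-byte cache.
 */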

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(unsigned long flags)
{
	int i;

	/*
	 * Patch up the size_index table if we have strange large alignment
	 * requirements for the kmalloc array. This is only the case for
	 * MIPS it seems. The standard arches will not generate any code here.
	 *
	 * Largest permitted alignment is 256 bytes due to the way we
	 * handle the index determination for the smaller caches.
	 *
	 * Make sure that nothing crazy happens if someone starts tinkering
	 * around with ARCH_KMALLOC_MINALIGN.
	 */
	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 bytes.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;

	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i]) {
			kmalloc_caches[i] = create_kmalloc_cache(NULL,
							1 << i, flags);
		}

		/*
		 * Caches that are not of the two-to-the-power-of size.
		 * These have to be created immediately after the
		 * earlier power of two caches.
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);

		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];
		char *n;

		if (s) {
			n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));

			BUG_ON(!n);
			s->name = n;
		}
	}

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
					"dma-kmalloc-%d", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

#ifdef CONFIG_TRACING
void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}
EXPORT_SYMBOL(kmalloc_order_trace);
#endif

#ifdef CONFIG_SLABINFO

#ifdef CONFIG_SLAB
#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
#else
#define SLABINFO_RIGHTS S_IRUSR
#endif

void print_slabinfo_header(struct seq_file *m)
{
	/*
	 * Output format version, so at least we can change it
	 * without _too_ many complaints.
	 */
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
#else
	seq_puts(m, "slabinfo - version: 2.1\n");
#endif
	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
		 "<objperslab> <pagesperslab>");
	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
#ifdef CONFIG_DEBUG_SLAB
	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
#endif
	seq_putc(m, '\n');
}
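
/*
 * Illustrative sample of the resulting /proc/slabinfo output (values are
 * made up; field widths follow cache_show() below):
 *
 *	slabinfo - version: 2.1
 *	# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> ...
 *	kmalloc-64          1234   1280     64   64    1 : tunables    0    0    0 : slabdata     20     20      0
 */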

static void *s_start(struct seq_file *m, loff_t *pos)
{
	loff_t n = *pos;

	mutex_lock(&slab_mutex);
	if (!n)
		print_slabinfo_header(m);

	return seq_list_start(&slab_caches, *pos);
}

void *slab_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &slab_caches, pos);
}

void slab_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&slab_mutex);
}

static void
memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
{
	struct kmem_cache *c;
	struct slabinfo sinfo;
	int i;

	if (!is_root_cache(s))
		return;

	for_each_memcg_cache_index(i) {
		c = cache_from_memcg_idx(s, i);
		if (!c)
			continue;

		memset(&sinfo, 0, sizeof(sinfo));
		get_slabinfo(c, &sinfo);

		info->active_slabs += sinfo.active_slabs;
		info->num_slabs += sinfo.num_slabs;
		info->shared_avail += sinfo.shared_avail;
		info->active_objs += sinfo.active_objs;
		info->num_objs += sinfo.num_objs;
	}
}

int cache_show(struct kmem_cache *s, struct seq_file *m)
{
	struct slabinfo sinfo;

	memset(&sinfo, 0, sizeof(sinfo));
	get_slabinfo(s, &sinfo);

	memcg_accumulate_slabinfo(s, &sinfo);

	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
		   sinfo.objects_per_slab, (1 << sinfo.cache_order));

	seq_printf(m, " : tunables %4u %4u %4u",
		   sinfo.limit, sinfo.batchcount, sinfo.shared);
	seq_printf(m, " : slabdata %6lu %6lu %6lu",
		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
	slabinfo_show_stats(m, s);
	seq_putc(m, '\n');
	return 0;
}

static int s_show(struct seq_file *m, void *p)
{
	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);

	if (!is_root_cache(s))
		return 0;
	return cache_show(s, m);
}

/*
 * slabinfo_op - iterator that generates /proc/slabinfo
 *
 * Output layout:
 * cache-name
 * num-active-objs
 * total-objs
 * object size
 * num-active-slabs
 * total-slabs
 * num-pages-per-slab
 * + further values on SMP and with statistics enabled
 */
static const struct seq_operations slabinfo_op = {
	.start = s_start,
	.next = slab_next,
	.stop = slab_stop,
	.show = s_show,
};

static int slabinfo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &slabinfo_op);
}

static const struct file_operations proc_slabinfo_operations = {
	.open		= slabinfo_open,
	.read		= seq_read,
	.write		= slabinfo_write,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
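
/*
 * Illustrative note (not part of this file): with CONFIG_SLAB the entry is
 * created writable (see SLABINFO_RIGHTS above) and slabinfo_write() accepts
 * tuning requests of the form "cache-name limit batchcount shared", e.g.
 *
 *	echo "dentry 128 64 8" > /proc/slabinfo
 *
 * With SLUB the file is read-only here and per-cache tuning is done through
 * /sys/kernel/slab/ instead.
 */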

static int __init slab_proc_init(void)
{
	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
		    &proc_slabinfo_operations);
	return 0;
}
module_init(slab_proc_init);
#endif /* CONFIG_SLABINFO */