/*
 * SLUB: A slab allocator that limits cache line use instead of queuing
 * objects in per cpu and per node lists.
 *
 * The allocator synchronizes using per slab locks and only
 * uses a centralized lock to manage a pool of partial slabs.
 *
 * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bit_spinlock.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
#include <linux/mempolicy.h>
#include <linux/ctype.h>
#include <linux/kallsyms.h>
#include <linux/memory.h>

/*
 * Lock order:
 *   1. slab_lock(page)
 *   2. slab->list_lock
 *
 * The slab_lock protects operations on the objects of a particular
 * slab and its metadata in the page struct. If the slab lock
 * has been taken then no allocations nor frees can be performed
 * on the objects in the slab nor can the slab be added or removed
 * from the partial or full lists since this would mean modifying
 * the page struct of the slab.
 *
 * The list_lock protects the partial and full list on each node and
 * the partial slab counter. If taken then no new slabs may be added or
 * removed from the lists nor can the number of partial slabs be modified.
 * (Note that the total number of slabs is an atomic value that may be
 * modified without taking the list lock).
 *
 * The list_lock is a centralized lock and thus we avoid taking it as
 * much as possible. As long as SLUB does not have to handle partial
 * slabs, operations can continue without any centralized lock. F.e.
 * allocating a long series of objects that fill up slabs does not require
 * the list lock.
 *
 * The lock order is sometimes inverted when we are trying to get a slab
 * off a list. We take the list_lock and then look for a page on the list
 * to use. While we do that objects in the slabs may be freed. We can
 * only operate on the slab if we have also taken the slab_lock. So we use
 * a slab_trylock() on the slab. If trylock was successful then no frees
 * can occur anymore and we can use the slab for allocations etc. If the
 * slab_trylock() does not succeed then frees are in progress in the slab and
 * we must stay away from it for a while since we may cause a bouncing
 * cacheline if we try to acquire the lock. So go onto the next slab.
 * If all pages are busy then we may allocate a new slab instead of reusing
 * a partial slab. A new slab has no one operating on it and thus there is
 * no danger of cacheline contention.
 *
 * Interrupts are disabled during allocation and deallocation in order to
 * make the slab allocator safe to use in the context of an irq. In addition
 * interrupts are disabled to ensure that the processor does not change
 * while handling per_cpu slabs, due to kernel preemption.
 *
 * SLUB assigns one slab for allocation to each processor.
 * Allocations only occur from these slabs called cpu slabs.
 *
 * Slabs with free elements are kept on a partial list and during regular
 * operations no list for full slabs is used. If an object in a full slab is
 * freed then the slab will show up again on the partial lists.
 * We track full slabs for debugging purposes though because otherwise we
 * cannot scan all objects.
 *
 * Slabs are freed when they become empty. Teardown and setup are
 * minimal so we rely on the page allocator's per cpu caches for
 * fast frees and allocs.
 *
 * Overloading of page flags that are otherwise used for LRU management.
 *
 * PageActive		The slab is frozen and exempt from list processing.
 * 			This means that the slab is dedicated to a purpose
 * 			such as satisfying allocations for a specific
 * 			processor. Objects may be freed in the slab while
 * 			it is frozen but slab_free will then skip the usual
 * 			list operations. It is up to the processor holding
 * 			the slab to integrate the slab into the slab lists
 * 			when the slab is no longer needed.
 *
 * 			One use of this flag is to mark slabs that are
 * 			used for allocations. Then such a slab becomes a cpu
 * 			slab. The cpu slab may be equipped with an additional
 * 			freelist that allows lockless access to
 * 			free objects in addition to the regular freelist
 * 			that requires the slab lock.
 *
 * PageError		Slab requires special handling due to debug
 * 			options set. This moves slab handling out of
 * 			the fast path and disables lockless freelists.
 */

#define FROZEN (1 << PG_active)

#ifdef CONFIG_SLUB_DEBUG
#define SLABDEBUG (1 << PG_error)
#else
#define SLABDEBUG 0
#endif

static inline int SlabFrozen(struct page *page)
{
	return page->flags & FROZEN;
}

static inline void SetSlabFrozen(struct page *page)
{
	page->flags |= FROZEN;
}

static inline void ClearSlabFrozen(struct page *page)
{
	page->flags &= ~FROZEN;
}

static inline int SlabDebug(struct page *page)
{
	return page->flags & SLABDEBUG;
}

static inline void SetSlabDebug(struct page *page)
{
	page->flags |= SLABDEBUG;
}

static inline void ClearSlabDebug(struct page *page)
{
	page->flags &= ~SLABDEBUG;
}

/*
 * Issues still to be resolved:
 *
 * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
 *
 * - Variable sizing of the per node arrays
 */

/* Enable to test recovery from slab corruption on boot */
#undef SLUB_RESILIENCY_TEST

/*
 * Currently fastpath is not supported if preemption is enabled.
 */
#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
#define SLUB_FASTPATH
#endif

#if PAGE_SHIFT <= 12

/*
 * Small page size. Make sure that we do not fragment memory
 */
#define DEFAULT_MAX_ORDER 1
#define DEFAULT_MIN_OBJECTS 4

#else

/*
 * Large page machines are customarily able to handle larger
 * page orders.
 */
#define DEFAULT_MAX_ORDER 2
#define DEFAULT_MIN_OBJECTS 8

#endif

/*
 * Minimum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
#define MIN_PARTIAL 5

/*
 * Maximum number of desirable partial slabs.
 * The existence of more partial slabs makes kmem_cache_shrink
 * sort the partial list by the number of objects in use.
188 */ 189 #define MAX_PARTIAL 10 190 191 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ 192 SLAB_POISON | SLAB_STORE_USER) 193 194 /* 195 * Set of flags that will prevent slab merging 196 */ 197 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ 198 SLAB_TRACE | SLAB_DESTROY_BY_RCU) 199 200 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ 201 SLAB_CACHE_DMA) 202 203 #ifndef ARCH_KMALLOC_MINALIGN 204 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) 205 #endif 206 207 #ifndef ARCH_SLAB_MINALIGN 208 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) 209 #endif 210 211 /* Internal SLUB flags */ 212 #define __OBJECT_POISON 0x80000000 /* Poison object */ 213 #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */ 214 215 /* Not all arches define cache_line_size */ 216 #ifndef cache_line_size 217 #define cache_line_size() L1_CACHE_BYTES 218 #endif 219 220 static int kmem_size = sizeof(struct kmem_cache); 221 222 #ifdef CONFIG_SMP 223 static struct notifier_block slab_notifier; 224 #endif 225 226 static enum { 227 DOWN, /* No slab functionality available */ 228 PARTIAL, /* kmem_cache_open() works but kmalloc does not */ 229 UP, /* Everything works but does not show up in sysfs */ 230 SYSFS /* Sysfs up */ 231 } slab_state = DOWN; 232 233 /* A list of all slab caches on the system */ 234 static DECLARE_RWSEM(slub_lock); 235 static LIST_HEAD(slab_caches); 236 237 /* 238 * Tracking user of a slab. 239 */ 240 struct track { 241 void *addr; /* Called from address */ 242 int cpu; /* Was running on cpu */ 243 int pid; /* Pid context */ 244 unsigned long when; /* When did the operation occur */ 245 }; 246 247 enum track_item { TRACK_ALLOC, TRACK_FREE }; 248 249 #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) 250 static int sysfs_slab_add(struct kmem_cache *); 251 static int sysfs_slab_alias(struct kmem_cache *, const char *); 252 static void sysfs_slab_remove(struct kmem_cache *); 253 254 #else 255 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } 256 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) 257 { return 0; } 258 static inline void sysfs_slab_remove(struct kmem_cache *s) 259 { 260 kfree(s); 261 } 262 263 #endif 264 265 static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) 266 { 267 #ifdef CONFIG_SLUB_STATS 268 c->stat[si]++; 269 #endif 270 } 271 272 /******************************************************************** 273 * Core slab cache functions 274 *******************************************************************/ 275 276 int slab_is_available(void) 277 { 278 return slab_state >= UP; 279 } 280 281 static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) 282 { 283 #ifdef CONFIG_NUMA 284 return s->node[node]; 285 #else 286 return &s->local_node; 287 #endif 288 } 289 290 static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) 291 { 292 #ifdef CONFIG_SMP 293 return s->cpu_slab[cpu]; 294 #else 295 return &s->cpu_slab; 296 #endif 297 } 298 299 /* 300 * The end pointer in a slab is special. It points to the first object in the 301 * slab but has bit 0 set to mark it. 302 * 303 * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 304 * in the mapping set. 
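 *
 * Illustrative example (hypothetical addresses, not taken from a real
 * system): for a slab whose first object sits at 0xffff810012345000,
 * new_slab() sets page->end to 0xffff810012345001. is_end() then reports
 * true for that value because bit 0 (PAGE_MAPPING_ANON) is set, and
 * slab_address() recovers 0xffff810012345000 by subtracting the bit again.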
305 */ 306 static inline int is_end(void *addr) 307 { 308 return (unsigned long)addr & PAGE_MAPPING_ANON; 309 } 310 311 void *slab_address(struct page *page) 312 { 313 return page->end - PAGE_MAPPING_ANON; 314 } 315 316 static inline int check_valid_pointer(struct kmem_cache *s, 317 struct page *page, const void *object) 318 { 319 void *base; 320 321 if (object == page->end) 322 return 1; 323 324 base = slab_address(page); 325 if (object < base || object >= base + s->objects * s->size || 326 (object - base) % s->size) { 327 return 0; 328 } 329 330 return 1; 331 } 332 333 /* 334 * Slow version of get and set free pointer. 335 * 336 * This version requires touching the cache lines of kmem_cache which 337 * we avoid to do in the fast alloc free paths. There we obtain the offset 338 * from the page struct. 339 */ 340 static inline void *get_freepointer(struct kmem_cache *s, void *object) 341 { 342 return *(void **)(object + s->offset); 343 } 344 345 static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) 346 { 347 *(void **)(object + s->offset) = fp; 348 } 349 350 /* Loop over all objects in a slab */ 351 #define for_each_object(__p, __s, __addr) \ 352 for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\ 353 __p += (__s)->size) 354 355 /* Scan freelist */ 356 #define for_each_free_object(__p, __s, __free) \ 357 for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ 358 __p)) 359 360 /* Determine object index from a given position */ 361 static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 362 { 363 return (p - addr) / s->size; 364 } 365 366 #ifdef CONFIG_SLUB_DEBUG 367 /* 368 * Debug settings: 369 */ 370 #ifdef CONFIG_SLUB_DEBUG_ON 371 static int slub_debug = DEBUG_DEFAULT_FLAGS; 372 #else 373 static int slub_debug; 374 #endif 375 376 static char *slub_debug_slabs; 377 378 /* 379 * Object debugging 380 */ 381 static void print_section(char *text, u8 *addr, unsigned int length) 382 { 383 int i, offset; 384 int newline = 1; 385 char ascii[17]; 386 387 ascii[16] = 0; 388 389 for (i = 0; i < length; i++) { 390 if (newline) { 391 printk(KERN_ERR "%8s 0x%p: ", text, addr + i); 392 newline = 0; 393 } 394 printk(KERN_CONT " %02x", addr[i]); 395 offset = i % 16; 396 ascii[offset] = isgraph(addr[i]) ? addr[i] : '.'; 397 if (offset == 15) { 398 printk(KERN_CONT " %s\n", ascii); 399 newline = 1; 400 } 401 } 402 if (!newline) { 403 i %= 16; 404 while (i < 16) { 405 printk(KERN_CONT " "); 406 ascii[i] = ' '; 407 i++; 408 } 409 printk(KERN_CONT " %s\n", ascii); 410 } 411 } 412 413 static struct track *get_track(struct kmem_cache *s, void *object, 414 enum track_item alloc) 415 { 416 struct track *p; 417 418 if (s->offset) 419 p = object + s->offset + sizeof(void *); 420 else 421 p = object + s->inuse; 422 423 return p + alloc; 424 } 425 426 static void set_track(struct kmem_cache *s, void *object, 427 enum track_item alloc, void *addr) 428 { 429 struct track *p; 430 431 if (s->offset) 432 p = object + s->offset + sizeof(void *); 433 else 434 p = object + s->inuse; 435 436 p += alloc; 437 if (addr) { 438 p->addr = addr; 439 p->cpu = smp_processor_id(); 440 p->pid = current ? 
current->pid : -1; 441 p->when = jiffies; 442 } else 443 memset(p, 0, sizeof(struct track)); 444 } 445 446 static void init_tracking(struct kmem_cache *s, void *object) 447 { 448 if (!(s->flags & SLAB_STORE_USER)) 449 return; 450 451 set_track(s, object, TRACK_FREE, NULL); 452 set_track(s, object, TRACK_ALLOC, NULL); 453 } 454 455 static void print_track(const char *s, struct track *t) 456 { 457 if (!t->addr) 458 return; 459 460 printk(KERN_ERR "INFO: %s in ", s); 461 __print_symbol("%s", (unsigned long)t->addr); 462 printk(" age=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid); 463 } 464 465 static void print_tracking(struct kmem_cache *s, void *object) 466 { 467 if (!(s->flags & SLAB_STORE_USER)) 468 return; 469 470 print_track("Allocated", get_track(s, object, TRACK_ALLOC)); 471 print_track("Freed", get_track(s, object, TRACK_FREE)); 472 } 473 474 static void print_page_info(struct page *page) 475 { 476 printk(KERN_ERR "INFO: Slab 0x%p used=%u fp=0x%p flags=0x%04lx\n", 477 page, page->inuse, page->freelist, page->flags); 478 479 } 480 481 static void slab_bug(struct kmem_cache *s, char *fmt, ...) 482 { 483 va_list args; 484 char buf[100]; 485 486 va_start(args, fmt); 487 vsnprintf(buf, sizeof(buf), fmt, args); 488 va_end(args); 489 printk(KERN_ERR "========================================" 490 "=====================================\n"); 491 printk(KERN_ERR "BUG %s: %s\n", s->name, buf); 492 printk(KERN_ERR "----------------------------------------" 493 "-------------------------------------\n\n"); 494 } 495 496 static void slab_fix(struct kmem_cache *s, char *fmt, ...) 497 { 498 va_list args; 499 char buf[100]; 500 501 va_start(args, fmt); 502 vsnprintf(buf, sizeof(buf), fmt, args); 503 va_end(args); 504 printk(KERN_ERR "FIX %s: %s\n", s->name, buf); 505 } 506 507 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) 508 { 509 unsigned int off; /* Offset of last byte */ 510 u8 *addr = slab_address(page); 511 512 print_tracking(s, p); 513 514 print_page_info(page); 515 516 printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", 517 p, p - addr, get_freepointer(s, p)); 518 519 if (p > addr + 16) 520 print_section("Bytes b4", p - 16, 16); 521 522 print_section("Object", p, min(s->objsize, 128)); 523 524 if (s->flags & SLAB_RED_ZONE) 525 print_section("Redzone", p + s->objsize, 526 s->inuse - s->objsize); 527 528 if (s->offset) 529 off = s->offset + sizeof(void *); 530 else 531 off = s->inuse; 532 533 if (s->flags & SLAB_STORE_USER) 534 off += 2 * sizeof(struct track); 535 536 if (off != s->size) 537 /* Beginning of the filler is the free pointer */ 538 print_section("Padding", p + off, s->size - off); 539 540 dump_stack(); 541 } 542 543 static void object_err(struct kmem_cache *s, struct page *page, 544 u8 *object, char *reason) 545 { 546 slab_bug(s, reason); 547 print_trailer(s, page, object); 548 } 549 550 static void slab_err(struct kmem_cache *s, struct page *page, char *fmt, ...) 551 { 552 va_list args; 553 char buf[100]; 554 555 va_start(args, fmt); 556 vsnprintf(buf, sizeof(buf), fmt, args); 557 va_end(args); 558 slab_bug(s, fmt); 559 print_page_info(page); 560 dump_stack(); 561 } 562 563 static void init_object(struct kmem_cache *s, void *object, int active) 564 { 565 u8 *p = object; 566 567 if (s->flags & __OBJECT_POISON) { 568 memset(p, POISON_FREE, s->objsize - 1); 569 p[s->objsize - 1] = POISON_END; 570 } 571 572 if (s->flags & SLAB_RED_ZONE) 573 memset(p + s->objsize, 574 active ? 
SLUB_RED_ACTIVE : SLUB_RED_INACTIVE, 575 s->inuse - s->objsize); 576 } 577 578 static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes) 579 { 580 while (bytes) { 581 if (*start != (u8)value) 582 return start; 583 start++; 584 bytes--; 585 } 586 return NULL; 587 } 588 589 static void restore_bytes(struct kmem_cache *s, char *message, u8 data, 590 void *from, void *to) 591 { 592 slab_fix(s, "Restoring 0x%p-0x%p=0x%x\n", from, to - 1, data); 593 memset(from, data, to - from); 594 } 595 596 static int check_bytes_and_report(struct kmem_cache *s, struct page *page, 597 u8 *object, char *what, 598 u8 *start, unsigned int value, unsigned int bytes) 599 { 600 u8 *fault; 601 u8 *end; 602 603 fault = check_bytes(start, value, bytes); 604 if (!fault) 605 return 1; 606 607 end = start + bytes; 608 while (end > fault && end[-1] == value) 609 end--; 610 611 slab_bug(s, "%s overwritten", what); 612 printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n", 613 fault, end - 1, fault[0], value); 614 print_trailer(s, page, object); 615 616 restore_bytes(s, what, value, fault, end); 617 return 0; 618 } 619 620 /* 621 * Object layout: 622 * 623 * object address 624 * Bytes of the object to be managed. 625 * If the freepointer may overlay the object then the free 626 * pointer is the first word of the object. 627 * 628 * Poisoning uses 0x6b (POISON_FREE) and the last byte is 629 * 0xa5 (POISON_END) 630 * 631 * object + s->objsize 632 * Padding to reach word boundary. This is also used for Redzoning. 633 * Padding is extended by another word if Redzoning is enabled and 634 * objsize == inuse. 635 * 636 * We fill with 0xbb (RED_INACTIVE) for inactive objects and with 637 * 0xcc (RED_ACTIVE) for objects in use. 638 * 639 * object + s->inuse 640 * Meta data starts here. 641 * 642 * A. Free pointer (if we cannot overwrite object on free) 643 * B. Tracking data for SLAB_STORE_USER 644 * C. Padding to reach required alignment boundary or at mininum 645 * one word if debuggin is on to be able to detect writes 646 * before the word boundary. 647 * 648 * Padding is done using 0x5a (POISON_INUSE) 649 * 650 * object + s->size 651 * Nothing is used beyond s->size. 652 * 653 * If slabcaches are merged then the objsize and inuse boundaries are mostly 654 * ignored. And therefore no slab options that rely on these boundaries 655 * may be used with merged slabcaches. 656 */ 657 658 static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) 659 { 660 unsigned long off = s->inuse; /* The end of info */ 661 662 if (s->offset) 663 /* Freepointer is placed after the object. */ 664 off += sizeof(void *); 665 666 if (s->flags & SLAB_STORE_USER) 667 /* We also have user information there */ 668 off += 2 * sizeof(struct track); 669 670 if (s->size == off) 671 return 1; 672 673 return check_bytes_and_report(s, page, p, "Object padding", 674 p + off, POISON_INUSE, s->size - off); 675 } 676 677 static int slab_pad_check(struct kmem_cache *s, struct page *page) 678 { 679 u8 *start; 680 u8 *fault; 681 u8 *end; 682 int length; 683 int remainder; 684 685 if (!(s->flags & SLAB_POISON)) 686 return 1; 687 688 start = slab_address(page); 689 end = start + (PAGE_SIZE << s->order); 690 length = s->objects * s->size; 691 remainder = end - (start + length); 692 if (!remainder) 693 return 1; 694 695 fault = check_bytes(start + length, POISON_INUSE, remainder); 696 if (!fault) 697 return 1; 698 while (end > fault && end[-1] == POISON_INUSE) 699 end--; 700 701 slab_err(s, page, "Padding overwritten. 
0x%p-0x%p", fault, end - 1); 702 print_section("Padding", start, length); 703 704 restore_bytes(s, "slab padding", POISON_INUSE, start, end); 705 return 0; 706 } 707 708 static int check_object(struct kmem_cache *s, struct page *page, 709 void *object, int active) 710 { 711 u8 *p = object; 712 u8 *endobject = object + s->objsize; 713 714 if (s->flags & SLAB_RED_ZONE) { 715 unsigned int red = 716 active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE; 717 718 if (!check_bytes_and_report(s, page, object, "Redzone", 719 endobject, red, s->inuse - s->objsize)) 720 return 0; 721 } else { 722 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { 723 check_bytes_and_report(s, page, p, "Alignment padding", 724 endobject, POISON_INUSE, s->inuse - s->objsize); 725 } 726 } 727 728 if (s->flags & SLAB_POISON) { 729 if (!active && (s->flags & __OBJECT_POISON) && 730 (!check_bytes_and_report(s, page, p, "Poison", p, 731 POISON_FREE, s->objsize - 1) || 732 !check_bytes_and_report(s, page, p, "Poison", 733 p + s->objsize - 1, POISON_END, 1))) 734 return 0; 735 /* 736 * check_pad_bytes cleans up on its own. 737 */ 738 check_pad_bytes(s, page, p); 739 } 740 741 if (!s->offset && active) 742 /* 743 * Object and freepointer overlap. Cannot check 744 * freepointer while object is allocated. 745 */ 746 return 1; 747 748 /* Check free pointer validity */ 749 if (!check_valid_pointer(s, page, get_freepointer(s, p))) { 750 object_err(s, page, p, "Freepointer corrupt"); 751 /* 752 * No choice but to zap it and thus loose the remainder 753 * of the free objects in this slab. May cause 754 * another error because the object count is now wrong. 755 */ 756 set_freepointer(s, p, page->end); 757 return 0; 758 } 759 return 1; 760 } 761 762 static int check_slab(struct kmem_cache *s, struct page *page) 763 { 764 VM_BUG_ON(!irqs_disabled()); 765 766 if (!PageSlab(page)) { 767 slab_err(s, page, "Not a valid slab page"); 768 return 0; 769 } 770 if (page->inuse > s->objects) { 771 slab_err(s, page, "inuse %u > max %u", 772 s->name, page->inuse, s->objects); 773 return 0; 774 } 775 /* Slab_pad_check fixes things up after itself */ 776 slab_pad_check(s, page); 777 return 1; 778 } 779 780 /* 781 * Determine if a certain object on a page is on the freelist. Must hold the 782 * slab lock to guarantee that the chains are in a consistent state. 783 */ 784 static int on_freelist(struct kmem_cache *s, struct page *page, void *search) 785 { 786 int nr = 0; 787 void *fp = page->freelist; 788 void *object = NULL; 789 790 while (fp != page->end && nr <= s->objects) { 791 if (fp == search) 792 return 1; 793 if (!check_valid_pointer(s, page, fp)) { 794 if (object) { 795 object_err(s, page, object, 796 "Freechain corrupt"); 797 set_freepointer(s, object, page->end); 798 break; 799 } else { 800 slab_err(s, page, "Freepointer corrupt"); 801 page->freelist = page->end; 802 page->inuse = s->objects; 803 slab_fix(s, "Freelist cleared"); 804 return 0; 805 } 806 break; 807 } 808 object = fp; 809 fp = get_freepointer(s, object); 810 nr++; 811 } 812 813 if (page->inuse != s->objects - nr) { 814 slab_err(s, page, "Wrong object count. Counter is %d but " 815 "counted were %d", page->inuse, s->objects - nr); 816 page->inuse = s->objects - nr; 817 slab_fix(s, "Object count adjusted."); 818 } 819 return search == NULL; 820 } 821 822 static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) 823 { 824 if (s->flags & SLAB_TRACE) { 825 printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", 826 s->name, 827 alloc ? 
"alloc" : "free", 828 object, page->inuse, 829 page->freelist); 830 831 if (!alloc) 832 print_section("Object", (void *)object, s->objsize); 833 834 dump_stack(); 835 } 836 } 837 838 /* 839 * Tracking of fully allocated slabs for debugging purposes. 840 */ 841 static void add_full(struct kmem_cache_node *n, struct page *page) 842 { 843 spin_lock(&n->list_lock); 844 list_add(&page->lru, &n->full); 845 spin_unlock(&n->list_lock); 846 } 847 848 static void remove_full(struct kmem_cache *s, struct page *page) 849 { 850 struct kmem_cache_node *n; 851 852 if (!(s->flags & SLAB_STORE_USER)) 853 return; 854 855 n = get_node(s, page_to_nid(page)); 856 857 spin_lock(&n->list_lock); 858 list_del(&page->lru); 859 spin_unlock(&n->list_lock); 860 } 861 862 static void setup_object_debug(struct kmem_cache *s, struct page *page, 863 void *object) 864 { 865 if (!(s->flags & (SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))) 866 return; 867 868 init_object(s, object, 0); 869 init_tracking(s, object); 870 } 871 872 static int alloc_debug_processing(struct kmem_cache *s, struct page *page, 873 void *object, void *addr) 874 { 875 if (!check_slab(s, page)) 876 goto bad; 877 878 if (object && !on_freelist(s, page, object)) { 879 object_err(s, page, object, "Object already allocated"); 880 goto bad; 881 } 882 883 if (!check_valid_pointer(s, page, object)) { 884 object_err(s, page, object, "Freelist Pointer check fails"); 885 goto bad; 886 } 887 888 if (object && !check_object(s, page, object, 0)) 889 goto bad; 890 891 /* Success perform special debug activities for allocs */ 892 if (s->flags & SLAB_STORE_USER) 893 set_track(s, object, TRACK_ALLOC, addr); 894 trace(s, page, object, 1); 895 init_object(s, object, 1); 896 return 1; 897 898 bad: 899 if (PageSlab(page)) { 900 /* 901 * If this is a slab page then lets do the best we can 902 * to avoid issues in the future. Marking all objects 903 * as used avoids touching the remaining objects. 904 */ 905 slab_fix(s, "Marking all objects used"); 906 page->inuse = s->objects; 907 page->freelist = page->end; 908 } 909 return 0; 910 } 911 912 static int free_debug_processing(struct kmem_cache *s, struct page *page, 913 void *object, void *addr) 914 { 915 if (!check_slab(s, page)) 916 goto fail; 917 918 if (!check_valid_pointer(s, page, object)) { 919 slab_err(s, page, "Invalid object pointer 0x%p", object); 920 goto fail; 921 } 922 923 if (on_freelist(s, page, object)) { 924 object_err(s, page, object, "Object already free"); 925 goto fail; 926 } 927 928 if (!check_object(s, page, object, 1)) 929 return 0; 930 931 if (unlikely(s != page->slab)) { 932 if (!PageSlab(page)) { 933 slab_err(s, page, "Attempt to free object(0x%p) " 934 "outside of slab", object); 935 } else if (!page->slab) { 936 printk(KERN_ERR 937 "SLUB <none>: no slab for object 0x%p.\n", 938 object); 939 dump_stack(); 940 } else 941 object_err(s, page, object, 942 "page slab pointer corrupt."); 943 goto fail; 944 } 945 946 /* Special debug activities for freeing objects */ 947 if (!SlabFrozen(page) && page->freelist == page->end) 948 remove_full(s, page); 949 if (s->flags & SLAB_STORE_USER) 950 set_track(s, object, TRACK_FREE, addr); 951 trace(s, page, object, 0); 952 init_object(s, object, 0); 953 return 1; 954 955 fail: 956 slab_fix(s, "Object at 0x%p not freed", object); 957 return 0; 958 } 959 960 static int __init setup_slub_debug(char *str) 961 { 962 slub_debug = DEBUG_DEFAULT_FLAGS; 963 if (*str++ != '=' || !*str) 964 /* 965 * No options specified. Switch on full debugging. 
966 */ 967 goto out; 968 969 if (*str == ',') 970 /* 971 * No options but restriction on slabs. This means full 972 * debugging for slabs matching a pattern. 973 */ 974 goto check_slabs; 975 976 slub_debug = 0; 977 if (*str == '-') 978 /* 979 * Switch off all debugging measures. 980 */ 981 goto out; 982 983 /* 984 * Determine which debug features should be switched on 985 */ 986 for (; *str && *str != ','; str++) { 987 switch (tolower(*str)) { 988 case 'f': 989 slub_debug |= SLAB_DEBUG_FREE; 990 break; 991 case 'z': 992 slub_debug |= SLAB_RED_ZONE; 993 break; 994 case 'p': 995 slub_debug |= SLAB_POISON; 996 break; 997 case 'u': 998 slub_debug |= SLAB_STORE_USER; 999 break; 1000 case 't': 1001 slub_debug |= SLAB_TRACE; 1002 break; 1003 default: 1004 printk(KERN_ERR "slub_debug option '%c' " 1005 "unknown. skipped\n", *str); 1006 } 1007 } 1008 1009 check_slabs: 1010 if (*str == ',') 1011 slub_debug_slabs = str + 1; 1012 out: 1013 return 1; 1014 } 1015 1016 __setup("slub_debug", setup_slub_debug); 1017 1018 static unsigned long kmem_cache_flags(unsigned long objsize, 1019 unsigned long flags, const char *name, 1020 void (*ctor)(struct kmem_cache *, void *)) 1021 { 1022 /* 1023 * The page->offset field is only 16 bit wide. This is an offset 1024 * in units of words from the beginning of an object. If the slab 1025 * size is bigger then we cannot move the free pointer behind the 1026 * object anymore. 1027 * 1028 * On 32 bit platforms the limit is 256k. On 64bit platforms 1029 * the limit is 512k. 1030 * 1031 * Debugging or ctor may create a need to move the free 1032 * pointer. Fail if this happens. 1033 */ 1034 if (objsize >= 65535 * sizeof(void *)) { 1035 BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | 1036 SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); 1037 BUG_ON(ctor); 1038 } else { 1039 /* 1040 * Enable debugging if selected on the kernel commandline. 
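		 *
		 * For example, booting with "slub_debug=FZ,dentry" would apply
		 * sanity checks (F) and red zoning (Z) only to caches whose
		 * name begins with "dentry", while a bare "slub_debug" enables
		 * the full DEBUG_DEFAULT_FLAGS set for every cache (see
		 * setup_slub_debug() above).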
1041 */ 1042 if (slub_debug && (!slub_debug_slabs || 1043 strncmp(slub_debug_slabs, name, 1044 strlen(slub_debug_slabs)) == 0)) 1045 flags |= slub_debug; 1046 } 1047 1048 return flags; 1049 } 1050 #else 1051 static inline void setup_object_debug(struct kmem_cache *s, 1052 struct page *page, void *object) {} 1053 1054 static inline int alloc_debug_processing(struct kmem_cache *s, 1055 struct page *page, void *object, void *addr) { return 0; } 1056 1057 static inline int free_debug_processing(struct kmem_cache *s, 1058 struct page *page, void *object, void *addr) { return 0; } 1059 1060 static inline int slab_pad_check(struct kmem_cache *s, struct page *page) 1061 { return 1; } 1062 static inline int check_object(struct kmem_cache *s, struct page *page, 1063 void *object, int active) { return 1; } 1064 static inline void add_full(struct kmem_cache_node *n, struct page *page) {} 1065 static inline unsigned long kmem_cache_flags(unsigned long objsize, 1066 unsigned long flags, const char *name, 1067 void (*ctor)(struct kmem_cache *, void *)) 1068 { 1069 return flags; 1070 } 1071 #define slub_debug 0 1072 #endif 1073 /* 1074 * Slab allocation and freeing 1075 */ 1076 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) 1077 { 1078 struct page *page; 1079 int pages = 1 << s->order; 1080 1081 if (s->order) 1082 flags |= __GFP_COMP; 1083 1084 if (s->flags & SLAB_CACHE_DMA) 1085 flags |= SLUB_DMA; 1086 1087 if (s->flags & SLAB_RECLAIM_ACCOUNT) 1088 flags |= __GFP_RECLAIMABLE; 1089 1090 if (node == -1) 1091 page = alloc_pages(flags, s->order); 1092 else 1093 page = alloc_pages_node(node, flags, s->order); 1094 1095 if (!page) 1096 return NULL; 1097 1098 mod_zone_page_state(page_zone(page), 1099 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1100 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1101 pages); 1102 1103 return page; 1104 } 1105 1106 static void setup_object(struct kmem_cache *s, struct page *page, 1107 void *object) 1108 { 1109 setup_object_debug(s, page, object); 1110 if (unlikely(s->ctor)) 1111 s->ctor(s, object); 1112 } 1113 1114 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) 1115 { 1116 struct page *page; 1117 struct kmem_cache_node *n; 1118 void *start; 1119 void *last; 1120 void *p; 1121 1122 BUG_ON(flags & GFP_SLAB_BUG_MASK); 1123 1124 page = allocate_slab(s, 1125 flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); 1126 if (!page) 1127 goto out; 1128 1129 n = get_node(s, page_to_nid(page)); 1130 if (n) 1131 atomic_long_inc(&n->nr_slabs); 1132 page->slab = s; 1133 page->flags |= 1 << PG_slab; 1134 if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | 1135 SLAB_STORE_USER | SLAB_TRACE)) 1136 SetSlabDebug(page); 1137 1138 start = page_address(page); 1139 page->end = start + 1; 1140 1141 if (unlikely(s->flags & SLAB_POISON)) 1142 memset(start, POISON_INUSE, PAGE_SIZE << s->order); 1143 1144 last = start; 1145 for_each_object(p, s, start) { 1146 setup_object(s, page, last); 1147 set_freepointer(s, last, p); 1148 last = p; 1149 } 1150 setup_object(s, page, last); 1151 set_freepointer(s, last, page->end); 1152 1153 page->freelist = start; 1154 page->inuse = 0; 1155 out: 1156 return page; 1157 } 1158 1159 static void __free_slab(struct kmem_cache *s, struct page *page) 1160 { 1161 int pages = 1 << s->order; 1162 1163 if (unlikely(SlabDebug(page))) { 1164 void *p; 1165 1166 slab_pad_check(s, page); 1167 for_each_object(p, s, slab_address(page)) 1168 check_object(s, page, p, 0); 1169 ClearSlabDebug(page); 1170 } 1171 1172 
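	/* Undo the NR_SLAB_* zone accounting that allocate_slab() added */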
mod_zone_page_state(page_zone(page), 1173 (s->flags & SLAB_RECLAIM_ACCOUNT) ? 1174 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1175 -pages); 1176 1177 page->mapping = NULL; 1178 __free_pages(page, s->order); 1179 } 1180 1181 static void rcu_free_slab(struct rcu_head *h) 1182 { 1183 struct page *page; 1184 1185 page = container_of((struct list_head *)h, struct page, lru); 1186 __free_slab(page->slab, page); 1187 } 1188 1189 static void free_slab(struct kmem_cache *s, struct page *page) 1190 { 1191 if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { 1192 /* 1193 * RCU free overloads the RCU head over the LRU 1194 */ 1195 struct rcu_head *head = (void *)&page->lru; 1196 1197 call_rcu(head, rcu_free_slab); 1198 } else 1199 __free_slab(s, page); 1200 } 1201 1202 static void discard_slab(struct kmem_cache *s, struct page *page) 1203 { 1204 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1205 1206 atomic_long_dec(&n->nr_slabs); 1207 reset_page_mapcount(page); 1208 __ClearPageSlab(page); 1209 free_slab(s, page); 1210 } 1211 1212 /* 1213 * Per slab locking using the pagelock 1214 */ 1215 static __always_inline void slab_lock(struct page *page) 1216 { 1217 bit_spin_lock(PG_locked, &page->flags); 1218 } 1219 1220 static __always_inline void slab_unlock(struct page *page) 1221 { 1222 __bit_spin_unlock(PG_locked, &page->flags); 1223 } 1224 1225 static __always_inline int slab_trylock(struct page *page) 1226 { 1227 int rc = 1; 1228 1229 rc = bit_spin_trylock(PG_locked, &page->flags); 1230 return rc; 1231 } 1232 1233 /* 1234 * Management of partially allocated slabs 1235 */ 1236 static void add_partial(struct kmem_cache_node *n, 1237 struct page *page, int tail) 1238 { 1239 spin_lock(&n->list_lock); 1240 n->nr_partial++; 1241 if (tail) 1242 list_add_tail(&page->lru, &n->partial); 1243 else 1244 list_add(&page->lru, &n->partial); 1245 spin_unlock(&n->list_lock); 1246 } 1247 1248 static void remove_partial(struct kmem_cache *s, 1249 struct page *page) 1250 { 1251 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1252 1253 spin_lock(&n->list_lock); 1254 list_del(&page->lru); 1255 n->nr_partial--; 1256 spin_unlock(&n->list_lock); 1257 } 1258 1259 /* 1260 * Lock slab and remove from the partial list. 1261 * 1262 * Must hold list_lock. 1263 */ 1264 static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page) 1265 { 1266 if (slab_trylock(page)) { 1267 list_del(&page->lru); 1268 n->nr_partial--; 1269 SetSlabFrozen(page); 1270 return 1; 1271 } 1272 return 0; 1273 } 1274 1275 /* 1276 * Try to allocate a partial slab from a specific node. 1277 */ 1278 static struct page *get_partial_node(struct kmem_cache_node *n) 1279 { 1280 struct page *page; 1281 1282 /* 1283 * Racy check. If we mistakenly see no partial slabs then we 1284 * just allocate an empty slab. If we mistakenly try to get a 1285 * partial slab and there is none available then get_partials() 1286 * will return NULL. 1287 */ 1288 if (!n || !n->nr_partial) 1289 return NULL; 1290 1291 spin_lock(&n->list_lock); 1292 list_for_each_entry(page, &n->partial, lru) 1293 if (lock_and_freeze_slab(n, page)) 1294 goto out; 1295 page = NULL; 1296 out: 1297 spin_unlock(&n->list_lock); 1298 return page; 1299 } 1300 1301 /* 1302 * Get a page from somewhere. Search in increasing NUMA distances. 
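 *
 * Illustrative example of the throttling check in the function body: with
 * remote_node_defrag_ratio set to 512, roughly half of the calls bail out
 * early (whenever get_cycles() % 1024 exceeds 512) and the other half go
 * on to scan remote nodes; with the ratio at 0 the scan is always skipped
 * and kmalloc() stays node local.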
1303 */ 1304 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) 1305 { 1306 #ifdef CONFIG_NUMA 1307 struct zonelist *zonelist; 1308 struct zone **z; 1309 struct page *page; 1310 1311 /* 1312 * The defrag ratio allows a configuration of the tradeoffs between 1313 * inter node defragmentation and node local allocations. A lower 1314 * defrag_ratio increases the tendency to do local allocations 1315 * instead of attempting to obtain partial slabs from other nodes. 1316 * 1317 * If the defrag_ratio is set to 0 then kmalloc() always 1318 * returns node local objects. If the ratio is higher then kmalloc() 1319 * may return off node objects because partial slabs are obtained 1320 * from other nodes and filled up. 1321 * 1322 * If /sys/slab/xx/defrag_ratio is set to 100 (which makes 1323 * defrag_ratio = 1000) then every (well almost) allocation will 1324 * first attempt to defrag slab caches on other nodes. This means 1325 * scanning over all nodes to look for partial slabs which may be 1326 * expensive if we do it every time we are trying to find a slab 1327 * with available objects. 1328 */ 1329 if (!s->remote_node_defrag_ratio || 1330 get_cycles() % 1024 > s->remote_node_defrag_ratio) 1331 return NULL; 1332 1333 zonelist = &NODE_DATA( 1334 slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)]; 1335 for (z = zonelist->zones; *z; z++) { 1336 struct kmem_cache_node *n; 1337 1338 n = get_node(s, zone_to_nid(*z)); 1339 1340 if (n && cpuset_zone_allowed_hardwall(*z, flags) && 1341 n->nr_partial > MIN_PARTIAL) { 1342 page = get_partial_node(n); 1343 if (page) 1344 return page; 1345 } 1346 } 1347 #endif 1348 return NULL; 1349 } 1350 1351 /* 1352 * Get a partial page, lock it and return it. 1353 */ 1354 static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) 1355 { 1356 struct page *page; 1357 int searchnode = (node == -1) ? numa_node_id() : node; 1358 1359 page = get_partial_node(get_node(s, searchnode)); 1360 if (page || (flags & __GFP_THISNODE)) 1361 return page; 1362 1363 return get_any_partial(s, flags); 1364 } 1365 1366 /* 1367 * Move a page back to the lists. 1368 * 1369 * Must be called with the slab lock held. 1370 * 1371 * On exit the slab lock will have been dropped. 1372 */ 1373 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) 1374 { 1375 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1376 struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); 1377 1378 ClearSlabFrozen(page); 1379 if (page->inuse) { 1380 1381 if (page->freelist != page->end) { 1382 add_partial(n, page, tail); 1383 stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); 1384 } else { 1385 stat(c, DEACTIVATE_FULL); 1386 if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) 1387 add_full(n, page); 1388 } 1389 slab_unlock(page); 1390 } else { 1391 stat(c, DEACTIVATE_EMPTY); 1392 if (n->nr_partial < MIN_PARTIAL) { 1393 /* 1394 * Adding an empty slab to the partial slabs in order 1395 * to avoid page allocator overhead. This slab needs 1396 * to come after the other slabs with objects in 1397 * order to fill them up. That way the size of the 1398 * partial list stays small. kmem_cache_shrink can 1399 * reclaim empty slabs from the partial list. 
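			 * (That is why the add_partial() call below passes
			 * tail == 1 and queues the slab at the end of the
			 * partial list.)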
1400 */ 1401 add_partial(n, page, 1); 1402 slab_unlock(page); 1403 } else { 1404 slab_unlock(page); 1405 stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); 1406 discard_slab(s, page); 1407 } 1408 } 1409 } 1410 1411 /* 1412 * Remove the cpu slab 1413 */ 1414 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1415 { 1416 struct page *page = c->page; 1417 int tail = 1; 1418 1419 if (c->freelist) 1420 stat(c, DEACTIVATE_REMOTE_FREES); 1421 /* 1422 * Merge cpu freelist into freelist. Typically we get here 1423 * because both freelists are empty. So this is unlikely 1424 * to occur. 1425 * 1426 * We need to use _is_end here because deactivate slab may 1427 * be called for a debug slab. Then c->freelist may contain 1428 * a dummy pointer. 1429 */ 1430 while (unlikely(!is_end(c->freelist))) { 1431 void **object; 1432 1433 tail = 0; /* Hot objects. Put the slab first */ 1434 1435 /* Retrieve object from cpu_freelist */ 1436 object = c->freelist; 1437 c->freelist = c->freelist[c->offset]; 1438 1439 /* And put onto the regular freelist */ 1440 object[c->offset] = page->freelist; 1441 page->freelist = object; 1442 page->inuse--; 1443 } 1444 c->page = NULL; 1445 unfreeze_slab(s, page, tail); 1446 } 1447 1448 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1449 { 1450 stat(c, CPUSLAB_FLUSH); 1451 slab_lock(c->page); 1452 deactivate_slab(s, c); 1453 } 1454 1455 /* 1456 * Flush cpu slab. 1457 * Called from IPI handler with interrupts disabled. 1458 */ 1459 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1460 { 1461 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 1462 1463 if (likely(c && c->page)) 1464 flush_slab(s, c); 1465 } 1466 1467 static void flush_cpu_slab(void *d) 1468 { 1469 struct kmem_cache *s = d; 1470 1471 __flush_cpu_slab(s, smp_processor_id()); 1472 } 1473 1474 static void flush_all(struct kmem_cache *s) 1475 { 1476 #ifdef CONFIG_SMP 1477 on_each_cpu(flush_cpu_slab, s, 1, 1); 1478 #else 1479 unsigned long flags; 1480 1481 local_irq_save(flags); 1482 flush_cpu_slab(s); 1483 local_irq_restore(flags); 1484 #endif 1485 } 1486 1487 /* 1488 * Check if the objects in a per cpu structure fit numa 1489 * locality expectations. 1490 */ 1491 static inline int node_match(struct kmem_cache_cpu *c, int node) 1492 { 1493 #ifdef CONFIG_NUMA 1494 if (node != -1 && c->node != node) 1495 return 0; 1496 #endif 1497 return 1; 1498 } 1499 1500 /* 1501 * Slow path. The lockless freelist is empty or we need to perform 1502 * debugging duties. 1503 * 1504 * Interrupts are disabled. 1505 * 1506 * Processing is still very fast if new objects have been freed to the 1507 * regular freelist. In that case we simply take over the regular freelist 1508 * as the lockless freelist and zap the regular freelist. 1509 * 1510 * If that is not working then we fall back to the partial lists. We take the 1511 * first element of the freelist as the object to allocate now and move the 1512 * rest of the freelist to the lockless freelist. 1513 * 1514 * And if we were unable to get a new slab from the partial slab lists then 1515 * we need to allocate a new slab. This is slowest path since we may sleep. 
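 *
 * A rough sketch of the flow below (labels refer to the gotos in the
 * function, see the code for the details):
 *
 *	load_freelist:	take over c->page->freelist as the new lockless
 *			freelist and mark the page fully allocated
 *	another_slab:	deactivate the current cpu slab
 *	new_slab:	try get_partial() first, then new_slab(), which may
 *			re-enable interrupts and sleep if __GFP_WAIT is set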
1516 */ 1517 static void *__slab_alloc(struct kmem_cache *s, 1518 gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) 1519 { 1520 void **object; 1521 struct page *new; 1522 #ifdef SLUB_FASTPATH 1523 unsigned long flags; 1524 1525 local_irq_save(flags); 1526 #endif 1527 if (!c->page) 1528 goto new_slab; 1529 1530 slab_lock(c->page); 1531 if (unlikely(!node_match(c, node))) 1532 goto another_slab; 1533 stat(c, ALLOC_REFILL); 1534 load_freelist: 1535 object = c->page->freelist; 1536 if (unlikely(object == c->page->end)) 1537 goto another_slab; 1538 if (unlikely(SlabDebug(c->page))) 1539 goto debug; 1540 1541 object = c->page->freelist; 1542 c->freelist = object[c->offset]; 1543 c->page->inuse = s->objects; 1544 c->page->freelist = c->page->end; 1545 c->node = page_to_nid(c->page); 1546 unlock_out: 1547 slab_unlock(c->page); 1548 stat(c, ALLOC_SLOWPATH); 1549 out: 1550 #ifdef SLUB_FASTPATH 1551 local_irq_restore(flags); 1552 #endif 1553 return object; 1554 1555 another_slab: 1556 deactivate_slab(s, c); 1557 1558 new_slab: 1559 new = get_partial(s, gfpflags, node); 1560 if (new) { 1561 c->page = new; 1562 stat(c, ALLOC_FROM_PARTIAL); 1563 goto load_freelist; 1564 } 1565 1566 if (gfpflags & __GFP_WAIT) 1567 local_irq_enable(); 1568 1569 new = new_slab(s, gfpflags, node); 1570 1571 if (gfpflags & __GFP_WAIT) 1572 local_irq_disable(); 1573 1574 if (new) { 1575 c = get_cpu_slab(s, smp_processor_id()); 1576 stat(c, ALLOC_SLAB); 1577 if (c->page) 1578 flush_slab(s, c); 1579 slab_lock(new); 1580 SetSlabFrozen(new); 1581 c->page = new; 1582 goto load_freelist; 1583 } 1584 object = NULL; 1585 goto out; 1586 debug: 1587 object = c->page->freelist; 1588 if (!alloc_debug_processing(s, c->page, object, addr)) 1589 goto another_slab; 1590 1591 c->page->inuse++; 1592 c->page->freelist = object[c->offset]; 1593 c->node = -1; 1594 goto unlock_out; 1595 } 1596 1597 /* 1598 * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) 1599 * have the fastpath folded into their functions. So no function call 1600 * overhead for requests that can be satisfied on the fastpath. 1601 * 1602 * The fastpath works by first checking if the lockless freelist can be used. 1603 * If not then __slab_alloc is called for slow processing. 1604 * 1605 * Otherwise we can simply pick the next object from the lockless free list. 1606 */ 1607 static __always_inline void *slab_alloc(struct kmem_cache *s, 1608 gfp_t gfpflags, int node, void *addr) 1609 { 1610 void **object; 1611 struct kmem_cache_cpu *c; 1612 1613 /* 1614 * The SLUB_FASTPATH path is provisional and is currently disabled if the 1615 * kernel is compiled with preemption or if the arch does not support 1616 * fast cmpxchg operations. There are a couple of coming changes that will 1617 * simplify matters and allow preemption. Ultimately we may end up making 1618 * SLUB_FASTPATH the default. 1619 * 1620 * 1. The introduction of the per cpu allocator will avoid array lookups 1621 * through get_cpu_slab(). A special register can be used instead. 1622 * 1623 * 2. The introduction of per cpu atomic operations (cpu_ops) means that 1624 * we can realize the logic here entirely with per cpu atomics. The 1625 * per cpu atomic ops will take care of the preemption issues. 
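 *
 * Sketch of how the lockless fastpath below behaves (assuming
 * SLUB_FASTPATH is defined): read c->freelist, then atomically replace it
 * with the next object (object[c->offset]) only if it still holds the
 * value that was read. An interrupt that allocates or frees in between
 * changes c->freelist, the cmpxchg_local() fails and the loop simply
 * retries, so a stale object is never handed out.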
1626 */ 1627 1628 #ifdef SLUB_FASTPATH 1629 c = get_cpu_slab(s, raw_smp_processor_id()); 1630 do { 1631 object = c->freelist; 1632 if (unlikely(is_end(object) || !node_match(c, node))) { 1633 object = __slab_alloc(s, gfpflags, node, addr, c); 1634 break; 1635 } 1636 stat(c, ALLOC_FASTPATH); 1637 } while (cmpxchg_local(&c->freelist, object, object[c->offset]) 1638 != object); 1639 #else 1640 unsigned long flags; 1641 1642 local_irq_save(flags); 1643 c = get_cpu_slab(s, smp_processor_id()); 1644 if (unlikely(is_end(c->freelist) || !node_match(c, node))) 1645 1646 object = __slab_alloc(s, gfpflags, node, addr, c); 1647 1648 else { 1649 object = c->freelist; 1650 c->freelist = object[c->offset]; 1651 stat(c, ALLOC_FASTPATH); 1652 } 1653 local_irq_restore(flags); 1654 #endif 1655 1656 if (unlikely((gfpflags & __GFP_ZERO) && object)) 1657 memset(object, 0, c->objsize); 1658 1659 return object; 1660 } 1661 1662 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) 1663 { 1664 return slab_alloc(s, gfpflags, -1, __builtin_return_address(0)); 1665 } 1666 EXPORT_SYMBOL(kmem_cache_alloc); 1667 1668 #ifdef CONFIG_NUMA 1669 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) 1670 { 1671 return slab_alloc(s, gfpflags, node, __builtin_return_address(0)); 1672 } 1673 EXPORT_SYMBOL(kmem_cache_alloc_node); 1674 #endif 1675 1676 /* 1677 * Slow patch handling. This may still be called frequently since objects 1678 * have a longer lifetime than the cpu slabs in most processing loads. 1679 * 1680 * So we still attempt to reduce cache line usage. Just take the slab 1681 * lock and free the item. If there is no additional partial page 1682 * handling required then we can return immediately. 1683 */ 1684 static void __slab_free(struct kmem_cache *s, struct page *page, 1685 void *x, void *addr, unsigned int offset) 1686 { 1687 void *prior; 1688 void **object = (void *)x; 1689 struct kmem_cache_cpu *c; 1690 1691 #ifdef SLUB_FASTPATH 1692 unsigned long flags; 1693 1694 local_irq_save(flags); 1695 #endif 1696 c = get_cpu_slab(s, raw_smp_processor_id()); 1697 stat(c, FREE_SLOWPATH); 1698 slab_lock(page); 1699 1700 if (unlikely(SlabDebug(page))) 1701 goto debug; 1702 checks_ok: 1703 prior = object[offset] = page->freelist; 1704 page->freelist = object; 1705 page->inuse--; 1706 1707 if (unlikely(SlabFrozen(page))) { 1708 stat(c, FREE_FROZEN); 1709 goto out_unlock; 1710 } 1711 1712 if (unlikely(!page->inuse)) 1713 goto slab_empty; 1714 1715 /* 1716 * Objects left in the slab. If it 1717 * was not on the partial list before 1718 * then add it. 1719 */ 1720 if (unlikely(prior == page->end)) { 1721 add_partial(get_node(s, page_to_nid(page)), page, 1); 1722 stat(c, FREE_ADD_PARTIAL); 1723 } 1724 1725 out_unlock: 1726 slab_unlock(page); 1727 #ifdef SLUB_FASTPATH 1728 local_irq_restore(flags); 1729 #endif 1730 return; 1731 1732 slab_empty: 1733 if (prior != page->end) { 1734 /* 1735 * Slab still on the partial list. 1736 */ 1737 remove_partial(s, page); 1738 stat(c, FREE_REMOVE_PARTIAL); 1739 } 1740 slab_unlock(page); 1741 stat(c, FREE_SLAB); 1742 #ifdef SLUB_FASTPATH 1743 local_irq_restore(flags); 1744 #endif 1745 discard_slab(s, page); 1746 return; 1747 1748 debug: 1749 if (!free_debug_processing(s, page, x, addr)) 1750 goto out_unlock; 1751 goto checks_ok; 1752 } 1753 1754 /* 1755 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that 1756 * can perform fastpath freeing without additional function calls. 
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This is typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, void *addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;

#ifdef SLUB_FASTPATH
	void **freelist;

	c = get_cpu_slab(s, raw_smp_processor_id());
	debug_check_no_locks_freed(object, s->objsize);
	do {
		freelist = c->freelist;
		barrier();
		/*
		 * If the compiler would reorder the retrieval of c->page to
		 * come before c->freelist then an interrupt could
		 * change the cpu slab before we retrieve c->freelist. We
		 * could be matching on a page no longer active and put the
		 * object onto the freelist of the wrong slab.
		 *
		 * On the other hand: If we already have the freelist pointer
		 * then any change of cpu_slab will cause the cmpxchg to fail
		 * since the freelist pointers are unique per slab.
		 */
		if (unlikely(page != c->page || c->node < 0)) {
			__slab_free(s, page, x, addr, c->offset);
			break;
		}
		object[c->offset] = freelist;
		stat(c, FREE_FASTPATH);
	} while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
#else
	unsigned long flags;

	local_irq_save(flags);
	debug_check_no_locks_freed(object, s->objsize);
	c = get_cpu_slab(s, smp_processor_id());
	if (likely(page == c->page && c->node >= 0)) {
		object[c->offset] = c->freelist;
		c->freelist = object;
		stat(c, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr, c->offset);

	local_irq_restore(flags);
#endif
}

void kmem_cache_free(struct kmem_cache *s, void *x)
{
	struct page *page;

	page = virt_to_head_page(x);

	slab_free(s, page, x, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_free);

/* Figure out on which slab object the object resides */
static struct page *get_object_page(const void *x)
{
	struct page *page = virt_to_head_page(x);

	if (!PageSlab(page))
		return NULL;

	return page;
}

/*
 * Object placement in a slab is made very easy because we always start at
 * offset 0. If we tune the size of the object to the alignment then we can
 * get the required alignment by putting one properly sized object after
 * another.
 *
 * Notice that the allocation order determines the sizes of the per cpu
 * caches. Each processor has always one slab available for allocations.
 * Increasing the allocation order reduces the number of times that slabs
 * must be moved on and off the partial lists and is therefore a factor in
 * locking overhead.
 */

/*
 * Minimum / Maximum order of slab pages. This influences locking overhead
 * and slab fragmentation. A higher order reduces the number of partial slabs
 * and increases the number of allocations possible without having to
 * take the list_lock.
 */
static int slub_min_order;
static int slub_max_order = DEFAULT_MAX_ORDER;
static int slub_min_objects = DEFAULT_MIN_OBJECTS;

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 * (Could be removed. This was introduced to pacify the merge skeptics.)
 */
static int slub_nomerge;

/*
 * Calculate the order of allocation given a slab object size.
 *
 * The order of allocation has significant impact on performance and other
 * system components. Generally order 0 allocations should be preferred since
 * order 0 does not cause fragmentation in the page allocator. Larger objects
 * can be problematic to put into order 0 slabs because there may be too much
 * unused space left. We go to a higher order if more than 1/8th of the slab
 * would be wasted.
 *
 * In order to reach satisfactory performance we must ensure that a minimum
 * number of objects is in one slab. Otherwise we may generate too much
 * activity on the partial lists which requires taking the list_lock. This is
 * less a concern for large slabs though which are rarely used.
 *
 * slub_max_order specifies the order where we begin to stop considering the
 * number of objects in a slab as critical. If we reach slub_max_order then
 * we try to keep the page order as low as possible. So we accept more waste
 * of space in favor of a small page order.
 *
 * Higher order allocations also allow the placement of more objects in a
 * slab and thereby reduce object handling overhead. If the user has
 * requested a higher minimum order then we start with that one instead of
 * the smallest order which will fit the object.
 */
static inline int slab_order(int size, int min_objects,
				int max_order, int fract_leftover)
{
	int order;
	int rem;
	int min_order = slub_min_order;

	for (order = max(min_order,
				fls(min_objects * size - 1) - PAGE_SHIFT);
			order <= max_order; order++) {

		unsigned long slab_size = PAGE_SIZE << order;

		if (slab_size < min_objects * size)
			continue;

		rem = slab_size % size;

		if (rem <= slab_size / fract_leftover)
			break;

	}

	return order;
}

static inline int calculate_order(int size)
{
	int order;
	int min_objects;
	int fraction;

	/*
	 * Attempt to find the best configuration for a slab. This
	 * works by first attempting to generate a layout with
	 * the best configuration and backing off gradually.
	 *
	 * First we reduce the acceptable waste in a slab. Then
	 * we reduce the minimum objects required in a slab.
	 */
	min_objects = slub_min_objects;
	while (min_objects > 1) {
		fraction = 8;
		while (fraction >= 4) {
			order = slab_order(size, min_objects,
						slub_max_order, fraction);
			if (order <= slub_max_order)
				return order;
			fraction /= 2;
		}
		min_objects /= 2;
	}

	/*
	 * We were unable to place multiple objects in a slab. Now
	 * let's see if we can place a single object there.
	 */
	order = slab_order(size, 1, slub_max_order, 1);
	if (order <= slub_max_order)
		return order;

	/*
	 * Doh this slab cannot be placed using slub_max_order.
	 */
	order = slab_order(size, 1, MAX_ORDER, 1);
	if (order <= MAX_ORDER)
		return order;
	return -ENOSYS;
}

/*
 * Figure out what the alignment of the objects will be.
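 *
 * Worked example (assuming a 64 byte cache line): a 100 byte object
 * created with SLAB_HWCACHE_ALIGN is larger than half a cache line, so
 * its alignment is raised to 64 bytes. A 24 byte object with the same
 * flag keeps the smaller requested/minimum alignment, since several of
 * them can share one cache line.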
1961 */ 1962 static unsigned long calculate_alignment(unsigned long flags, 1963 unsigned long align, unsigned long size) 1964 { 1965 /* 1966 * If the user wants hardware cache aligned objects then 1967 * follow that suggestion if the object is sufficiently 1968 * large. 1969 * 1970 * The hardware cache alignment cannot override the 1971 * specified alignment though. If that is greater 1972 * then use it. 1973 */ 1974 if ((flags & SLAB_HWCACHE_ALIGN) && 1975 size > cache_line_size() / 2) 1976 return max_t(unsigned long, align, cache_line_size()); 1977 1978 if (align < ARCH_SLAB_MINALIGN) 1979 return ARCH_SLAB_MINALIGN; 1980 1981 return ALIGN(align, sizeof(void *)); 1982 } 1983 1984 static void init_kmem_cache_cpu(struct kmem_cache *s, 1985 struct kmem_cache_cpu *c) 1986 { 1987 c->page = NULL; 1988 c->freelist = (void *)PAGE_MAPPING_ANON; 1989 c->node = 0; 1990 c->offset = s->offset / sizeof(void *); 1991 c->objsize = s->objsize; 1992 } 1993 1994 static void init_kmem_cache_node(struct kmem_cache_node *n) 1995 { 1996 n->nr_partial = 0; 1997 atomic_long_set(&n->nr_slabs, 0); 1998 spin_lock_init(&n->list_lock); 1999 INIT_LIST_HEAD(&n->partial); 2000 #ifdef CONFIG_SLUB_DEBUG 2001 INIT_LIST_HEAD(&n->full); 2002 #endif 2003 } 2004 2005 #ifdef CONFIG_SMP 2006 /* 2007 * Per cpu array for per cpu structures. 2008 * 2009 * The per cpu array places all kmem_cache_cpu structures from one processor 2010 * close together meaning that it becomes possible that multiple per cpu 2011 * structures are contained in one cacheline. This may be particularly 2012 * beneficial for the kmalloc caches. 2013 * 2014 * A desktop system typically has around 60-80 slabs. With 100 here we are 2015 * likely able to get per cpu structures for all caches from the array defined 2016 * here. We must be able to cover all kmalloc caches during bootstrap. 2017 * 2018 * If the per cpu array is exhausted then fall back to kmalloc 2019 * of individual cachelines. No sharing is possible then. 
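 *
 * Unused entries of the array are kept on a simple per cpu free list,
 * chained through their ->freelist pointers: kmem_cache_cpu_free points
 * at the first free entry and alloc_kmem_cache_cpu()/free_kmem_cache_cpu()
 * below pop and push entries on that list.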
2020 */ 2021 #define NR_KMEM_CACHE_CPU 100 2022 2023 static DEFINE_PER_CPU(struct kmem_cache_cpu, 2024 kmem_cache_cpu)[NR_KMEM_CACHE_CPU]; 2025 2026 static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); 2027 static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE; 2028 2029 static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, 2030 int cpu, gfp_t flags) 2031 { 2032 struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); 2033 2034 if (c) 2035 per_cpu(kmem_cache_cpu_free, cpu) = 2036 (void *)c->freelist; 2037 else { 2038 /* Table overflow: So allocate ourselves */ 2039 c = kmalloc_node( 2040 ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), 2041 flags, cpu_to_node(cpu)); 2042 if (!c) 2043 return NULL; 2044 } 2045 2046 init_kmem_cache_cpu(s, c); 2047 return c; 2048 } 2049 2050 static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) 2051 { 2052 if (c < per_cpu(kmem_cache_cpu, cpu) || 2053 c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { 2054 kfree(c); 2055 return; 2056 } 2057 c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); 2058 per_cpu(kmem_cache_cpu_free, cpu) = c; 2059 } 2060 2061 static void free_kmem_cache_cpus(struct kmem_cache *s) 2062 { 2063 int cpu; 2064 2065 for_each_online_cpu(cpu) { 2066 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 2067 2068 if (c) { 2069 s->cpu_slab[cpu] = NULL; 2070 free_kmem_cache_cpu(c, cpu); 2071 } 2072 } 2073 } 2074 2075 static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) 2076 { 2077 int cpu; 2078 2079 for_each_online_cpu(cpu) { 2080 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 2081 2082 if (c) 2083 continue; 2084 2085 c = alloc_kmem_cache_cpu(s, cpu, flags); 2086 if (!c) { 2087 free_kmem_cache_cpus(s); 2088 return 0; 2089 } 2090 s->cpu_slab[cpu] = c; 2091 } 2092 return 1; 2093 } 2094 2095 /* 2096 * Initialize the per cpu array. 2097 */ 2098 static void init_alloc_cpu_cpu(int cpu) 2099 { 2100 int i; 2101 2102 if (cpu_isset(cpu, kmem_cach_cpu_free_init_once)) 2103 return; 2104 2105 for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) 2106 free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); 2107 2108 cpu_set(cpu, kmem_cach_cpu_free_init_once); 2109 } 2110 2111 static void __init init_alloc_cpu(void) 2112 { 2113 int cpu; 2114 2115 for_each_online_cpu(cpu) 2116 init_alloc_cpu_cpu(cpu); 2117 } 2118 2119 #else 2120 static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} 2121 static inline void init_alloc_cpu(void) {} 2122 2123 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) 2124 { 2125 init_kmem_cache_cpu(s, &s->cpu_slab); 2126 return 1; 2127 } 2128 #endif 2129 2130 #ifdef CONFIG_NUMA 2131 /* 2132 * No kmalloc_node yet so do it by hand. We know that this is the first 2133 * slab on the node for this slabcache. There are no concurrent accesses 2134 * possible. 2135 * 2136 * Note that this function only works on the kmalloc_node_cache 2137 * when allocating for the kmalloc_node_cache. This is used for bootstrapping 2138 * memory on a fresh node that has no slab structures yet. 
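 *
 * Rough sequence (descriptive): new_slab() allocates a page for
 * kmalloc_caches[0] (the kmem_cache_node cache), the first object is carved
 * off that slab's freelist by hand and used as the struct kmem_cache_node
 * for the new node, and the partially used slab is then put on the node's
 * partial list so the remaining objects can be handed out normally later.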
2139 */ 2140 static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, 2141 int node) 2142 { 2143 struct page *page; 2144 struct kmem_cache_node *n; 2145 unsigned long flags; 2146 2147 BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); 2148 2149 page = new_slab(kmalloc_caches, gfpflags, node); 2150 2151 BUG_ON(!page); 2152 if (page_to_nid(page) != node) { 2153 printk(KERN_ERR "SLUB: Unable to allocate memory from " 2154 "node %d\n", node); 2155 printk(KERN_ERR "SLUB: Allocating a useless per node structure " 2156 "in order to be able to continue\n"); 2157 } 2158 2159 n = page->freelist; 2160 BUG_ON(!n); 2161 page->freelist = get_freepointer(kmalloc_caches, n); 2162 page->inuse++; 2163 kmalloc_caches->node[node] = n; 2164 #ifdef CONFIG_SLUB_DEBUG 2165 init_object(kmalloc_caches, n, 1); 2166 init_tracking(kmalloc_caches, n); 2167 #endif 2168 init_kmem_cache_node(n); 2169 atomic_long_inc(&n->nr_slabs); 2170 /* 2171 * lockdep requires consistent irq usage for each lock 2172 * so even though there cannot be a race this early in 2173 * the boot sequence, we still disable irqs. 2174 */ 2175 local_irq_save(flags); 2176 add_partial(n, page, 0); 2177 local_irq_restore(flags); 2178 return n; 2179 } 2180 2181 static void free_kmem_cache_nodes(struct kmem_cache *s) 2182 { 2183 int node; 2184 2185 for_each_node_state(node, N_NORMAL_MEMORY) { 2186 struct kmem_cache_node *n = s->node[node]; 2187 if (n && n != &s->local_node) 2188 kmem_cache_free(kmalloc_caches, n); 2189 s->node[node] = NULL; 2190 } 2191 } 2192 2193 static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) 2194 { 2195 int node; 2196 int local_node; 2197 2198 if (slab_state >= UP) 2199 local_node = page_to_nid(virt_to_page(s)); 2200 else 2201 local_node = 0; 2202 2203 for_each_node_state(node, N_NORMAL_MEMORY) { 2204 struct kmem_cache_node *n; 2205 2206 if (local_node == node) 2207 n = &s->local_node; 2208 else { 2209 if (slab_state == DOWN) { 2210 n = early_kmem_cache_node_alloc(gfpflags, 2211 node); 2212 continue; 2213 } 2214 n = kmem_cache_alloc_node(kmalloc_caches, 2215 gfpflags, node); 2216 2217 if (!n) { 2218 free_kmem_cache_nodes(s); 2219 return 0; 2220 } 2221 2222 } 2223 s->node[node] = n; 2224 init_kmem_cache_node(n); 2225 } 2226 return 1; 2227 } 2228 #else 2229 static void free_kmem_cache_nodes(struct kmem_cache *s) 2230 { 2231 } 2232 2233 static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) 2234 { 2235 init_kmem_cache_node(&s->local_node); 2236 return 1; 2237 } 2238 #endif 2239 2240 /* 2241 * calculate_sizes() determines the order and the distribution of data within 2242 * a slab object. 2243 */ 2244 static int calculate_sizes(struct kmem_cache *s) 2245 { 2246 unsigned long flags = s->flags; 2247 unsigned long size = s->objsize; 2248 unsigned long align = s->align; 2249 2250 /* 2251 * Determine if we can poison the object itself. If the user of 2252 * the slab may touch the object after free or before allocation 2253 * then we should never poison the object itself. 2254 */ 2255 if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && 2256 !s->ctor) 2257 s->flags |= __OBJECT_POISON; 2258 else 2259 s->flags &= ~__OBJECT_POISON; 2260 2261 /* 2262 * Round up object size to the next word boundary. We can only 2263 * place the free pointer at word boundaries and this determines 2264 * the possible location of the free pointer. 
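 *
 * (Illustrative: on a 64 bit machine a 13 byte object is rounded up to
 * 16 bytes here, so the free pointer, wherever it ends up being placed,
 * is always word aligned.)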
2265 */ 2266 size = ALIGN(size, sizeof(void *)); 2267 2268 #ifdef CONFIG_SLUB_DEBUG 2269 /* 2270 * If we are Redzoning then check if there is some space between the 2271 * end of the object and the free pointer. If not then add an 2272 * additional word to have some bytes to store Redzone information. 2273 */ 2274 if ((flags & SLAB_RED_ZONE) && size == s->objsize) 2275 size += sizeof(void *); 2276 #endif 2277 2278 /* 2279 * With that we have determined the number of bytes in actual use 2280 * by the object. This is the potential offset to the free pointer. 2281 */ 2282 s->inuse = size; 2283 2284 if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || 2285 s->ctor)) { 2286 /* 2287 * Relocate free pointer after the object if it is not 2288 * permitted to overwrite the first word of the object on 2289 * kmem_cache_free. 2290 * 2291 * This is the case if we do RCU, have a constructor or 2292 * destructor or are poisoning the objects. 2293 */ 2294 s->offset = size; 2295 size += sizeof(void *); 2296 } 2297 2298 #ifdef CONFIG_SLUB_DEBUG 2299 if (flags & SLAB_STORE_USER) 2300 /* 2301 * Need to store information about allocs and frees after 2302 * the object. 2303 */ 2304 size += 2 * sizeof(struct track); 2305 2306 if (flags & SLAB_RED_ZONE) 2307 /* 2308 * Add some empty padding so that we can catch 2309 * overwrites from earlier objects rather than let 2310 * tracking information or the free pointer be 2311 * corrupted if an user writes before the start 2312 * of the object. 2313 */ 2314 size += sizeof(void *); 2315 #endif 2316 2317 /* 2318 * Determine the alignment based on various parameters that the 2319 * user specified and the dynamic determination of cache line size 2320 * on bootup. 2321 */ 2322 align = calculate_alignment(flags, align, s->objsize); 2323 2324 /* 2325 * SLUB stores one object immediately after another beginning from 2326 * offset 0. In order to align the objects we have to simply size 2327 * each object to conform to the alignment. 
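 *
 * Putting the pieces above together, an object with all debug options
 * enabled ends up laid out roughly as follows (sketch, not to scale):
 *
 * object data | red zone word | free pointer | 2 x struct track | padding
 *
 * s->inuse gives the number of bytes in actual use by the object and
 * s->size (computed below) is the distance from one object to the next.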
2328 */ 2329 size = ALIGN(size, align); 2330 s->size = size; 2331 2332 s->order = calculate_order(size); 2333 if (s->order < 0) 2334 return 0; 2335 2336 /* 2337 * Determine the number of objects per slab 2338 */ 2339 s->objects = (PAGE_SIZE << s->order) / size; 2340 2341 return !!s->objects; 2342 2343 } 2344 2345 static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, 2346 const char *name, size_t size, 2347 size_t align, unsigned long flags, 2348 void (*ctor)(struct kmem_cache *, void *)) 2349 { 2350 memset(s, 0, kmem_size); 2351 s->name = name; 2352 s->ctor = ctor; 2353 s->objsize = size; 2354 s->align = align; 2355 s->flags = kmem_cache_flags(size, flags, name, ctor); 2356 2357 if (!calculate_sizes(s)) 2358 goto error; 2359 2360 s->refcount = 1; 2361 #ifdef CONFIG_NUMA 2362 s->remote_node_defrag_ratio = 100; 2363 #endif 2364 if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) 2365 goto error; 2366 2367 if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) 2368 return 1; 2369 free_kmem_cache_nodes(s); 2370 error: 2371 if (flags & SLAB_PANIC) 2372 panic("Cannot create slab %s size=%lu realsize=%u " 2373 "order=%u offset=%u flags=%lx\n", 2374 s->name, (unsigned long)size, s->size, s->order, 2375 s->offset, flags); 2376 return 0; 2377 } 2378 2379 /* 2380 * Check if a given pointer is valid 2381 */ 2382 int kmem_ptr_validate(struct kmem_cache *s, const void *object) 2383 { 2384 struct page *page; 2385 2386 page = get_object_page(object); 2387 2388 if (!page || s != page->slab) 2389 /* No slab or wrong slab */ 2390 return 0; 2391 2392 if (!check_valid_pointer(s, page, object)) 2393 return 0; 2394 2395 /* 2396 * We could also check if the object is on the slabs freelist. 2397 * But this would be too expensive and it seems that the main 2398 * purpose of kmem_ptr_valid is to check if the object belongs 2399 * to a certain slab. 2400 */ 2401 return 1; 2402 } 2403 EXPORT_SYMBOL(kmem_ptr_validate); 2404 2405 /* 2406 * Determine the size of a slab object 2407 */ 2408 unsigned int kmem_cache_size(struct kmem_cache *s) 2409 { 2410 return s->objsize; 2411 } 2412 EXPORT_SYMBOL(kmem_cache_size); 2413 2414 const char *kmem_cache_name(struct kmem_cache *s) 2415 { 2416 return s->name; 2417 } 2418 EXPORT_SYMBOL(kmem_cache_name); 2419 2420 /* 2421 * Attempt to free all slabs on a node. Return the number of slabs we 2422 * were unable to free. 2423 */ 2424 static int free_list(struct kmem_cache *s, struct kmem_cache_node *n, 2425 struct list_head *list) 2426 { 2427 int slabs_inuse = 0; 2428 unsigned long flags; 2429 struct page *page, *h; 2430 2431 spin_lock_irqsave(&n->list_lock, flags); 2432 list_for_each_entry_safe(page, h, list, lru) 2433 if (!page->inuse) { 2434 list_del(&page->lru); 2435 discard_slab(s, page); 2436 } else 2437 slabs_inuse++; 2438 spin_unlock_irqrestore(&n->list_lock, flags); 2439 return slabs_inuse; 2440 } 2441 2442 /* 2443 * Release all resources used by a slab cache. 
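 *
 * This is the teardown path behind kmem_cache_destroy(). For reference, a
 * typical user side lifecycle looks like the following sketch ("struct foo"
 * and the cache name are made up for illustration):
 *
 * struct kmem_cache *foo_cache;
 * struct foo *p;
 *
 * foo_cache = kmem_cache_create("foo", sizeof(struct foo), 0, 0, NULL);
 * p = kmem_cache_alloc(foo_cache, GFP_KERNEL);
 * ...
 * kmem_cache_free(foo_cache, p);
 * kmem_cache_destroy(foo_cache);
 *
 * kmem_cache_close() can only succeed once every object has been freed;
 * kmem_cache_destroy() warns if slabs are still in use.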
2444 */ 2445 static inline int kmem_cache_close(struct kmem_cache *s) 2446 { 2447 int node; 2448 2449 flush_all(s); 2450 2451 /* Attempt to free all objects */ 2452 free_kmem_cache_cpus(s); 2453 for_each_node_state(node, N_NORMAL_MEMORY) { 2454 struct kmem_cache_node *n = get_node(s, node); 2455 2456 n->nr_partial -= free_list(s, n, &n->partial); 2457 if (atomic_long_read(&n->nr_slabs)) 2458 return 1; 2459 } 2460 free_kmem_cache_nodes(s); 2461 return 0; 2462 } 2463 2464 /* 2465 * Close a cache and release the kmem_cache structure 2466 * (must be used for caches created using kmem_cache_create) 2467 */ 2468 void kmem_cache_destroy(struct kmem_cache *s) 2469 { 2470 down_write(&slub_lock); 2471 s->refcount--; 2472 if (!s->refcount) { 2473 list_del(&s->list); 2474 up_write(&slub_lock); 2475 if (kmem_cache_close(s)) 2476 WARN_ON(1); 2477 sysfs_slab_remove(s); 2478 } else 2479 up_write(&slub_lock); 2480 } 2481 EXPORT_SYMBOL(kmem_cache_destroy); 2482 2483 /******************************************************************** 2484 * Kmalloc subsystem 2485 *******************************************************************/ 2486 2487 struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned; 2488 EXPORT_SYMBOL(kmalloc_caches); 2489 2490 #ifdef CONFIG_ZONE_DMA 2491 static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT]; 2492 #endif 2493 2494 static int __init setup_slub_min_order(char *str) 2495 { 2496 get_option(&str, &slub_min_order); 2497 2498 return 1; 2499 } 2500 2501 __setup("slub_min_order=", setup_slub_min_order); 2502 2503 static int __init setup_slub_max_order(char *str) 2504 { 2505 get_option(&str, &slub_max_order); 2506 2507 return 1; 2508 } 2509 2510 __setup("slub_max_order=", setup_slub_max_order); 2511 2512 static int __init setup_slub_min_objects(char *str) 2513 { 2514 get_option(&str, &slub_min_objects); 2515 2516 return 1; 2517 } 2518 2519 __setup("slub_min_objects=", setup_slub_min_objects); 2520 2521 static int __init setup_slub_nomerge(char *str) 2522 { 2523 slub_nomerge = 1; 2524 return 1; 2525 } 2526 2527 __setup("slub_nomerge", setup_slub_nomerge); 2528 2529 static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, 2530 const char *name, int size, gfp_t gfp_flags) 2531 { 2532 unsigned int flags = 0; 2533 2534 if (gfp_flags & SLUB_DMA) 2535 flags = SLAB_CACHE_DMA; 2536 2537 down_write(&slub_lock); 2538 if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, 2539 flags, NULL)) 2540 goto panic; 2541 2542 list_add(&s->list, &slab_caches); 2543 up_write(&slub_lock); 2544 if (sysfs_slab_add(s)) 2545 goto panic; 2546 return s; 2547 2548 panic: 2549 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); 2550 } 2551 2552 #ifdef CONFIG_ZONE_DMA 2553 2554 static void sysfs_add_func(struct work_struct *w) 2555 { 2556 struct kmem_cache *s; 2557 2558 down_write(&slub_lock); 2559 list_for_each_entry(s, &slab_caches, list) { 2560 if (s->flags & __SYSFS_ADD_DEFERRED) { 2561 s->flags &= ~__SYSFS_ADD_DEFERRED; 2562 sysfs_slab_add(s); 2563 } 2564 } 2565 up_write(&slub_lock); 2566 } 2567 2568 static DECLARE_WORK(sysfs_add_work, sysfs_add_func); 2569 2570 static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) 2571 { 2572 struct kmem_cache *s; 2573 char *text; 2574 size_t realsize; 2575 2576 s = kmalloc_caches_dma[index]; 2577 if (s) 2578 return s; 2579 2580 /* Dynamically create dma cache */ 2581 if (flags & __GFP_WAIT) 2582 down_write(&slub_lock); 2583 else { 2584 if (!down_write_trylock(&slub_lock)) 2585 goto out; 2586 } 2587 
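/*
 * We now hold slub_lock. Re-check under the lock: another cpu may have
 * created and installed the DMA cache while we were waiting for (or
 * trying) the semaphore above.
 */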
2588 if (kmalloc_caches_dma[index]) 2589 goto unlock_out; 2590 2591 realsize = kmalloc_caches[index].objsize; 2592 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", 2593 (unsigned int)realsize); 2594 s = kmalloc(kmem_size, flags & ~SLUB_DMA); 2595 2596 if (!s || !text || !kmem_cache_open(s, flags, text, 2597 realsize, ARCH_KMALLOC_MINALIGN, 2598 SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) { 2599 kfree(s); 2600 kfree(text); 2601 goto unlock_out; 2602 } 2603 2604 list_add(&s->list, &slab_caches); 2605 kmalloc_caches_dma[index] = s; 2606 2607 schedule_work(&sysfs_add_work); 2608 2609 unlock_out: 2610 up_write(&slub_lock); 2611 out: 2612 return kmalloc_caches_dma[index]; 2613 } 2614 #endif 2615 2616 /* 2617 * Conversion table for small slabs sizes / 8 to the index in the 2618 * kmalloc array. This is necessary for slabs < 192 since we have non power 2619 * of two cache sizes there. The size of larger slabs can be determined using 2620 * fls. 2621 */ 2622 static s8 size_index[24] = { 2623 3, /* 8 */ 2624 4, /* 16 */ 2625 5, /* 24 */ 2626 5, /* 32 */ 2627 6, /* 40 */ 2628 6, /* 48 */ 2629 6, /* 56 */ 2630 6, /* 64 */ 2631 1, /* 72 */ 2632 1, /* 80 */ 2633 1, /* 88 */ 2634 1, /* 96 */ 2635 7, /* 104 */ 2636 7, /* 112 */ 2637 7, /* 120 */ 2638 7, /* 128 */ 2639 2, /* 136 */ 2640 2, /* 144 */ 2641 2, /* 152 */ 2642 2, /* 160 */ 2643 2, /* 168 */ 2644 2, /* 176 */ 2645 2, /* 184 */ 2646 2 /* 192 */ 2647 }; 2648 2649 static struct kmem_cache *get_slab(size_t size, gfp_t flags) 2650 { 2651 int index; 2652 2653 if (size <= 192) { 2654 if (!size) 2655 return ZERO_SIZE_PTR; 2656 2657 index = size_index[(size - 1) / 8]; 2658 } else 2659 index = fls(size - 1); 2660 2661 #ifdef CONFIG_ZONE_DMA 2662 if (unlikely((flags & SLUB_DMA))) 2663 return dma_kmalloc_cache(index, flags); 2664 2665 #endif 2666 return &kmalloc_caches[index]; 2667 } 2668 2669 void *__kmalloc(size_t size, gfp_t flags) 2670 { 2671 struct kmem_cache *s; 2672 2673 if (unlikely(size > PAGE_SIZE / 2)) 2674 return (void *)__get_free_pages(flags | __GFP_COMP, 2675 get_order(size)); 2676 2677 s = get_slab(size, flags); 2678 2679 if (unlikely(ZERO_OR_NULL_PTR(s))) 2680 return s; 2681 2682 return slab_alloc(s, flags, -1, __builtin_return_address(0)); 2683 } 2684 EXPORT_SYMBOL(__kmalloc); 2685 2686 #ifdef CONFIG_NUMA 2687 void *__kmalloc_node(size_t size, gfp_t flags, int node) 2688 { 2689 struct kmem_cache *s; 2690 2691 if (unlikely(size > PAGE_SIZE / 2)) 2692 return (void *)__get_free_pages(flags | __GFP_COMP, 2693 get_order(size)); 2694 2695 s = get_slab(size, flags); 2696 2697 if (unlikely(ZERO_OR_NULL_PTR(s))) 2698 return s; 2699 2700 return slab_alloc(s, flags, node, __builtin_return_address(0)); 2701 } 2702 EXPORT_SYMBOL(__kmalloc_node); 2703 #endif 2704 2705 size_t ksize(const void *object) 2706 { 2707 struct page *page; 2708 struct kmem_cache *s; 2709 2710 BUG_ON(!object); 2711 if (unlikely(object == ZERO_SIZE_PTR)) 2712 return 0; 2713 2714 page = virt_to_head_page(object); 2715 BUG_ON(!page); 2716 2717 if (unlikely(!PageSlab(page))) 2718 return PAGE_SIZE << compound_order(page); 2719 2720 s = page->slab; 2721 BUG_ON(!s); 2722 2723 /* 2724 * Debugging requires use of the padding between object 2725 * and whatever may come after it. 2726 */ 2727 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 2728 return s->objsize; 2729 2730 /* 2731 * If we have the need to store the freelist pointer 2732 * back there or track user information then we can 2733 * only use the space before that information. 
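 *
 * (Illustrative: in the common non-debug case a kmalloc(100) request is
 * served from the kmalloc-128 cache, so ksize() on that object reports
 * 128 and the caller may use the extra 28 bytes.)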
2734 */ 2735 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) 2736 return s->inuse; 2737 2738 /* 2739 * Else we can use all the padding etc for the allocation 2740 */ 2741 return s->size; 2742 } 2743 EXPORT_SYMBOL(ksize); 2744 2745 void kfree(const void *x) 2746 { 2747 struct page *page; 2748 void *object = (void *)x; 2749 2750 if (unlikely(ZERO_OR_NULL_PTR(x))) 2751 return; 2752 2753 page = virt_to_head_page(x); 2754 if (unlikely(!PageSlab(page))) { 2755 put_page(page); 2756 return; 2757 } 2758 slab_free(page->slab, page, object, __builtin_return_address(0)); 2759 } 2760 EXPORT_SYMBOL(kfree); 2761 2762 static unsigned long count_partial(struct kmem_cache_node *n) 2763 { 2764 unsigned long flags; 2765 unsigned long x = 0; 2766 struct page *page; 2767 2768 spin_lock_irqsave(&n->list_lock, flags); 2769 list_for_each_entry(page, &n->partial, lru) 2770 x += page->inuse; 2771 spin_unlock_irqrestore(&n->list_lock, flags); 2772 return x; 2773 } 2774 2775 /* 2776 * kmem_cache_shrink removes empty slabs from the partial lists and sorts 2777 * the remaining slabs by the number of items in use. The slabs with the 2778 * most items in use come first. New allocations will then fill those up 2779 * and thus they can be removed from the partial lists. 2780 * 2781 * The slabs with the least items are placed last. This results in them 2782 * being allocated from last increasing the chance that the last objects 2783 * are freed in them. 2784 */ 2785 int kmem_cache_shrink(struct kmem_cache *s) 2786 { 2787 int node; 2788 int i; 2789 struct kmem_cache_node *n; 2790 struct page *page; 2791 struct page *t; 2792 struct list_head *slabs_by_inuse = 2793 kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL); 2794 unsigned long flags; 2795 2796 if (!slabs_by_inuse) 2797 return -ENOMEM; 2798 2799 flush_all(s); 2800 for_each_node_state(node, N_NORMAL_MEMORY) { 2801 n = get_node(s, node); 2802 2803 if (!n->nr_partial) 2804 continue; 2805 2806 for (i = 0; i < s->objects; i++) 2807 INIT_LIST_HEAD(slabs_by_inuse + i); 2808 2809 spin_lock_irqsave(&n->list_lock, flags); 2810 2811 /* 2812 * Build lists indexed by the items in use in each slab. 2813 * 2814 * Note that concurrent frees may occur while we hold the 2815 * list_lock. page->inuse here is the upper limit. 2816 */ 2817 list_for_each_entry_safe(page, t, &n->partial, lru) { 2818 if (!page->inuse && slab_trylock(page)) { 2819 /* 2820 * Must hold slab lock here because slab_free 2821 * may have freed the last object and be 2822 * waiting to release the slab. 2823 */ 2824 list_del(&page->lru); 2825 n->nr_partial--; 2826 slab_unlock(page); 2827 discard_slab(s, page); 2828 } else { 2829 list_move(&page->lru, 2830 slabs_by_inuse + page->inuse); 2831 } 2832 } 2833 2834 /* 2835 * Rebuild the partial list with the slabs filled up most 2836 * first and the least used slabs at the end. 
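 *
 * (Illustrative: for a cache with s->objects == 4, slabs with three objects
 * in use are spliced back first, then those with two, then those with one.
 * Completely free slabs that could be locked were already discarded in the
 * loop above.)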
2837 */
2838 for (i = s->objects - 1; i >= 0; i--)
2839 list_splice(slabs_by_inuse + i, n->partial.prev);
2840
2841 spin_unlock_irqrestore(&n->list_lock, flags);
2842 }
2843
2844 kfree(slabs_by_inuse);
2845 return 0;
2846 }
2847 EXPORT_SYMBOL(kmem_cache_shrink);
2848
2849 #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
2850 static int slab_mem_going_offline_callback(void *arg)
2851 {
2852 struct kmem_cache *s;
2853
2854 down_read(&slub_lock);
2855 list_for_each_entry(s, &slab_caches, list)
2856 kmem_cache_shrink(s);
2857 up_read(&slub_lock);
2858
2859 return 0;
2860 }
2861
2862 static void slab_mem_offline_callback(void *arg)
2863 {
2864 struct kmem_cache_node *n;
2865 struct kmem_cache *s;
2866 struct memory_notify *marg = arg;
2867 int offline_node;
2868
2869 offline_node = marg->status_change_nid;
2870
2871 /*
2872 * If the node still has available memory then we still need its
2873 * kmem_cache_node, so there is nothing to do here.
2874 */
2875 if (offline_node < 0)
2876 return;
2877
2878 down_read(&slub_lock);
2879 list_for_each_entry(s, &slab_caches, list) {
2880 n = get_node(s, offline_node);
2881 if (n) {
2882 /*
2883 * if n->nr_slabs > 0, slabs still exist on the node
2884 * that is going down. We were unable to free them,
2885 * and the offline_pages() function shouldn't have
2886 * called this callback. So we must fail.
2887 */
2888 BUG_ON(atomic_long_read(&n->nr_slabs));
2889
2890 s->node[offline_node] = NULL;
2891 kmem_cache_free(kmalloc_caches, n);
2892 }
2893 }
2894 up_read(&slub_lock);
2895 }
2896
2897 static int slab_mem_going_online_callback(void *arg)
2898 {
2899 struct kmem_cache_node *n;
2900 struct kmem_cache *s;
2901 struct memory_notify *marg = arg;
2902 int nid = marg->status_change_nid;
2903 int ret = 0;
2904
2905 /*
2906 * If the node's memory is already available, then kmem_cache_node is
2907 * already created. Nothing to do.
2908 */
2909 if (nid < 0)
2910 return 0;
2911
2912 /*
2913 * We are bringing a node online. No memory is available yet. We must
2914 * allocate a kmem_cache_node structure in order to bring the node
2915 * online.
2916 */
2917 down_read(&slub_lock);
2918 list_for_each_entry(s, &slab_caches, list) {
2919 /*
2920 * XXX: kmem_cache_alloc_node will fall back to other nodes
2921 * since memory is not yet available from the node that
2922 * is brought up.
2923 */ 2924 n = kmem_cache_alloc(kmalloc_caches, GFP_KERNEL); 2925 if (!n) { 2926 ret = -ENOMEM; 2927 goto out; 2928 } 2929 init_kmem_cache_node(n); 2930 s->node[nid] = n; 2931 } 2932 out: 2933 up_read(&slub_lock); 2934 return ret; 2935 } 2936 2937 static int slab_memory_callback(struct notifier_block *self, 2938 unsigned long action, void *arg) 2939 { 2940 int ret = 0; 2941 2942 switch (action) { 2943 case MEM_GOING_ONLINE: 2944 ret = slab_mem_going_online_callback(arg); 2945 break; 2946 case MEM_GOING_OFFLINE: 2947 ret = slab_mem_going_offline_callback(arg); 2948 break; 2949 case MEM_OFFLINE: 2950 case MEM_CANCEL_ONLINE: 2951 slab_mem_offline_callback(arg); 2952 break; 2953 case MEM_ONLINE: 2954 case MEM_CANCEL_OFFLINE: 2955 break; 2956 } 2957 2958 ret = notifier_from_errno(ret); 2959 return ret; 2960 } 2961 2962 #endif /* CONFIG_MEMORY_HOTPLUG */ 2963 2964 /******************************************************************** 2965 * Basic setup of slabs 2966 *******************************************************************/ 2967 2968 void __init kmem_cache_init(void) 2969 { 2970 int i; 2971 int caches = 0; 2972 2973 init_alloc_cpu(); 2974 2975 #ifdef CONFIG_NUMA 2976 /* 2977 * Must first have the slab cache available for the allocations of the 2978 * struct kmem_cache_node's. There is special bootstrap code in 2979 * kmem_cache_open for slab_state == DOWN. 2980 */ 2981 create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", 2982 sizeof(struct kmem_cache_node), GFP_KERNEL); 2983 kmalloc_caches[0].refcount = -1; 2984 caches++; 2985 2986 hotplug_memory_notifier(slab_memory_callback, 1); 2987 #endif 2988 2989 /* Able to allocate the per node structures */ 2990 slab_state = PARTIAL; 2991 2992 /* Caches that are not of the two-to-the-power-of size */ 2993 if (KMALLOC_MIN_SIZE <= 64) { 2994 create_kmalloc_cache(&kmalloc_caches[1], 2995 "kmalloc-96", 96, GFP_KERNEL); 2996 caches++; 2997 } 2998 if (KMALLOC_MIN_SIZE <= 128) { 2999 create_kmalloc_cache(&kmalloc_caches[2], 3000 "kmalloc-192", 192, GFP_KERNEL); 3001 caches++; 3002 } 3003 3004 for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) { 3005 create_kmalloc_cache(&kmalloc_caches[i], 3006 "kmalloc", 1 << i, GFP_KERNEL); 3007 caches++; 3008 } 3009 3010 3011 /* 3012 * Patch up the size_index table if we have strange large alignment 3013 * requirements for the kmalloc array. This is only the case for 3014 * mips it seems. The standard arches will not generate any code here. 3015 * 3016 * Largest permitted alignment is 256 bytes due to the way we 3017 * handle the index determination for the smaller caches. 3018 * 3019 * Make sure that nothing crazy happens if someone starts tinkering 3020 * around with ARCH_KMALLOC_MINALIGN 3021 */ 3022 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || 3023 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); 3024 3025 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) 3026 size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; 3027 3028 slab_state = UP; 3029 3030 /* Provide the correct kmalloc names now that the caches are up */ 3031 for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) 3032 kmalloc_caches[i]. 
name = 3033 kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); 3034 3035 #ifdef CONFIG_SMP 3036 register_cpu_notifier(&slab_notifier); 3037 kmem_size = offsetof(struct kmem_cache, cpu_slab) + 3038 nr_cpu_ids * sizeof(struct kmem_cache_cpu *); 3039 #else 3040 kmem_size = sizeof(struct kmem_cache); 3041 #endif 3042 3043 3044 printk(KERN_INFO 3045 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 3046 " CPUs=%d, Nodes=%d\n", 3047 caches, cache_line_size(), 3048 slub_min_order, slub_max_order, slub_min_objects, 3049 nr_cpu_ids, nr_node_ids); 3050 } 3051 3052 /* 3053 * Find a mergeable slab cache 3054 */ 3055 static int slab_unmergeable(struct kmem_cache *s) 3056 { 3057 if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) 3058 return 1; 3059 3060 if (s->ctor) 3061 return 1; 3062 3063 /* 3064 * We may have set a slab to be unmergeable during bootstrap. 3065 */ 3066 if (s->refcount < 0) 3067 return 1; 3068 3069 return 0; 3070 } 3071 3072 static struct kmem_cache *find_mergeable(size_t size, 3073 size_t align, unsigned long flags, const char *name, 3074 void (*ctor)(struct kmem_cache *, void *)) 3075 { 3076 struct kmem_cache *s; 3077 3078 if (slub_nomerge || (flags & SLUB_NEVER_MERGE)) 3079 return NULL; 3080 3081 if (ctor) 3082 return NULL; 3083 3084 size = ALIGN(size, sizeof(void *)); 3085 align = calculate_alignment(flags, align, size); 3086 size = ALIGN(size, align); 3087 flags = kmem_cache_flags(size, flags, name, NULL); 3088 3089 list_for_each_entry(s, &slab_caches, list) { 3090 if (slab_unmergeable(s)) 3091 continue; 3092 3093 if (size > s->size) 3094 continue; 3095 3096 if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME)) 3097 continue; 3098 /* 3099 * Check if alignment is compatible. 3100 * Courtesy of Adrian Drzewiecki 3101 */ 3102 if ((s->size & ~(align - 1)) != s->size) 3103 continue; 3104 3105 if (s->size - size >= sizeof(void *)) 3106 continue; 3107 3108 return s; 3109 } 3110 return NULL; 3111 } 3112 3113 struct kmem_cache *kmem_cache_create(const char *name, size_t size, 3114 size_t align, unsigned long flags, 3115 void (*ctor)(struct kmem_cache *, void *)) 3116 { 3117 struct kmem_cache *s; 3118 3119 down_write(&slub_lock); 3120 s = find_mergeable(size, align, flags, name, ctor); 3121 if (s) { 3122 int cpu; 3123 3124 s->refcount++; 3125 /* 3126 * Adjust the object sizes so that we clear 3127 * the complete object on kzalloc. 3128 */ 3129 s->objsize = max(s->objsize, (int)size); 3130 3131 /* 3132 * And then we need to update the object size in the 3133 * per cpu structures 3134 */ 3135 for_each_online_cpu(cpu) 3136 get_cpu_slab(s, cpu)->objsize = s->objsize; 3137 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3138 up_write(&slub_lock); 3139 if (sysfs_slab_alias(s, name)) 3140 goto err; 3141 return s; 3142 } 3143 s = kmalloc(kmem_size, GFP_KERNEL); 3144 if (s) { 3145 if (kmem_cache_open(s, GFP_KERNEL, name, 3146 size, align, flags, ctor)) { 3147 list_add(&s->list, &slab_caches); 3148 up_write(&slub_lock); 3149 if (sysfs_slab_add(s)) 3150 goto err; 3151 return s; 3152 } 3153 kfree(s); 3154 } 3155 up_write(&slub_lock); 3156 3157 err: 3158 if (flags & SLAB_PANIC) 3159 panic("Cannot create slabcache %s\n", name); 3160 else 3161 s = NULL; 3162 return s; 3163 } 3164 EXPORT_SYMBOL(kmem_cache_create); 3165 3166 #ifdef CONFIG_SMP 3167 /* 3168 * Use the cpu notifier to insure that the cpu slabs are flushed when 3169 * necessary. 
3170 */ 3171 static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, 3172 unsigned long action, void *hcpu) 3173 { 3174 long cpu = (long)hcpu; 3175 struct kmem_cache *s; 3176 unsigned long flags; 3177 3178 switch (action) { 3179 case CPU_UP_PREPARE: 3180 case CPU_UP_PREPARE_FROZEN: 3181 init_alloc_cpu_cpu(cpu); 3182 down_read(&slub_lock); 3183 list_for_each_entry(s, &slab_caches, list) 3184 s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, 3185 GFP_KERNEL); 3186 up_read(&slub_lock); 3187 break; 3188 3189 case CPU_UP_CANCELED: 3190 case CPU_UP_CANCELED_FROZEN: 3191 case CPU_DEAD: 3192 case CPU_DEAD_FROZEN: 3193 down_read(&slub_lock); 3194 list_for_each_entry(s, &slab_caches, list) { 3195 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 3196 3197 local_irq_save(flags); 3198 __flush_cpu_slab(s, cpu); 3199 local_irq_restore(flags); 3200 free_kmem_cache_cpu(c, cpu); 3201 s->cpu_slab[cpu] = NULL; 3202 } 3203 up_read(&slub_lock); 3204 break; 3205 default: 3206 break; 3207 } 3208 return NOTIFY_OK; 3209 } 3210 3211 static struct notifier_block __cpuinitdata slab_notifier = { 3212 .notifier_call = slab_cpuup_callback 3213 }; 3214 3215 #endif 3216 3217 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) 3218 { 3219 struct kmem_cache *s; 3220 3221 if (unlikely(size > PAGE_SIZE / 2)) 3222 return (void *)__get_free_pages(gfpflags | __GFP_COMP, 3223 get_order(size)); 3224 s = get_slab(size, gfpflags); 3225 3226 if (unlikely(ZERO_OR_NULL_PTR(s))) 3227 return s; 3228 3229 return slab_alloc(s, gfpflags, -1, caller); 3230 } 3231 3232 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, 3233 int node, void *caller) 3234 { 3235 struct kmem_cache *s; 3236 3237 if (unlikely(size > PAGE_SIZE / 2)) 3238 return (void *)__get_free_pages(gfpflags | __GFP_COMP, 3239 get_order(size)); 3240 s = get_slab(size, gfpflags); 3241 3242 if (unlikely(ZERO_OR_NULL_PTR(s))) 3243 return s; 3244 3245 return slab_alloc(s, gfpflags, node, caller); 3246 } 3247 3248 #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG) 3249 static int validate_slab(struct kmem_cache *s, struct page *page, 3250 unsigned long *map) 3251 { 3252 void *p; 3253 void *addr = slab_address(page); 3254 3255 if (!check_slab(s, page) || 3256 !on_freelist(s, page, NULL)) 3257 return 0; 3258 3259 /* Now we know that a valid freelist exists */ 3260 bitmap_zero(map, s->objects); 3261 3262 for_each_free_object(p, s, page->freelist) { 3263 set_bit(slab_index(p, s, addr), map); 3264 if (!check_object(s, page, p, 0)) 3265 return 0; 3266 } 3267 3268 for_each_object(p, s, addr) 3269 if (!test_bit(slab_index(p, s, addr), map)) 3270 if (!check_object(s, page, p, 1)) 3271 return 0; 3272 return 1; 3273 } 3274 3275 static void validate_slab_slab(struct kmem_cache *s, struct page *page, 3276 unsigned long *map) 3277 { 3278 if (slab_trylock(page)) { 3279 validate_slab(s, page, map); 3280 slab_unlock(page); 3281 } else 3282 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n", 3283 s->name, page); 3284 3285 if (s->flags & DEBUG_DEFAULT_FLAGS) { 3286 if (!SlabDebug(page)) 3287 printk(KERN_ERR "SLUB %s: SlabDebug not set " 3288 "on slab 0x%p\n", s->name, page); 3289 } else { 3290 if (SlabDebug(page)) 3291 printk(KERN_ERR "SLUB %s: SlabDebug set on " 3292 "slab 0x%p\n", s->name, page); 3293 } 3294 } 3295 3296 static int validate_slab_node(struct kmem_cache *s, 3297 struct kmem_cache_node *n, unsigned long *map) 3298 { 3299 unsigned long count = 0; 3300 struct page *page; 3301 unsigned long flags; 3302 3303 spin_lock_irqsave(&n->list_lock, 
flags); 3304 3305 list_for_each_entry(page, &n->partial, lru) { 3306 validate_slab_slab(s, page, map); 3307 count++; 3308 } 3309 if (count != n->nr_partial) 3310 printk(KERN_ERR "SLUB %s: %ld partial slabs counted but " 3311 "counter=%ld\n", s->name, count, n->nr_partial); 3312 3313 if (!(s->flags & SLAB_STORE_USER)) 3314 goto out; 3315 3316 list_for_each_entry(page, &n->full, lru) { 3317 validate_slab_slab(s, page, map); 3318 count++; 3319 } 3320 if (count != atomic_long_read(&n->nr_slabs)) 3321 printk(KERN_ERR "SLUB: %s %ld slabs counted but " 3322 "counter=%ld\n", s->name, count, 3323 atomic_long_read(&n->nr_slabs)); 3324 3325 out: 3326 spin_unlock_irqrestore(&n->list_lock, flags); 3327 return count; 3328 } 3329 3330 static long validate_slab_cache(struct kmem_cache *s) 3331 { 3332 int node; 3333 unsigned long count = 0; 3334 unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) * 3335 sizeof(unsigned long), GFP_KERNEL); 3336 3337 if (!map) 3338 return -ENOMEM; 3339 3340 flush_all(s); 3341 for_each_node_state(node, N_NORMAL_MEMORY) { 3342 struct kmem_cache_node *n = get_node(s, node); 3343 3344 count += validate_slab_node(s, n, map); 3345 } 3346 kfree(map); 3347 return count; 3348 } 3349 3350 #ifdef SLUB_RESILIENCY_TEST 3351 static void resiliency_test(void) 3352 { 3353 u8 *p; 3354 3355 printk(KERN_ERR "SLUB resiliency testing\n"); 3356 printk(KERN_ERR "-----------------------\n"); 3357 printk(KERN_ERR "A. Corruption after allocation\n"); 3358 3359 p = kzalloc(16, GFP_KERNEL); 3360 p[16] = 0x12; 3361 printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" 3362 " 0x12->0x%p\n\n", p + 16); 3363 3364 validate_slab_cache(kmalloc_caches + 4); 3365 3366 /* Hmmm... The next two are dangerous */ 3367 p = kzalloc(32, GFP_KERNEL); 3368 p[32 + sizeof(void *)] = 0x34; 3369 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" 3370 " 0x34 -> -0x%p\n", p); 3371 printk(KERN_ERR 3372 "If allocated object is overwritten then not detectable\n\n"); 3373 3374 validate_slab_cache(kmalloc_caches + 5); 3375 p = kzalloc(64, GFP_KERNEL); 3376 p += 64 + (get_cycles() & 0xff) * sizeof(void *); 3377 *p = 0x56; 3378 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", 3379 p); 3380 printk(KERN_ERR 3381 "If allocated object is overwritten then not detectable\n\n"); 3382 validate_slab_cache(kmalloc_caches + 6); 3383 3384 printk(KERN_ERR "\nB. Corruption after free\n"); 3385 p = kzalloc(128, GFP_KERNEL); 3386 kfree(p); 3387 *p = 0x78; 3388 printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); 3389 validate_slab_cache(kmalloc_caches + 7); 3390 3391 p = kzalloc(256, GFP_KERNEL); 3392 kfree(p); 3393 p[50] = 0x9a; 3394 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", 3395 p); 3396 validate_slab_cache(kmalloc_caches + 8); 3397 3398 p = kzalloc(512, GFP_KERNEL); 3399 kfree(p); 3400 p[512] = 0xab; 3401 printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); 3402 validate_slab_cache(kmalloc_caches + 9); 3403 } 3404 #else 3405 static void resiliency_test(void) {}; 3406 #endif 3407 3408 /* 3409 * Generate lists of code addresses where slabcache objects are allocated 3410 * and freed. 
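 *
 * The result is exposed through the alloc_calls/free_calls sysfs files in
 * the format produced by list_locations() below. A line looks roughly like
 * this (all values made up for illustration):
 *
 * 4264 some_function+0x2a/0x90 age=5/1320/10800 pid=1-2400 cpus=0-3
 *
 * i.e. call count, caller symbol, min/average/max object age in jiffies,
 * the range of pids and the cpus the calls came from.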
3411 */ 3412 3413 struct location { 3414 unsigned long count; 3415 void *addr; 3416 long long sum_time; 3417 long min_time; 3418 long max_time; 3419 long min_pid; 3420 long max_pid; 3421 cpumask_t cpus; 3422 nodemask_t nodes; 3423 }; 3424 3425 struct loc_track { 3426 unsigned long max; 3427 unsigned long count; 3428 struct location *loc; 3429 }; 3430 3431 static void free_loc_track(struct loc_track *t) 3432 { 3433 if (t->max) 3434 free_pages((unsigned long)t->loc, 3435 get_order(sizeof(struct location) * t->max)); 3436 } 3437 3438 static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags) 3439 { 3440 struct location *l; 3441 int order; 3442 3443 order = get_order(sizeof(struct location) * max); 3444 3445 l = (void *)__get_free_pages(flags, order); 3446 if (!l) 3447 return 0; 3448 3449 if (t->count) { 3450 memcpy(l, t->loc, sizeof(struct location) * t->count); 3451 free_loc_track(t); 3452 } 3453 t->max = max; 3454 t->loc = l; 3455 return 1; 3456 } 3457 3458 static int add_location(struct loc_track *t, struct kmem_cache *s, 3459 const struct track *track) 3460 { 3461 long start, end, pos; 3462 struct location *l; 3463 void *caddr; 3464 unsigned long age = jiffies - track->when; 3465 3466 start = -1; 3467 end = t->count; 3468 3469 for ( ; ; ) { 3470 pos = start + (end - start + 1) / 2; 3471 3472 /* 3473 * There is nothing at "end". If we end up there 3474 * we need to add something to before end. 3475 */ 3476 if (pos == end) 3477 break; 3478 3479 caddr = t->loc[pos].addr; 3480 if (track->addr == caddr) { 3481 3482 l = &t->loc[pos]; 3483 l->count++; 3484 if (track->when) { 3485 l->sum_time += age; 3486 if (age < l->min_time) 3487 l->min_time = age; 3488 if (age > l->max_time) 3489 l->max_time = age; 3490 3491 if (track->pid < l->min_pid) 3492 l->min_pid = track->pid; 3493 if (track->pid > l->max_pid) 3494 l->max_pid = track->pid; 3495 3496 cpu_set(track->cpu, l->cpus); 3497 } 3498 node_set(page_to_nid(virt_to_page(track)), l->nodes); 3499 return 1; 3500 } 3501 3502 if (track->addr < caddr) 3503 end = pos; 3504 else 3505 start = pos; 3506 } 3507 3508 /* 3509 * Not found. Insert new tracking element. 
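 *
 * The binary search above left "pos" pointing at the first slot whose
 * address is larger than track->addr (or at t->count if there is none),
 * so inserting at "pos" keeps t->loc sorted by caller address.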
3510 */ 3511 if (t->count >= t->max && !alloc_loc_track(t, 2 * t->max, GFP_ATOMIC)) 3512 return 0; 3513 3514 l = t->loc + pos; 3515 if (pos < t->count) 3516 memmove(l + 1, l, 3517 (t->count - pos) * sizeof(struct location)); 3518 t->count++; 3519 l->count = 1; 3520 l->addr = track->addr; 3521 l->sum_time = age; 3522 l->min_time = age; 3523 l->max_time = age; 3524 l->min_pid = track->pid; 3525 l->max_pid = track->pid; 3526 cpus_clear(l->cpus); 3527 cpu_set(track->cpu, l->cpus); 3528 nodes_clear(l->nodes); 3529 node_set(page_to_nid(virt_to_page(track)), l->nodes); 3530 return 1; 3531 } 3532 3533 static void process_slab(struct loc_track *t, struct kmem_cache *s, 3534 struct page *page, enum track_item alloc) 3535 { 3536 void *addr = slab_address(page); 3537 DECLARE_BITMAP(map, s->objects); 3538 void *p; 3539 3540 bitmap_zero(map, s->objects); 3541 for_each_free_object(p, s, page->freelist) 3542 set_bit(slab_index(p, s, addr), map); 3543 3544 for_each_object(p, s, addr) 3545 if (!test_bit(slab_index(p, s, addr), map)) 3546 add_location(t, s, get_track(s, p, alloc)); 3547 } 3548 3549 static int list_locations(struct kmem_cache *s, char *buf, 3550 enum track_item alloc) 3551 { 3552 int len = 0; 3553 unsigned long i; 3554 struct loc_track t = { 0, 0, NULL }; 3555 int node; 3556 3557 if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location), 3558 GFP_TEMPORARY)) 3559 return sprintf(buf, "Out of memory\n"); 3560 3561 /* Push back cpu slabs */ 3562 flush_all(s); 3563 3564 for_each_node_state(node, N_NORMAL_MEMORY) { 3565 struct kmem_cache_node *n = get_node(s, node); 3566 unsigned long flags; 3567 struct page *page; 3568 3569 if (!atomic_long_read(&n->nr_slabs)) 3570 continue; 3571 3572 spin_lock_irqsave(&n->list_lock, flags); 3573 list_for_each_entry(page, &n->partial, lru) 3574 process_slab(&t, s, page, alloc); 3575 list_for_each_entry(page, &n->full, lru) 3576 process_slab(&t, s, page, alloc); 3577 spin_unlock_irqrestore(&n->list_lock, flags); 3578 } 3579 3580 for (i = 0; i < t.count; i++) { 3581 struct location *l = &t.loc[i]; 3582 3583 if (len > PAGE_SIZE - 100) 3584 break; 3585 len += sprintf(buf + len, "%7ld ", l->count); 3586 3587 if (l->addr) 3588 len += sprint_symbol(buf + len, (unsigned long)l->addr); 3589 else 3590 len += sprintf(buf + len, "<not-available>"); 3591 3592 if (l->sum_time != l->min_time) { 3593 unsigned long remainder; 3594 3595 len += sprintf(buf + len, " age=%ld/%ld/%ld", 3596 l->min_time, 3597 div_long_long_rem(l->sum_time, l->count, &remainder), 3598 l->max_time); 3599 } else 3600 len += sprintf(buf + len, " age=%ld", 3601 l->min_time); 3602 3603 if (l->min_pid != l->max_pid) 3604 len += sprintf(buf + len, " pid=%ld-%ld", 3605 l->min_pid, l->max_pid); 3606 else 3607 len += sprintf(buf + len, " pid=%ld", 3608 l->min_pid); 3609 3610 if (num_online_cpus() > 1 && !cpus_empty(l->cpus) && 3611 len < PAGE_SIZE - 60) { 3612 len += sprintf(buf + len, " cpus="); 3613 len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, 3614 l->cpus); 3615 } 3616 3617 if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && 3618 len < PAGE_SIZE - 60) { 3619 len += sprintf(buf + len, " nodes="); 3620 len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50, 3621 l->nodes); 3622 } 3623 3624 len += sprintf(buf + len, "\n"); 3625 } 3626 3627 free_loc_track(&t); 3628 if (!t.count) 3629 len += sprintf(buf, "No data\n"); 3630 return len; 3631 } 3632 3633 enum slab_stat_type { 3634 SL_FULL, 3635 SL_PARTIAL, 3636 SL_CPU, 3637 SL_OBJECTS 3638 }; 3639 3640 #define SO_FULL (1 << SL_FULL) 3641 #define 
SO_PARTIAL (1 << SL_PARTIAL) 3642 #define SO_CPU (1 << SL_CPU) 3643 #define SO_OBJECTS (1 << SL_OBJECTS) 3644 3645 static unsigned long slab_objects(struct kmem_cache *s, 3646 char *buf, unsigned long flags) 3647 { 3648 unsigned long total = 0; 3649 int cpu; 3650 int node; 3651 int x; 3652 unsigned long *nodes; 3653 unsigned long *per_cpu; 3654 3655 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); 3656 per_cpu = nodes + nr_node_ids; 3657 3658 for_each_possible_cpu(cpu) { 3659 struct page *page; 3660 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 3661 3662 if (!c) 3663 continue; 3664 3665 page = c->page; 3666 node = c->node; 3667 if (node < 0) 3668 continue; 3669 if (page) { 3670 if (flags & SO_CPU) { 3671 if (flags & SO_OBJECTS) 3672 x = page->inuse; 3673 else 3674 x = 1; 3675 total += x; 3676 nodes[node] += x; 3677 } 3678 per_cpu[node]++; 3679 } 3680 } 3681 3682 for_each_node_state(node, N_NORMAL_MEMORY) { 3683 struct kmem_cache_node *n = get_node(s, node); 3684 3685 if (flags & SO_PARTIAL) { 3686 if (flags & SO_OBJECTS) 3687 x = count_partial(n); 3688 else 3689 x = n->nr_partial; 3690 total += x; 3691 nodes[node] += x; 3692 } 3693 3694 if (flags & SO_FULL) { 3695 int full_slabs = atomic_long_read(&n->nr_slabs) 3696 - per_cpu[node] 3697 - n->nr_partial; 3698 3699 if (flags & SO_OBJECTS) 3700 x = full_slabs * s->objects; 3701 else 3702 x = full_slabs; 3703 total += x; 3704 nodes[node] += x; 3705 } 3706 } 3707 3708 x = sprintf(buf, "%lu", total); 3709 #ifdef CONFIG_NUMA 3710 for_each_node_state(node, N_NORMAL_MEMORY) 3711 if (nodes[node]) 3712 x += sprintf(buf + x, " N%d=%lu", 3713 node, nodes[node]); 3714 #endif 3715 kfree(nodes); 3716 return x + sprintf(buf + x, "\n"); 3717 } 3718 3719 static int any_slab_objects(struct kmem_cache *s) 3720 { 3721 int node; 3722 int cpu; 3723 3724 for_each_possible_cpu(cpu) { 3725 struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); 3726 3727 if (c && c->page) 3728 return 1; 3729 } 3730 3731 for_each_online_node(node) { 3732 struct kmem_cache_node *n = get_node(s, node); 3733 3734 if (!n) 3735 continue; 3736 3737 if (n->nr_partial || atomic_long_read(&n->nr_slabs)) 3738 return 1; 3739 } 3740 return 0; 3741 } 3742 3743 #define to_slab_attr(n) container_of(n, struct slab_attribute, attr) 3744 #define to_slab(n) container_of(n, struct kmem_cache, kobj); 3745 3746 struct slab_attribute { 3747 struct attribute attr; 3748 ssize_t (*show)(struct kmem_cache *s, char *buf); 3749 ssize_t (*store)(struct kmem_cache *s, const char *x, size_t count); 3750 }; 3751 3752 #define SLAB_ATTR_RO(_name) \ 3753 static struct slab_attribute _name##_attr = __ATTR_RO(_name) 3754 3755 #define SLAB_ATTR(_name) \ 3756 static struct slab_attribute _name##_attr = \ 3757 __ATTR(_name, 0644, _name##_show, _name##_store) 3758 3759 static ssize_t slab_size_show(struct kmem_cache *s, char *buf) 3760 { 3761 return sprintf(buf, "%d\n", s->size); 3762 } 3763 SLAB_ATTR_RO(slab_size); 3764 3765 static ssize_t align_show(struct kmem_cache *s, char *buf) 3766 { 3767 return sprintf(buf, "%d\n", s->align); 3768 } 3769 SLAB_ATTR_RO(align); 3770 3771 static ssize_t object_size_show(struct kmem_cache *s, char *buf) 3772 { 3773 return sprintf(buf, "%d\n", s->objsize); 3774 } 3775 SLAB_ATTR_RO(object_size); 3776 3777 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf) 3778 { 3779 return sprintf(buf, "%d\n", s->objects); 3780 } 3781 SLAB_ATTR_RO(objs_per_slab); 3782 3783 static ssize_t order_show(struct kmem_cache *s, char *buf) 3784 { 3785 return sprintf(buf, "%d\n", 
s->order); 3786 } 3787 SLAB_ATTR_RO(order); 3788 3789 static ssize_t ctor_show(struct kmem_cache *s, char *buf) 3790 { 3791 if (s->ctor) { 3792 int n = sprint_symbol(buf, (unsigned long)s->ctor); 3793 3794 return n + sprintf(buf + n, "\n"); 3795 } 3796 return 0; 3797 } 3798 SLAB_ATTR_RO(ctor); 3799 3800 static ssize_t aliases_show(struct kmem_cache *s, char *buf) 3801 { 3802 return sprintf(buf, "%d\n", s->refcount - 1); 3803 } 3804 SLAB_ATTR_RO(aliases); 3805 3806 static ssize_t slabs_show(struct kmem_cache *s, char *buf) 3807 { 3808 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); 3809 } 3810 SLAB_ATTR_RO(slabs); 3811 3812 static ssize_t partial_show(struct kmem_cache *s, char *buf) 3813 { 3814 return slab_objects(s, buf, SO_PARTIAL); 3815 } 3816 SLAB_ATTR_RO(partial); 3817 3818 static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) 3819 { 3820 return slab_objects(s, buf, SO_CPU); 3821 } 3822 SLAB_ATTR_RO(cpu_slabs); 3823 3824 static ssize_t objects_show(struct kmem_cache *s, char *buf) 3825 { 3826 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); 3827 } 3828 SLAB_ATTR_RO(objects); 3829 3830 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf) 3831 { 3832 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE)); 3833 } 3834 3835 static ssize_t sanity_checks_store(struct kmem_cache *s, 3836 const char *buf, size_t length) 3837 { 3838 s->flags &= ~SLAB_DEBUG_FREE; 3839 if (buf[0] == '1') 3840 s->flags |= SLAB_DEBUG_FREE; 3841 return length; 3842 } 3843 SLAB_ATTR(sanity_checks); 3844 3845 static ssize_t trace_show(struct kmem_cache *s, char *buf) 3846 { 3847 return sprintf(buf, "%d\n", !!(s->flags & SLAB_TRACE)); 3848 } 3849 3850 static ssize_t trace_store(struct kmem_cache *s, const char *buf, 3851 size_t length) 3852 { 3853 s->flags &= ~SLAB_TRACE; 3854 if (buf[0] == '1') 3855 s->flags |= SLAB_TRACE; 3856 return length; 3857 } 3858 SLAB_ATTR(trace); 3859 3860 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) 3861 { 3862 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); 3863 } 3864 3865 static ssize_t reclaim_account_store(struct kmem_cache *s, 3866 const char *buf, size_t length) 3867 { 3868 s->flags &= ~SLAB_RECLAIM_ACCOUNT; 3869 if (buf[0] == '1') 3870 s->flags |= SLAB_RECLAIM_ACCOUNT; 3871 return length; 3872 } 3873 SLAB_ATTR(reclaim_account); 3874 3875 static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf) 3876 { 3877 return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN)); 3878 } 3879 SLAB_ATTR_RO(hwcache_align); 3880 3881 #ifdef CONFIG_ZONE_DMA 3882 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) 3883 { 3884 return sprintf(buf, "%d\n", !!(s->flags & SLAB_CACHE_DMA)); 3885 } 3886 SLAB_ATTR_RO(cache_dma); 3887 #endif 3888 3889 static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) 3890 { 3891 return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); 3892 } 3893 SLAB_ATTR_RO(destroy_by_rcu); 3894 3895 static ssize_t red_zone_show(struct kmem_cache *s, char *buf) 3896 { 3897 return sprintf(buf, "%d\n", !!(s->flags & SLAB_RED_ZONE)); 3898 } 3899 3900 static ssize_t red_zone_store(struct kmem_cache *s, 3901 const char *buf, size_t length) 3902 { 3903 if (any_slab_objects(s)) 3904 return -EBUSY; 3905 3906 s->flags &= ~SLAB_RED_ZONE; 3907 if (buf[0] == '1') 3908 s->flags |= SLAB_RED_ZONE; 3909 calculate_sizes(s); 3910 return length; 3911 } 3912 SLAB_ATTR(red_zone); 3913 3914 static ssize_t poison_show(struct kmem_cache *s, char *buf) 3915 { 3916 return 
sprintf(buf, "%d\n", !!(s->flags & SLAB_POISON)); 3917 } 3918 3919 static ssize_t poison_store(struct kmem_cache *s, 3920 const char *buf, size_t length) 3921 { 3922 if (any_slab_objects(s)) 3923 return -EBUSY; 3924 3925 s->flags &= ~SLAB_POISON; 3926 if (buf[0] == '1') 3927 s->flags |= SLAB_POISON; 3928 calculate_sizes(s); 3929 return length; 3930 } 3931 SLAB_ATTR(poison); 3932 3933 static ssize_t store_user_show(struct kmem_cache *s, char *buf) 3934 { 3935 return sprintf(buf, "%d\n", !!(s->flags & SLAB_STORE_USER)); 3936 } 3937 3938 static ssize_t store_user_store(struct kmem_cache *s, 3939 const char *buf, size_t length) 3940 { 3941 if (any_slab_objects(s)) 3942 return -EBUSY; 3943 3944 s->flags &= ~SLAB_STORE_USER; 3945 if (buf[0] == '1') 3946 s->flags |= SLAB_STORE_USER; 3947 calculate_sizes(s); 3948 return length; 3949 } 3950 SLAB_ATTR(store_user); 3951 3952 static ssize_t validate_show(struct kmem_cache *s, char *buf) 3953 { 3954 return 0; 3955 } 3956 3957 static ssize_t validate_store(struct kmem_cache *s, 3958 const char *buf, size_t length) 3959 { 3960 int ret = -EINVAL; 3961 3962 if (buf[0] == '1') { 3963 ret = validate_slab_cache(s); 3964 if (ret >= 0) 3965 ret = length; 3966 } 3967 return ret; 3968 } 3969 SLAB_ATTR(validate); 3970 3971 static ssize_t shrink_show(struct kmem_cache *s, char *buf) 3972 { 3973 return 0; 3974 } 3975 3976 static ssize_t shrink_store(struct kmem_cache *s, 3977 const char *buf, size_t length) 3978 { 3979 if (buf[0] == '1') { 3980 int rc = kmem_cache_shrink(s); 3981 3982 if (rc) 3983 return rc; 3984 } else 3985 return -EINVAL; 3986 return length; 3987 } 3988 SLAB_ATTR(shrink); 3989 3990 static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) 3991 { 3992 if (!(s->flags & SLAB_STORE_USER)) 3993 return -ENOSYS; 3994 return list_locations(s, buf, TRACK_ALLOC); 3995 } 3996 SLAB_ATTR_RO(alloc_calls); 3997 3998 static ssize_t free_calls_show(struct kmem_cache *s, char *buf) 3999 { 4000 if (!(s->flags & SLAB_STORE_USER)) 4001 return -ENOSYS; 4002 return list_locations(s, buf, TRACK_FREE); 4003 } 4004 SLAB_ATTR_RO(free_calls); 4005 4006 #ifdef CONFIG_NUMA 4007 static ssize_t remote_node_defrag_ratio_show(struct kmem_cache *s, char *buf) 4008 { 4009 return sprintf(buf, "%d\n", s->remote_node_defrag_ratio / 10); 4010 } 4011 4012 static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, 4013 const char *buf, size_t length) 4014 { 4015 int n = simple_strtoul(buf, NULL, 10); 4016 4017 if (n < 100) 4018 s->remote_node_defrag_ratio = n * 10; 4019 return length; 4020 } 4021 SLAB_ATTR(remote_node_defrag_ratio); 4022 #endif 4023 4024 #ifdef CONFIG_SLUB_STATS 4025 4026 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) 4027 { 4028 unsigned long sum = 0; 4029 int cpu; 4030 int len; 4031 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); 4032 4033 if (!data) 4034 return -ENOMEM; 4035 4036 for_each_online_cpu(cpu) { 4037 unsigned x = get_cpu_slab(s, cpu)->stat[si]; 4038 4039 data[cpu] = x; 4040 sum += x; 4041 } 4042 4043 len = sprintf(buf, "%lu", sum); 4044 4045 for_each_online_cpu(cpu) { 4046 if (data[cpu] && len < PAGE_SIZE - 20) 4047 len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]); 4048 } 4049 kfree(data); 4050 return len + sprintf(buf + len, "\n"); 4051 } 4052 4053 #define STAT_ATTR(si, text) \ 4054 static ssize_t text##_show(struct kmem_cache *s, char *buf) \ 4055 { \ 4056 return show_stat(s, buf, si); \ 4057 } \ 4058 SLAB_ATTR_RO(text); \ 4059 4060 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); 4061 
STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); 4062 STAT_ATTR(FREE_FASTPATH, free_fastpath); 4063 STAT_ATTR(FREE_SLOWPATH, free_slowpath); 4064 STAT_ATTR(FREE_FROZEN, free_frozen); 4065 STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial); 4066 STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); 4067 STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); 4068 STAT_ATTR(ALLOC_SLAB, alloc_slab); 4069 STAT_ATTR(ALLOC_REFILL, alloc_refill); 4070 STAT_ATTR(FREE_SLAB, free_slab); 4071 STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); 4072 STAT_ATTR(DEACTIVATE_FULL, deactivate_full); 4073 STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); 4074 STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); 4075 STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); 4076 STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); 4077 4078 #endif 4079 4080 static struct attribute *slab_attrs[] = { 4081 &slab_size_attr.attr, 4082 &object_size_attr.attr, 4083 &objs_per_slab_attr.attr, 4084 &order_attr.attr, 4085 &objects_attr.attr, 4086 &slabs_attr.attr, 4087 &partial_attr.attr, 4088 &cpu_slabs_attr.attr, 4089 &ctor_attr.attr, 4090 &aliases_attr.attr, 4091 &align_attr.attr, 4092 &sanity_checks_attr.attr, 4093 &trace_attr.attr, 4094 &hwcache_align_attr.attr, 4095 &reclaim_account_attr.attr, 4096 &destroy_by_rcu_attr.attr, 4097 &red_zone_attr.attr, 4098 &poison_attr.attr, 4099 &store_user_attr.attr, 4100 &validate_attr.attr, 4101 &shrink_attr.attr, 4102 &alloc_calls_attr.attr, 4103 &free_calls_attr.attr, 4104 #ifdef CONFIG_ZONE_DMA 4105 &cache_dma_attr.attr, 4106 #endif 4107 #ifdef CONFIG_NUMA 4108 &remote_node_defrag_ratio_attr.attr, 4109 #endif 4110 #ifdef CONFIG_SLUB_STATS 4111 &alloc_fastpath_attr.attr, 4112 &alloc_slowpath_attr.attr, 4113 &free_fastpath_attr.attr, 4114 &free_slowpath_attr.attr, 4115 &free_frozen_attr.attr, 4116 &free_add_partial_attr.attr, 4117 &free_remove_partial_attr.attr, 4118 &alloc_from_partial_attr.attr, 4119 &alloc_slab_attr.attr, 4120 &alloc_refill_attr.attr, 4121 &free_slab_attr.attr, 4122 &cpuslab_flush_attr.attr, 4123 &deactivate_full_attr.attr, 4124 &deactivate_empty_attr.attr, 4125 &deactivate_to_head_attr.attr, 4126 &deactivate_to_tail_attr.attr, 4127 &deactivate_remote_frees_attr.attr, 4128 #endif 4129 NULL 4130 }; 4131 4132 static struct attribute_group slab_attr_group = { 4133 .attrs = slab_attrs, 4134 }; 4135 4136 static ssize_t slab_attr_show(struct kobject *kobj, 4137 struct attribute *attr, 4138 char *buf) 4139 { 4140 struct slab_attribute *attribute; 4141 struct kmem_cache *s; 4142 int err; 4143 4144 attribute = to_slab_attr(attr); 4145 s = to_slab(kobj); 4146 4147 if (!attribute->show) 4148 return -EIO; 4149 4150 err = attribute->show(s, buf); 4151 4152 return err; 4153 } 4154 4155 static ssize_t slab_attr_store(struct kobject *kobj, 4156 struct attribute *attr, 4157 const char *buf, size_t len) 4158 { 4159 struct slab_attribute *attribute; 4160 struct kmem_cache *s; 4161 int err; 4162 4163 attribute = to_slab_attr(attr); 4164 s = to_slab(kobj); 4165 4166 if (!attribute->store) 4167 return -EIO; 4168 4169 err = attribute->store(s, buf, len); 4170 4171 return err; 4172 } 4173 4174 static void kmem_cache_release(struct kobject *kobj) 4175 { 4176 struct kmem_cache *s = to_slab(kobj); 4177 4178 kfree(s); 4179 } 4180 4181 static struct sysfs_ops slab_sysfs_ops = { 4182 .show = slab_attr_show, 4183 .store = slab_attr_store, 4184 }; 4185 4186 static struct kobj_type slab_ktype = { 4187 .sysfs_ops = &slab_sysfs_ops, 4188 .release = kmem_cache_release 4189 }; 4190 4191 static int uevent_filter(struct kset 
*kset, struct kobject *kobj) 4192 { 4193 struct kobj_type *ktype = get_ktype(kobj); 4194 4195 if (ktype == &slab_ktype) 4196 return 1; 4197 return 0; 4198 } 4199 4200 static struct kset_uevent_ops slab_uevent_ops = { 4201 .filter = uevent_filter, 4202 }; 4203 4204 static struct kset *slab_kset; 4205 4206 #define ID_STR_LENGTH 64 4207 4208 /* Create a unique string id for a slab cache: 4209 * format 4210 * :[flags-]size:[memory address of kmemcache] 4211 */ 4212 static char *create_unique_id(struct kmem_cache *s) 4213 { 4214 char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL); 4215 char *p = name; 4216 4217 BUG_ON(!name); 4218 4219 *p++ = ':'; 4220 /* 4221 * First flags affecting slabcache operations. We will only 4222 * get here for aliasable slabs so we do not need to support 4223 * too many flags. The flags here must cover all flags that 4224 * are matched during merging to guarantee that the id is 4225 * unique. 4226 */ 4227 if (s->flags & SLAB_CACHE_DMA) 4228 *p++ = 'd'; 4229 if (s->flags & SLAB_RECLAIM_ACCOUNT) 4230 *p++ = 'a'; 4231 if (s->flags & SLAB_DEBUG_FREE) 4232 *p++ = 'F'; 4233 if (p != name + 1) 4234 *p++ = '-'; 4235 p += sprintf(p, "%07d", s->size); 4236 BUG_ON(p > name + ID_STR_LENGTH - 1); 4237 return name; 4238 } 4239 4240 static int sysfs_slab_add(struct kmem_cache *s) 4241 { 4242 int err; 4243 const char *name; 4244 int unmergeable; 4245 4246 if (slab_state < SYSFS) 4247 /* Defer until later */ 4248 return 0; 4249 4250 unmergeable = slab_unmergeable(s); 4251 if (unmergeable) { 4252 /* 4253 * Slabcache can never be merged so we can use the name proper. 4254 * This is typically the case for debug situations. In that 4255 * case we can catch duplicate names easily. 4256 */ 4257 sysfs_remove_link(&slab_kset->kobj, s->name); 4258 name = s->name; 4259 } else { 4260 /* 4261 * Create a unique name for the slab as a target 4262 * for the symlinks. 4263 */ 4264 name = create_unique_id(s); 4265 } 4266 4267 s->kobj.kset = slab_kset; 4268 err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name); 4269 if (err) { 4270 kobject_put(&s->kobj); 4271 return err; 4272 } 4273 4274 err = sysfs_create_group(&s->kobj, &slab_attr_group); 4275 if (err) 4276 return err; 4277 kobject_uevent(&s->kobj, KOBJ_ADD); 4278 if (!unmergeable) { 4279 /* Setup first alias */ 4280 sysfs_slab_alias(s, s->name); 4281 kfree(name); 4282 } 4283 return 0; 4284 } 4285 4286 static void sysfs_slab_remove(struct kmem_cache *s) 4287 { 4288 kobject_uevent(&s->kobj, KOBJ_REMOVE); 4289 kobject_del(&s->kobj); 4290 kobject_put(&s->kobj); 4291 } 4292 4293 /* 4294 * Need to buffer aliases during bootup until sysfs becomes 4295 * available lest we loose that information. 4296 */ 4297 struct saved_alias { 4298 struct kmem_cache *s; 4299 const char *name; 4300 struct saved_alias *next; 4301 }; 4302 4303 static struct saved_alias *alias_list; 4304 4305 static int sysfs_slab_alias(struct kmem_cache *s, const char *name) 4306 { 4307 struct saved_alias *al; 4308 4309 if (slab_state == SYSFS) { 4310 /* 4311 * If we have a leftover link then remove it. 
4312 */ 4313 sysfs_remove_link(&slab_kset->kobj, name); 4314 return sysfs_create_link(&slab_kset->kobj, &s->kobj, name); 4315 } 4316 4317 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL); 4318 if (!al) 4319 return -ENOMEM; 4320 4321 al->s = s; 4322 al->name = name; 4323 al->next = alias_list; 4324 alias_list = al; 4325 return 0; 4326 } 4327 4328 static int __init slab_sysfs_init(void) 4329 { 4330 struct kmem_cache *s; 4331 int err; 4332 4333 slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); 4334 if (!slab_kset) { 4335 printk(KERN_ERR "Cannot register slab subsystem.\n"); 4336 return -ENOSYS; 4337 } 4338 4339 slab_state = SYSFS; 4340 4341 list_for_each_entry(s, &slab_caches, list) { 4342 err = sysfs_slab_add(s); 4343 if (err) 4344 printk(KERN_ERR "SLUB: Unable to add boot slab %s" 4345 " to sysfs\n", s->name); 4346 } 4347 4348 while (alias_list) { 4349 struct saved_alias *al = alias_list; 4350 4351 alias_list = alias_list->next; 4352 err = sysfs_slab_alias(al->s, al->name); 4353 if (err) 4354 printk(KERN_ERR "SLUB: Unable to add boot slab alias" 4355 " %s to sysfs\n", s->name); 4356 kfree(al); 4357 } 4358 4359 resiliency_test(); 4360 return 0; 4361 } 4362 4363 __initcall(slab_sysfs_init); 4364 #endif 4365 4366 /* 4367 * The /proc/slabinfo ABI 4368 */ 4369 #ifdef CONFIG_SLABINFO 4370 4371 ssize_t slabinfo_write(struct file *file, const char __user * buffer, 4372 size_t count, loff_t *ppos) 4373 { 4374 return -EINVAL; 4375 } 4376 4377 4378 static void print_slabinfo_header(struct seq_file *m) 4379 { 4380 seq_puts(m, "slabinfo - version: 2.1\n"); 4381 seq_puts(m, "# name <active_objs> <num_objs> <objsize> " 4382 "<objperslab> <pagesperslab>"); 4383 seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>"); 4384 seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>"); 4385 seq_putc(m, '\n'); 4386 } 4387 4388 static void *s_start(struct seq_file *m, loff_t *pos) 4389 { 4390 loff_t n = *pos; 4391 4392 down_read(&slub_lock); 4393 if (!n) 4394 print_slabinfo_header(m); 4395 4396 return seq_list_start(&slab_caches, *pos); 4397 } 4398 4399 static void *s_next(struct seq_file *m, void *p, loff_t *pos) 4400 { 4401 return seq_list_next(p, &slab_caches, pos); 4402 } 4403 4404 static void s_stop(struct seq_file *m, void *p) 4405 { 4406 up_read(&slub_lock); 4407 } 4408 4409 static int s_show(struct seq_file *m, void *p) 4410 { 4411 unsigned long nr_partials = 0; 4412 unsigned long nr_slabs = 0; 4413 unsigned long nr_inuse = 0; 4414 unsigned long nr_objs; 4415 struct kmem_cache *s; 4416 int node; 4417 4418 s = list_entry(p, struct kmem_cache, list); 4419 4420 for_each_online_node(node) { 4421 struct kmem_cache_node *n = get_node(s, node); 4422 4423 if (!n) 4424 continue; 4425 4426 nr_partials += n->nr_partial; 4427 nr_slabs += atomic_long_read(&n->nr_slabs); 4428 nr_inuse += count_partial(n); 4429 } 4430 4431 nr_objs = nr_slabs * s->objects; 4432 nr_inuse += (nr_slabs - nr_partials) * s->objects; 4433 4434 seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse, 4435 nr_objs, s->size, s->objects, (1 << s->order)); 4436 seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0); 4437 seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs, 4438 0UL); 4439 seq_putc(m, '\n'); 4440 return 0; 4441 } 4442 4443 const struct seq_operations slabinfo_op = { 4444 .start = s_start, 4445 .next = s_next, 4446 .stop = s_stop, 4447 .show = s_show, 4448 }; 4449 4450 #endif /* CONFIG_SLABINFO */ 4451
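/*
 * Illustrative sample of the output generated by s_show() above (all
 * numbers made up; a kmalloc-64 cache with 20 order-0 slabs of 64 objects
 * each):
 *
 * kmalloc-64   1234   1280     64   64    1 : tunables    0    0    0 : slabdata     20     20      0
 */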