xref: /freebsd/sys/vm/uma_core.c (revision 2357939bc239bd5334a169b62313806178dd8f30)
1 /*
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * uma_core.c  Implementation of the Universal Memory allocator
29  *
30  * This allocator is intended to replace the multitude of similar object caches
31  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
32  * effecient.  A primary design goal is to return unused memory to the rest of
33  * the system.  This will make the system as a whole more flexible due to the
34  * ability to move memory to subsystems which most need it instead of leaving
35  * pools of reserved memory unused.
36  *
37  * The basic ideas stem from similar slab/zone based allocators whose algorithms
38  * are well known.
39  *
40  */
41 
42 /*
43  * TODO:
44  *	- Improve memory usage for large allocations
45  *	- Investigate cache size adjustments
46  */
47 
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
50 
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57 
58 #include "opt_param.h"
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/kernel.h>
62 #include <sys/types.h>
63 #include <sys/queue.h>
64 #include <sys/malloc.h>
65 #include <sys/lock.h>
66 #include <sys/sysctl.h>
67 #include <sys/mutex.h>
68 #include <sys/proc.h>
69 #include <sys/smp.h>
70 #include <sys/vmmeter.h>
71 #include <sys/mbuf.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_extern.h>
80 #include <vm/uma.h>
81 #include <vm/uma_int.h>
82 #include <vm/uma_dbg.h>
83 
84 #include <machine/vmparam.h>
85 
86 /*
87  * This is the zone from which all zones are spawned.  The idea is that even
88  * the zone heads are allocated from the allocator, so we use the bss section
89  * to bootstrap us.
90  */
91 static struct uma_zone masterzone;
92 static uma_zone_t zones = &masterzone;
93 
94 /* This is the zone from which all of uma_slab_t's are allocated. */
95 static uma_zone_t slabzone;
96 
97 /*
98  * The initial hash tables come out of this zone so they can be allocated
99  * prior to malloc coming up.
100  */
101 static uma_zone_t hashzone;
102 
103 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
104 
105 /*
106  * Are we allowed to allocate buckets?
107  */
108 static int bucketdisable = 1;
109 
110 /* Linked list of all zones in the system */
111 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
112 
113 /* This mutex protects the zone list */
114 static struct mtx uma_mtx;
115 
116 /* These are the pcpu cache locks */
117 static struct mtx uma_pcpu_mtx[MAXCPU];
118 
119 /* Linked list of boot time pages */
120 static LIST_HEAD(,uma_slab) uma_boot_pages =
121     LIST_HEAD_INITIALIZER(&uma_boot_pages);
122 
123 /* Count of free boottime pages */
124 static int uma_boot_free = 0;
125 
126 /* Is the VM done starting up? */
127 static int booted = 0;
128 
129 /*
130  * This is the handle used to schedule events that need to happen
131  * outside of the allocation fast path.
132  */
133 static struct callout uma_callout;
134 #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
135 
136 /*
137  * This structure is passed as the zone ctor arg so that I don't have to create
138  * a special allocation function just for zones.
139  */
140 struct uma_zctor_args {
141 	char *name;
142 	size_t size;
143 	uma_ctor ctor;
144 	uma_dtor dtor;
145 	uma_init uminit;
146 	uma_fini fini;
147 	int align;
148 	u_int16_t flags;
149 };
150 
151 struct uma_bucket_zone {
152 	uma_zone_t	ubz_zone;
153 	char		*ubz_name;
154 	int		ubz_entries;
155 };
156 
157 #define	BUCKET_MAX	128
158 
159 struct uma_bucket_zone bucket_zones[] = {
160 	{ NULL, "16 Bucket", 16 },
161 	{ NULL, "32 Bucket", 32 },
162 	{ NULL, "64 Bucket", 64 },
163 	{ NULL, "128 Bucket", 128 },
164 	{ NULL, NULL, 0}
165 };
166 
167 #define	BUCKET_SHIFT	4
168 #define	BUCKET_ZONES	((BUCKET_MAX >> BUCKET_SHIFT) + 1)
169 
170 uint8_t bucket_size[BUCKET_ZONES];
171 
172 /* Prototypes.. */
173 
174 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
175 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
176 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
177 static void page_free(void *, int, u_int8_t);
178 static uma_slab_t slab_zalloc(uma_zone_t, int);
179 static void cache_drain(uma_zone_t);
180 static void bucket_drain(uma_zone_t, uma_bucket_t);
181 static void bucket_cache_drain(uma_zone_t zone);
182 static void zone_ctor(void *, int, void *);
183 static void zone_dtor(void *, int, void *);
184 static void zero_init(void *, int);
185 static void zone_small_init(uma_zone_t zone);
186 static void zone_large_init(uma_zone_t zone);
187 static void zone_foreach(void (*zfunc)(uma_zone_t));
188 static void zone_timeout(uma_zone_t zone);
189 static int hash_alloc(struct uma_hash *);
190 static int hash_expand(struct uma_hash *, struct uma_hash *);
191 static void hash_free(struct uma_hash *hash);
192 static void uma_timeout(void *);
193 static void uma_startup3(void);
194 static void *uma_zalloc_internal(uma_zone_t, void *, int);
195 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
196 static void bucket_enable(void);
197 static void bucket_init(void);
198 static uma_bucket_t bucket_alloc(int, int);
199 static void bucket_free(uma_bucket_t);
200 static void bucket_zone_drain(void);
201 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
202 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
203 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
204 static void zone_drain(uma_zone_t);
205 
206 void uma_print_zone(uma_zone_t);
207 void uma_print_stats(void);
208 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
209 
210 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
211     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
212 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
213 
214 /*
215  * This routine checks to see whether or not it's safe to enable buckets.
216  */
217 
218 static void
219 bucket_enable(void)
220 {
221 	if (cnt.v_free_count < cnt.v_free_min)
222 		bucketdisable = 1;
223 	else
224 		bucketdisable = 0;
225 }
226 
227 static void
228 bucket_init(void)
229 {
230 	struct uma_bucket_zone *ubz;
231 	int i;
232 	int j;
233 
234 	for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
235 		int size;
236 
237 		ubz = &bucket_zones[j];
238 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
239 		size += sizeof(void *) * ubz->ubz_entries;
240 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
241 		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
242 		for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
243 			bucket_size[i >> BUCKET_SHIFT] = j;
244 	}
245 }
246 
247 static uma_bucket_t
248 bucket_alloc(int entries, int bflags)
249 {
250 	struct uma_bucket_zone *ubz;
251 	uma_bucket_t bucket;
252 	int idx;
253 
254 	/*
255 	 * This is to stop us from allocating per cpu buckets while we're
256 	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
257 	 * boot pages.  This also prevents us from allocating buckets in
258 	 * low memory situations.
259 	 */
260 
261 	if (bucketdisable)
262 		return (NULL);
263 	idx = howmany(entries, 1 << BUCKET_SHIFT);
264 	ubz = &bucket_zones[bucket_size[idx]];
265 	bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
266 	if (bucket) {
267 #ifdef INVARIANTS
268 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
269 #endif
270 		bucket->ub_cnt = 0;
271 		bucket->ub_entries = ubz->ubz_entries;
272 	}
273 
274 	return (bucket);
275 }
276 
277 static void
278 bucket_free(uma_bucket_t bucket)
279 {
280 	struct uma_bucket_zone *ubz;
281 	int idx;
282 
283 	idx = howmany(bucket->ub_entries, 1 << BUCKET_SHIFT);
284 	ubz = &bucket_zones[bucket_size[idx]];
285 	uma_zfree_internal(ubz->ubz_zone, bucket, NULL, 0);
286 }
287 
288 static void
289 bucket_zone_drain(void)
290 {
291 	struct uma_bucket_zone *ubz;
292 
293 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
294 		zone_drain(ubz->ubz_zone);
295 }
296 
297 
298 /*
299  * Routine called by timeout which is used to fire off some time interval
300  * based calculations.  (stats, hash size, etc.)
301  *
302  * Arguments:
303  *	arg   Unused
304  *
305  * Returns:
306  *	Nothing
307  */
308 static void
309 uma_timeout(void *unused)
310 {
311 	bucket_enable();
312 	zone_foreach(zone_timeout);
313 
314 	/* Reschedule this event */
315 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
316 }
317 
318 /*
319  * Routine to perform timeout driven calculations.  This expands the
320  * hashes and does per cpu statistics aggregation.
321  *
322  *  Arguments:
323  *	zone  The zone to operate on
324  *
325  *  Returns:
326  *	Nothing
327  */
328 static void
329 zone_timeout(uma_zone_t zone)
330 {
331 	uma_cache_t cache;
332 	u_int64_t alloc;
333 	int cpu;
334 
335 	alloc = 0;
336 
337 	/*
338 	 * Aggregate per cpu cache statistics back to the zone.
339 	 *
340 	 * XXX This should be done in the sysctl handler.
341 	 *
342 	 * I may rewrite this to set a flag in the per cpu cache instead of
343 	 * locking.  If the flag is not cleared on the next round I will have
344 	 * to lock and do it here instead so that the statistics don't get too
345 	 * far out of sync.
346 	 */
347 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
348 		for (cpu = 0; cpu <= mp_maxid; cpu++) {
349 			if (CPU_ABSENT(cpu))
350 				continue;
351 			CPU_LOCK(cpu);
352 			cache = &zone->uz_cpu[cpu];
353 			/* Add them up, and reset */
354 			alloc += cache->uc_allocs;
355 			cache->uc_allocs = 0;
356 			CPU_UNLOCK(cpu);
357 		}
358 	}
359 
360 	/* Now push these stats back into the zone.. */
361 	ZONE_LOCK(zone);
362 	zone->uz_allocs += alloc;
363 
364 	/*
365 	 * Expand the zone hash table.
366 	 *
367 	 * This is done if the number of slabs is larger than the hash size.
368 	 * What I'm trying to do here is completely reduce collisions.  This
369 	 * may be a little aggressive.  Should I allow for two collisions max?
370 	 */
371 
372 	if (zone->uz_flags & UMA_ZONE_HASH &&
373 	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
374 		struct uma_hash newhash;
375 		struct uma_hash oldhash;
376 		int ret;
377 
378 		/*
379 		 * This is so involved because allocating and freeing
380 		 * while the zone lock is held will lead to deadlock.
381 		 * I have to do everything in stages and check for
382 		 * races.
383 		 */
384 		newhash = zone->uz_hash;
385 		ZONE_UNLOCK(zone);
386 		ret = hash_alloc(&newhash);
387 		ZONE_LOCK(zone);
388 		if (ret) {
389 			if (hash_expand(&zone->uz_hash, &newhash)) {
390 				oldhash = zone->uz_hash;
391 				zone->uz_hash = newhash;
392 			} else
393 				oldhash = newhash;
394 
395 			ZONE_UNLOCK(zone);
396 			hash_free(&oldhash);
397 			ZONE_LOCK(zone);
398 		}
399 	}
400 	ZONE_UNLOCK(zone);
401 }
402 
403 /*
404  * Allocate and zero fill the next sized hash table from the appropriate
405  * backing store.
406  *
407  * Arguments:
408  *	hash  A new hash structure with the old hash size in uh_hashsize
409  *
410  * Returns:
411  *	1 on sucess and 0 on failure.
412  */
413 static int
414 hash_alloc(struct uma_hash *hash)
415 {
416 	int oldsize;
417 	int alloc;
418 
419 	oldsize = hash->uh_hashsize;
420 
421 	/* We're just going to go to a power of two greater */
422 	if (oldsize)  {
423 		hash->uh_hashsize = oldsize * 2;
424 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
425 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
426 		    M_UMAHASH, M_NOWAIT);
427 	} else {
428 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
429 		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
430 		    M_WAITOK);
431 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
432 	}
433 	if (hash->uh_slab_hash) {
434 		bzero(hash->uh_slab_hash, alloc);
435 		hash->uh_hashmask = hash->uh_hashsize - 1;
436 		return (1);
437 	}
438 
439 	return (0);
440 }
441 
442 /*
443  * Expands the hash table for HASH zones.  This is done from zone_timeout
444  * to reduce collisions.  This must not be done in the regular allocation
445  * path, otherwise, we can recurse on the vm while allocating pages.
446  *
447  * Arguments:
448  *	oldhash  The hash you want to expand
449  *	newhash  The hash structure for the new table
450  *
451  * Returns:
452  *	Nothing
453  *
454  * Discussion:
455  */
456 static int
457 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
458 {
459 	uma_slab_t slab;
460 	int hval;
461 	int i;
462 
463 	if (!newhash->uh_slab_hash)
464 		return (0);
465 
466 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
467 		return (0);
468 
469 	/*
470 	 * I need to investigate hash algorithms for resizing without a
471 	 * full rehash.
472 	 */
473 
474 	for (i = 0; i < oldhash->uh_hashsize; i++)
475 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
476 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
477 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
478 			hval = UMA_HASH(newhash, slab->us_data);
479 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
480 			    slab, us_hlink);
481 		}
482 
483 	return (1);
484 }
485 
486 /*
487  * Free the hash bucket to the appropriate backing store.
488  *
489  * Arguments:
490  *	slab_hash  The hash bucket we're freeing
491  *	hashsize   The number of entries in that hash bucket
492  *
493  * Returns:
494  *	Nothing
495  */
496 static void
497 hash_free(struct uma_hash *hash)
498 {
499 	if (hash->uh_slab_hash == NULL)
500 		return;
501 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
502 		uma_zfree_internal(hashzone,
503 		    hash->uh_slab_hash, NULL, 0);
504 	else
505 		free(hash->uh_slab_hash, M_UMAHASH);
506 }
507 
508 /*
509  * Frees all outstanding items in a bucket
510  *
511  * Arguments:
512  *	zone   The zone to free to, must be unlocked.
513  *	bucket The free/alloc bucket with items, cpu queue must be locked.
514  *
515  * Returns:
516  *	Nothing
517  */
518 
519 static void
520 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
521 {
522 	uma_slab_t slab;
523 	int mzone;
524 	void *item;
525 
526 	if (bucket == NULL)
527 		return;
528 
529 	slab = NULL;
530 	mzone = 0;
531 
532 	/* We have to lookup the slab again for malloc.. */
533 	if (zone->uz_flags & UMA_ZONE_MALLOC)
534 		mzone = 1;
535 
536 	while (bucket->ub_cnt > 0)  {
537 		bucket->ub_cnt--;
538 		item = bucket->ub_bucket[bucket->ub_cnt];
539 #ifdef INVARIANTS
540 		bucket->ub_bucket[bucket->ub_cnt] = NULL;
541 		KASSERT(item != NULL,
542 		    ("bucket_drain: botched ptr, item is NULL"));
543 #endif
544 		/*
545 		 * This is extremely inefficient.  The slab pointer was passed
546 		 * to uma_zfree_arg, but we lost it because the buckets don't
547 		 * hold them.  This will go away when free() gets a size passed
548 		 * to it.
549 		 */
550 		if (mzone)
551 			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
552 		uma_zfree_internal(zone, item, slab, 1);
553 	}
554 }
555 
556 /*
557  * Drains the per cpu caches for a zone.
558  *
559  * Arguments:
560  *	zone     The zone to drain, must be unlocked.
561  *
562  * Returns:
563  *	Nothing
564  */
565 static void
566 cache_drain(uma_zone_t zone)
567 {
568 	uma_cache_t cache;
569 	int cpu;
570 
571 	/*
572 	 * We have to lock each cpu cache before locking the zone
573 	 */
574 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
575 		if (CPU_ABSENT(cpu))
576 			continue;
577 		CPU_LOCK(cpu);
578 		cache = &zone->uz_cpu[cpu];
579 		bucket_drain(zone, cache->uc_allocbucket);
580 		bucket_drain(zone, cache->uc_freebucket);
581 		if (cache->uc_allocbucket != NULL)
582 			bucket_free(cache->uc_allocbucket);
583 		if (cache->uc_freebucket != NULL)
584 			bucket_free(cache->uc_freebucket);
585 		cache->uc_allocbucket = cache->uc_freebucket = NULL;
586 	}
587 	ZONE_LOCK(zone);
588 	bucket_cache_drain(zone);
589 	ZONE_UNLOCK(zone);
590 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
591 		if (CPU_ABSENT(cpu))
592 			continue;
593 		CPU_UNLOCK(cpu);
594 	}
595 }
596 
597 /*
598  * Drain the cached buckets from a zone.  Expects a locked zone on entry.
599  */
600 static void
601 bucket_cache_drain(uma_zone_t zone)
602 {
603 	uma_bucket_t bucket;
604 
605 	/*
606 	 * Drain the bucket queues and free the buckets, we just keep two per
607 	 * cpu (alloc/free).
608 	 */
609 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
610 		LIST_REMOVE(bucket, ub_link);
611 		ZONE_UNLOCK(zone);
612 		bucket_drain(zone, bucket);
613 		bucket_free(bucket);
614 		ZONE_LOCK(zone);
615 	}
616 
617 	/* Now we do the free queue.. */
618 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
619 		LIST_REMOVE(bucket, ub_link);
620 		bucket_free(bucket);
621 	}
622 }
623 
624 /*
625  * Frees pages from a zone back to the system.  This is done on demand from
626  * the pageout daemon.
627  *
628  * Arguments:
629  *	zone  The zone to free pages from
630  *	 all  Should we drain all items?
631  *
632  * Returns:
633  *	Nothing.
634  */
635 static void
636 zone_drain(uma_zone_t zone)
637 {
638 	struct slabhead freeslabs = {};
639 	uma_slab_t slab;
640 	uma_slab_t n;
641 	u_int8_t flags;
642 	u_int8_t *mem;
643 	int i;
644 
645 	/*
646 	 * We don't want to take pages from staticly allocated zones at this
647 	 * time
648 	 */
649 	if (zone->uz_flags & UMA_ZONE_NOFREE || zone->uz_freef == NULL)
650 		return;
651 
652 	ZONE_LOCK(zone);
653 
654 #ifdef UMA_DEBUG
655 	printf("%s free items: %u\n", zone->uz_name, zone->uz_free);
656 #endif
657 	bucket_cache_drain(zone);
658 	if (zone->uz_free == 0)
659 		goto finished;
660 
661 	slab = LIST_FIRST(&zone->uz_free_slab);
662 	while (slab) {
663 		n = LIST_NEXT(slab, us_link);
664 
665 		/* We have no where to free these to */
666 		if (slab->us_flags & UMA_SLAB_BOOT) {
667 			slab = n;
668 			continue;
669 		}
670 
671 		LIST_REMOVE(slab, us_link);
672 		zone->uz_pages -= zone->uz_ppera;
673 		zone->uz_free -= zone->uz_ipers;
674 
675 		if (zone->uz_flags & UMA_ZONE_HASH)
676 			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
677 
678 		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
679 
680 		slab = n;
681 	}
682 finished:
683 	ZONE_UNLOCK(zone);
684 
685 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
686 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
687 		if (zone->uz_fini)
688 			for (i = 0; i < zone->uz_ipers; i++)
689 				zone->uz_fini(
690 				    slab->us_data + (zone->uz_rsize * i),
691 				    zone->uz_size);
692 		flags = slab->us_flags;
693 		mem = slab->us_data;
694 
695 		if (zone->uz_flags & UMA_ZONE_OFFPAGE)
696 			uma_zfree_internal(slabzone, slab, NULL, 0);
697 		if (zone->uz_flags & UMA_ZONE_MALLOC) {
698 			vm_object_t obj;
699 
700 			if (flags & UMA_SLAB_KMEM)
701 				obj = kmem_object;
702 			else
703 				obj = NULL;
704 			for (i = 0; i < zone->uz_ppera; i++)
705 				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
706 				    obj);
707 		}
708 #ifdef UMA_DEBUG
709 		printf("%s: Returning %d bytes.\n",
710 		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
711 #endif
712 		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
713 	}
714 
715 }
716 
717 /*
718  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
719  *
720  * Arguments:
721  *	zone  The zone to allocate slabs for
722  *	wait  Shall we wait?
723  *
724  * Returns:
725  *	The slab that was allocated or NULL if there is no memory and the
726  *	caller specified M_NOWAIT.
727  */
728 static uma_slab_t
729 slab_zalloc(uma_zone_t zone, int wait)
730 {
731 	uma_slab_t slab;	/* Starting slab */
732 	u_int8_t *mem;
733 	u_int8_t flags;
734 	int i;
735 
736 	slab = NULL;
737 
738 #ifdef UMA_DEBUG
739 	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
740 #endif
741 	ZONE_UNLOCK(zone);
742 
743 	if (zone->uz_flags & UMA_ZONE_OFFPAGE) {
744 		slab = uma_zalloc_internal(slabzone, NULL, wait);
745 		if (slab == NULL) {
746 			ZONE_LOCK(zone);
747 			return NULL;
748 		}
749 	}
750 
751 	/*
752 	 * This reproduces the old vm_zone behavior of zero filling pages the
753 	 * first time they are added to a zone.
754 	 *
755 	 * Malloced items are zeroed in uma_zalloc.
756 	 */
757 
758 	if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0)
759 		wait |= M_ZERO;
760 	else
761 		wait &= ~M_ZERO;
762 
763 	mem = zone->uz_allocf(zone, zone->uz_ppera * UMA_SLAB_SIZE,
764 	    &flags, wait);
765 	if (mem == NULL) {
766 		ZONE_LOCK(zone);
767 		return (NULL);
768 	}
769 
770 	/* Point the slab into the allocated memory */
771 	if (!(zone->uz_flags & UMA_ZONE_OFFPAGE))
772 		slab = (uma_slab_t )(mem + zone->uz_pgoff);
773 
774 	if (zone->uz_flags & UMA_ZONE_MALLOC)
775 		for (i = 0; i < zone->uz_ppera; i++)
776 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
777 
778 	slab->us_zone = zone;
779 	slab->us_data = mem;
780 	slab->us_freecount = zone->uz_ipers;
781 	slab->us_firstfree = 0;
782 	slab->us_flags = flags;
783 	for (i = 0; i < zone->uz_ipers; i++)
784 		slab->us_freelist[i] = i+1;
785 
786 	if (zone->uz_init)
787 		for (i = 0; i < zone->uz_ipers; i++)
788 			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
789 			    zone->uz_size);
790 	ZONE_LOCK(zone);
791 
792 	if (zone->uz_flags & UMA_ZONE_HASH)
793 		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
794 
795 	zone->uz_pages += zone->uz_ppera;
796 	zone->uz_free += zone->uz_ipers;
797 
798 	return (slab);
799 }
800 
801 /*
802  * This function is intended to be used early on in place of page_alloc() so
803  * that we may use the boot time page cache to satisfy allocations before
804  * the VM is ready.
805  */
806 static void *
807 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
808 {
809 	/*
810 	 * Check our small startup cache to see if it has pages remaining.
811 	 */
812 	mtx_lock(&uma_mtx);
813 	if (uma_boot_free != 0) {
814 		uma_slab_t tmps;
815 
816 		tmps = LIST_FIRST(&uma_boot_pages);
817 		LIST_REMOVE(tmps, us_link);
818 		uma_boot_free--;
819 		mtx_unlock(&uma_mtx);
820 		*pflag = tmps->us_flags;
821 		return (tmps->us_data);
822 	}
823 	mtx_unlock(&uma_mtx);
824 	if (booted == 0)
825 		panic("UMA: Increase UMA_BOOT_PAGES");
826 	/*
827 	 * Now that we've booted reset these users to their real allocator.
828 	 */
829 #ifdef UMA_MD_SMALL_ALLOC
830 	zone->uz_allocf = uma_small_alloc;
831 #else
832 	zone->uz_allocf = page_alloc;
833 #endif
834 	return zone->uz_allocf(zone, bytes, pflag, wait);
835 }
836 
837 /*
838  * Allocates a number of pages from the system
839  *
840  * Arguments:
841  *	zone  Unused
842  *	bytes  The number of bytes requested
843  *	wait  Shall we wait?
844  *
845  * Returns:
846  *	A pointer to the alloced memory or possibly
847  *	NULL if M_NOWAIT is set.
848  */
849 static void *
850 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
851 {
852 	void *p;	/* Returned page */
853 
854 	*pflag = UMA_SLAB_KMEM;
855 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
856 
857 	return (p);
858 }
859 
860 /*
861  * Allocates a number of pages from within an object
862  *
863  * Arguments:
864  *	zone   Unused
865  *	bytes  The number of bytes requested
866  *	wait   Shall we wait?
867  *
868  * Returns:
869  *	A pointer to the alloced memory or possibly
870  *	NULL if M_NOWAIT is set.
871  */
872 static void *
873 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
874 {
875 	vm_object_t object;
876 	vm_offset_t retkva, zkva;
877 	vm_page_t p;
878 	int pages, startpages;
879 
880 	object = zone->uz_obj;
881 	retkva = 0;
882 
883 	/*
884 	 * This looks a little weird since we're getting one page at a time.
885 	 */
886 	VM_OBJECT_LOCK(object);
887 	p = TAILQ_LAST(&object->memq, pglist);
888 	pages = p != NULL ? p->pindex + 1 : 0;
889 	startpages = pages;
890 	zkva = zone->uz_kva + pages * PAGE_SIZE;
891 	for (; bytes > 0; bytes -= PAGE_SIZE) {
892 		p = vm_page_alloc(object, pages,
893 		    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
894 		if (p == NULL) {
895 			if (pages != startpages)
896 				pmap_qremove(retkva, pages - startpages);
897 			while (pages != startpages) {
898 				pages--;
899 				p = TAILQ_LAST(&object->memq, pglist);
900 				vm_page_lock_queues();
901 				vm_page_unwire(p, 0);
902 				vm_page_free(p);
903 				vm_page_unlock_queues();
904 			}
905 			retkva = 0;
906 			goto done;
907 		}
908 		pmap_qenter(zkva, &p, 1);
909 		if (retkva == 0)
910 			retkva = zkva;
911 		zkva += PAGE_SIZE;
912 		pages += 1;
913 	}
914 done:
915 	VM_OBJECT_UNLOCK(object);
916 	*flags = UMA_SLAB_PRIV;
917 
918 	return ((void *)retkva);
919 }
920 
921 /*
922  * Frees a number of pages to the system
923  *
924  * Arguments:
925  *	mem   A pointer to the memory to be freed
926  *	size  The size of the memory being freed
927  *	flags The original p->us_flags field
928  *
929  * Returns:
930  *	Nothing
931  */
932 static void
933 page_free(void *mem, int size, u_int8_t flags)
934 {
935 	vm_map_t map;
936 
937 	if (flags & UMA_SLAB_KMEM)
938 		map = kmem_map;
939 	else
940 		panic("UMA: page_free used with invalid flags %d\n", flags);
941 
942 	kmem_free(map, (vm_offset_t)mem, size);
943 }
944 
945 /*
946  * Zero fill initializer
947  *
948  * Arguments/Returns follow uma_init specifications
949  */
950 static void
951 zero_init(void *mem, int size)
952 {
953 	bzero(mem, size);
954 }
955 
956 /*
957  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
958  *
959  * Arguments
960  *	zone  The zone we should initialize
961  *
962  * Returns
963  *	Nothing
964  */
965 static void
966 zone_small_init(uma_zone_t zone)
967 {
968 	int rsize;
969 	int memused;
970 	int ipers;
971 
972 	rsize = zone->uz_size;
973 
974 	if (rsize < UMA_SMALLEST_UNIT)
975 		rsize = UMA_SMALLEST_UNIT;
976 
977 	if (rsize & zone->uz_align)
978 		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
979 
980 	zone->uz_rsize = rsize;
981 
982 	rsize += 1;	/* Account for the byte of linkage */
983 	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
984 	zone->uz_ppera = 1;
985 
986 	KASSERT(zone->uz_ipers != 0, ("zone_small_init: ipers is 0, uh-oh!"));
987 	memused = zone->uz_ipers * zone->uz_rsize;
988 
989 	/* Can we do any better? */
990 	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
991 		/*
992 		 * We can't do this if we're internal or if we've been
993 		 * asked to not go to the VM for buckets.  If we do this we
994 		 * may end up going to the VM (kmem_map) for slabs which we
995 		 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
996 		 * result of UMA_ZONE_VM, which clearly forbids it.
997 		 */
998 		if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) ||
999 		    (zone->uz_flags & UMA_ZFLAG_CACHEONLY))
1000 			return;
1001 		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
1002 		if (ipers > zone->uz_ipers) {
1003 			zone->uz_flags |= UMA_ZONE_OFFPAGE;
1004 			if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0)
1005 				zone->uz_flags |= UMA_ZONE_HASH;
1006 			zone->uz_ipers = ipers;
1007 		}
1008 	}
1009 }
1010 
1011 /*
1012  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
1013  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1014  * more complicated.
1015  *
1016  * Arguments
1017  *	zone  The zone we should initialize
1018  *
1019  * Returns
1020  *	Nothing
1021  */
1022 static void
1023 zone_large_init(uma_zone_t zone)
1024 {
1025 	int pages;
1026 
1027 	KASSERT((zone->uz_flags & UMA_ZFLAG_CACHEONLY) == 0,
1028 	    ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1029 
1030 	pages = zone->uz_size / UMA_SLAB_SIZE;
1031 
1032 	/* Account for remainder */
1033 	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
1034 		pages++;
1035 
1036 	zone->uz_ppera = pages;
1037 	zone->uz_ipers = 1;
1038 
1039 	zone->uz_flags |= UMA_ZONE_OFFPAGE;
1040 	if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0)
1041 		zone->uz_flags |= UMA_ZONE_HASH;
1042 
1043 	zone->uz_rsize = zone->uz_size;
1044 }
1045 
1046 /*
1047  * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
1048  * the zone onto the global zone list.
1049  *
1050  * Arguments/Returns follow uma_ctor specifications
1051  *	udata  Actually uma_zcreat_args
1052  */
1053 
1054 static void
1055 zone_ctor(void *mem, int size, void *udata)
1056 {
1057 	struct uma_zctor_args *arg = udata;
1058 	uma_zone_t zone = mem;
1059 	int privlc;
1060 
1061 	bzero(zone, size);
1062 	zone->uz_name = arg->name;
1063 	zone->uz_size = arg->size;
1064 	zone->uz_ctor = arg->ctor;
1065 	zone->uz_dtor = arg->dtor;
1066 	zone->uz_init = arg->uminit;
1067 	zone->uz_fini = arg->fini;
1068 	zone->uz_align = arg->align;
1069 	zone->uz_free = 0;
1070 	zone->uz_pages = 0;
1071 	zone->uz_flags = arg->flags;
1072 	zone->uz_allocf = page_alloc;
1073 	zone->uz_freef = page_free;
1074 
1075 	if (arg->flags & UMA_ZONE_ZINIT)
1076 		zone->uz_init = zero_init;
1077 
1078 	if (arg->flags & UMA_ZONE_VM)
1079 		zone->uz_flags |= UMA_ZFLAG_CACHEONLY;
1080 
1081 	/*
1082 	 * XXX:
1083 	 * The +1 byte added to uz_size is to account for the byte of
1084 	 * linkage that is added to the size in zone_small_init().  If
1085 	 * we don't account for this here then we may end up in
1086 	 * zone_small_init() with a calculated 'ipers' of 0.
1087 	 */
1088 	if ((zone->uz_size+1) > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1089 		zone_large_init(zone);
1090 	else
1091 		zone_small_init(zone);
1092 	/*
1093 	 * If we haven't booted yet we need allocations to go through the
1094 	 * startup cache until the vm is ready.
1095 	 */
1096 	if (zone->uz_ppera == 1) {
1097 #ifdef UMA_MD_SMALL_ALLOC
1098 		zone->uz_allocf = uma_small_alloc;
1099 		zone->uz_freef = uma_small_free;
1100 #endif
1101 		if (booted == 0)
1102 			zone->uz_allocf = startup_alloc;
1103 	}
1104 	if (arg->flags & UMA_ZONE_MTXCLASS)
1105 		privlc = 1;
1106 	else
1107 		privlc = 0;
1108 
1109 	/*
1110 	 * If we're putting the slab header in the actual page we need to
1111 	 * figure out where in each page it goes.  This calculates a right
1112 	 * justified offset into the memory on an ALIGN_PTR boundary.
1113 	 */
1114 	if (!(zone->uz_flags & UMA_ZONE_OFFPAGE)) {
1115 		int totsize;
1116 
1117 		/* Size of the slab struct and free list */
1118 		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1119 		if (totsize & UMA_ALIGN_PTR)
1120 			totsize = (totsize & ~UMA_ALIGN_PTR) +
1121 			    (UMA_ALIGN_PTR + 1);
1122 		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1123 		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1124 		    + zone->uz_ipers;
1125 		/* I don't think it's possible, but I'll make sure anyway */
1126 		if (totsize > UMA_SLAB_SIZE) {
1127 			printf("zone %s ipers %d rsize %d size %d\n",
1128 			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1129 			    zone->uz_size);
1130 			panic("UMA slab won't fit.\n");
1131 		}
1132 	}
1133 
1134 	if (zone->uz_flags & UMA_ZONE_HASH)
1135 		hash_alloc(&zone->uz_hash);
1136 
1137 #ifdef UMA_DEBUG
1138 	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1139 	    zone->uz_name, zone,
1140 	    zone->uz_size, zone->uz_ipers,
1141 	    zone->uz_ppera, zone->uz_pgoff);
1142 #endif
1143 	ZONE_LOCK_INIT(zone, privlc);
1144 
1145 	mtx_lock(&uma_mtx);
1146 	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1147 	mtx_unlock(&uma_mtx);
1148 
1149 	/*
1150 	 * Some internal zones don't have room allocated for the per cpu
1151 	 * caches.  If we're internal, bail out here.
1152 	 */
1153 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1154 		return;
1155 
1156 	if (zone->uz_ipers <= BUCKET_MAX)
1157 		zone->uz_count = zone->uz_ipers;
1158 	else
1159 		zone->uz_count = BUCKET_MAX;
1160 }
1161 
1162 /*
1163  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
1164  * and removes the zone from the global list.
1165  *
1166  * Arguments/Returns follow uma_dtor specifications
1167  *	udata  unused
1168  */
1169 
1170 static void
1171 zone_dtor(void *arg, int size, void *udata)
1172 {
1173 	uma_zone_t zone;
1174 
1175 	zone = (uma_zone_t)arg;
1176 
1177 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1178 		cache_drain(zone);
1179 	mtx_lock(&uma_mtx);
1180 	LIST_REMOVE(zone, uz_link);
1181 	zone_drain(zone);
1182 	mtx_unlock(&uma_mtx);
1183 
1184 	ZONE_LOCK(zone);
1185 	if (zone->uz_free != 0) {
1186 		printf("Zone %s was not empty (%d items). "
1187 		    " Lost %d pages of memory.\n",
1188 		    zone->uz_name, zone->uz_free, zone->uz_pages);
1189 		uma_print_zone(zone);
1190 	}
1191 
1192 	ZONE_UNLOCK(zone);
1193 	if (zone->uz_flags & UMA_ZONE_HASH)
1194 		hash_free(&zone->uz_hash);
1195 
1196 	ZONE_LOCK_FINI(zone);
1197 }
1198 /*
1199  * Traverses every zone in the system and calls a callback
1200  *
1201  * Arguments:
1202  *	zfunc  A pointer to a function which accepts a zone
1203  *		as an argument.
1204  *
1205  * Returns:
1206  *	Nothing
1207  */
1208 static void
1209 zone_foreach(void (*zfunc)(uma_zone_t))
1210 {
1211 	uma_zone_t zone;
1212 
1213 	mtx_lock(&uma_mtx);
1214 	LIST_FOREACH(zone, &uma_zones, uz_link)
1215 		zfunc(zone);
1216 	mtx_unlock(&uma_mtx);
1217 }
1218 
1219 /* Public functions */
1220 /* See uma.h */
1221 void
1222 uma_startup(void *bootmem)
1223 {
1224 	struct uma_zctor_args args;
1225 	uma_slab_t slab;
1226 	int slabsize;
1227 	int i;
1228 
1229 #ifdef UMA_DEBUG
1230 	printf("Creating uma zone headers zone.\n");
1231 #endif
1232 	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1233 	/* "manually" Create the initial zone */
1234 	args.name = "UMA Zones";
1235 	args.size = sizeof(struct uma_zone) +
1236 	    (sizeof(struct uma_cache) * (mp_maxid + 1));
1237 	args.ctor = zone_ctor;
1238 	args.dtor = zone_dtor;
1239 	args.uminit = zero_init;
1240 	args.fini = NULL;
1241 	args.align = 32 - 1;
1242 	args.flags = UMA_ZFLAG_INTERNAL;
1243 	/* The initial zone has no Per cpu queues so it's smaller */
1244 	zone_ctor(zones, sizeof(struct uma_zone), &args);
1245 
1246 	/* Initialize the pcpu cache lock set once and for all */
1247 	for (i = 0; i <= mp_maxid; i++)
1248 		CPU_LOCK_INIT(i);
1249 #ifdef UMA_DEBUG
1250 	printf("Filling boot free list.\n");
1251 #endif
1252 	for (i = 0; i < UMA_BOOT_PAGES; i++) {
1253 		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1254 		slab->us_data = (u_int8_t *)slab;
1255 		slab->us_flags = UMA_SLAB_BOOT;
1256 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1257 		uma_boot_free++;
1258 	}
1259 
1260 #ifdef UMA_DEBUG
1261 	printf("Creating slab zone.\n");
1262 #endif
1263 
1264 	/*
1265 	 * This is the max number of free list items we'll have with
1266 	 * offpage slabs.
1267 	 */
1268 	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1269 	slabsize /= UMA_MAX_WASTE;
1270 	slabsize++;			/* In case there it's rounded */
1271 	slabsize += sizeof(struct uma_slab);
1272 
1273 	/* Now make a zone for slab headers */
1274 	slabzone = uma_zcreate("UMA Slabs",
1275 				slabsize,
1276 				NULL, NULL, NULL, NULL,
1277 				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1278 
1279 	hashzone = uma_zcreate("UMA Hash",
1280 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1281 	    NULL, NULL, NULL, NULL,
1282 	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1283 
1284 	bucket_init();
1285 
1286 #ifdef UMA_MD_SMALL_ALLOC
1287 	booted = 1;
1288 #endif
1289 
1290 #ifdef UMA_DEBUG
1291 	printf("UMA startup complete.\n");
1292 #endif
1293 }
1294 
1295 /* see uma.h */
1296 void
1297 uma_startup2(void)
1298 {
1299 	booted = 1;
1300 	bucket_enable();
1301 #ifdef UMA_DEBUG
1302 	printf("UMA startup2 complete.\n");
1303 #endif
1304 }
1305 
1306 /*
1307  * Initialize our callout handle
1308  *
1309  */
1310 
1311 static void
1312 uma_startup3(void)
1313 {
1314 #ifdef UMA_DEBUG
1315 	printf("Starting callout.\n");
1316 #endif
1317 	callout_init(&uma_callout, CALLOUT_MPSAFE);
1318 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1319 #ifdef UMA_DEBUG
1320 	printf("UMA startup3 complete.\n");
1321 #endif
1322 }
1323 
1324 /* See uma.h */
1325 uma_zone_t
1326 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1327 		uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1328 
1329 {
1330 	struct uma_zctor_args args;
1331 
1332 	/* This stuff is essential for the zone ctor */
1333 	args.name = name;
1334 	args.size = size;
1335 	args.ctor = ctor;
1336 	args.dtor = dtor;
1337 	args.uminit = uminit;
1338 	args.fini = fini;
1339 	args.align = align;
1340 	args.flags = flags;
1341 
1342 	return (uma_zalloc_internal(zones, &args, M_WAITOK));
1343 }
1344 
1345 /* See uma.h */
1346 void
1347 uma_zdestroy(uma_zone_t zone)
1348 {
1349 	uma_zfree_internal(zones, zone, NULL, 0);
1350 }
1351 
1352 /* See uma.h */
1353 void *
1354 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1355 {
1356 	void *item;
1357 	uma_cache_t cache;
1358 	uma_bucket_t bucket;
1359 	int cpu;
1360 
1361 	/* This is the fast path allocation */
1362 #ifdef UMA_DEBUG_ALLOC_1
1363 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1364 #endif
1365 
1366 #ifdef INVARIANTS
1367 	/*
1368 	 * To make sure that WAITOK or NOWAIT is set, but not more than
1369 	 * one, and check against the API botches that are common.
1370 	 * The uma code implies M_WAITOK if M_NOWAIT is not set, so
1371 	 * we default to waiting if none of the flags is set.
1372 	 */
1373 	cpu = flags & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT);
1374 	if (cpu != M_NOWAIT && cpu != M_WAITOK) {
1375 		static	struct timeval lasterr;
1376 		static	int curerr, once;
1377 		if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
1378 			printf("Bad uma_zalloc flags: %x\n", cpu);
1379 			backtrace();
1380 			once++;
1381 		}
1382 	}
1383 #endif
1384 	if (!(flags & M_NOWAIT)) {
1385 		KASSERT(curthread->td_intr_nesting_level == 0,
1386 		   ("malloc(M_WAITOK) in interrupt context"));
1387 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1388 		    "malloc() of \"%s\"", zone->uz_name);
1389 	}
1390 
1391 zalloc_restart:
1392 	cpu = PCPU_GET(cpuid);
1393 	CPU_LOCK(cpu);
1394 	cache = &zone->uz_cpu[cpu];
1395 
1396 zalloc_start:
1397 	bucket = cache->uc_allocbucket;
1398 
1399 	if (bucket) {
1400 		if (bucket->ub_cnt > 0) {
1401 			bucket->ub_cnt--;
1402 			item = bucket->ub_bucket[bucket->ub_cnt];
1403 #ifdef INVARIANTS
1404 			bucket->ub_bucket[bucket->ub_cnt] = NULL;
1405 #endif
1406 			KASSERT(item != NULL,
1407 			    ("uma_zalloc: Bucket pointer mangled."));
1408 			cache->uc_allocs++;
1409 #ifdef INVARIANTS
1410 			ZONE_LOCK(zone);
1411 			uma_dbg_alloc(zone, NULL, item);
1412 			ZONE_UNLOCK(zone);
1413 #endif
1414 			CPU_UNLOCK(cpu);
1415 			if (zone->uz_ctor)
1416 				zone->uz_ctor(item, zone->uz_size, udata);
1417 			if (flags & M_ZERO)
1418 				bzero(item, zone->uz_size);
1419 			return (item);
1420 		} else if (cache->uc_freebucket) {
1421 			/*
1422 			 * We have run out of items in our allocbucket.
1423 			 * See if we can switch with our free bucket.
1424 			 */
1425 			if (cache->uc_freebucket->ub_cnt > 0) {
1426 #ifdef UMA_DEBUG_ALLOC
1427 				printf("uma_zalloc: Swapping empty with"
1428 				    " alloc.\n");
1429 #endif
1430 				bucket = cache->uc_freebucket;
1431 				cache->uc_freebucket = cache->uc_allocbucket;
1432 				cache->uc_allocbucket = bucket;
1433 
1434 				goto zalloc_start;
1435 			}
1436 		}
1437 	}
1438 	ZONE_LOCK(zone);
1439 	/* Since we have locked the zone we may as well send back our stats */
1440 	zone->uz_allocs += cache->uc_allocs;
1441 	cache->uc_allocs = 0;
1442 
1443 	/* Our old one is now a free bucket */
1444 	if (cache->uc_allocbucket) {
1445 		KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1446 		    ("uma_zalloc_arg: Freeing a non free bucket."));
1447 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1448 		    cache->uc_allocbucket, ub_link);
1449 		cache->uc_allocbucket = NULL;
1450 	}
1451 
1452 	/* Check the free list for a new alloc bucket */
1453 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1454 		KASSERT(bucket->ub_cnt != 0,
1455 		    ("uma_zalloc_arg: Returning an empty bucket."));
1456 
1457 		LIST_REMOVE(bucket, ub_link);
1458 		cache->uc_allocbucket = bucket;
1459 		ZONE_UNLOCK(zone);
1460 		goto zalloc_start;
1461 	}
1462 	/* We are no longer associated with this cpu!!! */
1463 	CPU_UNLOCK(cpu);
1464 
1465 	/* Bump up our uz_count so we get here less */
1466 	if (zone->uz_count < BUCKET_MAX)
1467 		zone->uz_count++;
1468 	/*
1469 	 * Now lets just fill a bucket and put it on the free list.  If that
1470 	 * works we'll restart the allocation from the begining.
1471 	 */
1472 	if (uma_zalloc_bucket(zone, flags)) {
1473 		ZONE_UNLOCK(zone);
1474 		goto zalloc_restart;
1475 	}
1476 	ZONE_UNLOCK(zone);
1477 	/*
1478 	 * We may not be able to get a bucket so return an actual item.
1479 	 */
1480 #ifdef UMA_DEBUG
1481 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1482 #endif
1483 
1484 	return (uma_zalloc_internal(zone, udata, flags));
1485 }
1486 
1487 static uma_slab_t
1488 uma_zone_slab(uma_zone_t zone, int flags)
1489 {
1490 	uma_slab_t slab;
1491 
1492 	/*
1493 	 * This is to prevent us from recursively trying to allocate
1494 	 * buckets.  The problem is that if an allocation forces us to
1495 	 * grab a new bucket we will call page_alloc, which will go off
1496 	 * and cause the vm to allocate vm_map_entries.  If we need new
1497 	 * buckets there too we will recurse in kmem_alloc and bad
1498 	 * things happen.  So instead we return a NULL bucket, and make
1499 	 * the code that allocates buckets smart enough to deal with it
1500 	 */
1501 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL && zone->uz_recurse != 0)
1502 		return (NULL);
1503 
1504 	slab = NULL;
1505 
1506 	for (;;) {
1507 		/*
1508 		 * Find a slab with some space.  Prefer slabs that are partially
1509 		 * used over those that are totally full.  This helps to reduce
1510 		 * fragmentation.
1511 		 */
1512 		if (zone->uz_free != 0) {
1513 			if (!LIST_EMPTY(&zone->uz_part_slab)) {
1514 				slab = LIST_FIRST(&zone->uz_part_slab);
1515 			} else {
1516 				slab = LIST_FIRST(&zone->uz_free_slab);
1517 				LIST_REMOVE(slab, us_link);
1518 				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
1519 				us_link);
1520 			}
1521 			return (slab);
1522 		}
1523 
1524 		/*
1525 		 * M_NOVM means don't ask at all!
1526 		 */
1527 		if (flags & M_NOVM)
1528 			break;
1529 
1530 		if (zone->uz_maxpages &&
1531 		    zone->uz_pages >= zone->uz_maxpages) {
1532 			zone->uz_flags |= UMA_ZFLAG_FULL;
1533 
1534 			if (flags & M_NOWAIT)
1535 				break;
1536 			else
1537 				msleep(zone, &zone->uz_lock, PVM,
1538 				    "zonelimit", 0);
1539 			continue;
1540 		}
1541 		zone->uz_recurse++;
1542 		slab = slab_zalloc(zone, flags);
1543 		zone->uz_recurse--;
1544 		/*
1545 		 * If we got a slab here it's safe to mark it partially used
1546 		 * and return.  We assume that the caller is going to remove
1547 		 * at least one item.
1548 		 */
1549 		if (slab) {
1550 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1551 			return (slab);
1552 		}
1553 		/*
1554 		 * We might not have been able to get a slab but another cpu
1555 		 * could have while we were unlocked.  Check again before we
1556 		 * fail.
1557 		 */
1558 		if (flags & M_NOWAIT)
1559 			flags |= M_NOVM;
1560 	}
1561 	return (slab);
1562 }
1563 
1564 static void *
1565 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
1566 {
1567 	void *item;
1568 	u_int8_t freei;
1569 
1570 	freei = slab->us_firstfree;
1571 	slab->us_firstfree = slab->us_freelist[freei];
1572 	item = slab->us_data + (zone->uz_rsize * freei);
1573 
1574 	slab->us_freecount--;
1575 	zone->uz_free--;
1576 #ifdef INVARIANTS
1577 	uma_dbg_alloc(zone, slab, item);
1578 #endif
1579 	/* Move this slab to the full list */
1580 	if (slab->us_freecount == 0) {
1581 		LIST_REMOVE(slab, us_link);
1582 		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1583 	}
1584 
1585 	return (item);
1586 }
1587 
1588 static int
1589 uma_zalloc_bucket(uma_zone_t zone, int flags)
1590 {
1591 	uma_bucket_t bucket;
1592 	uma_slab_t slab;
1593 	int max;
1594 
1595 	/*
1596 	 * Try this zone's free list first so we don't allocate extra buckets.
1597 	 */
1598 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1599 		KASSERT(bucket->ub_cnt == 0,
1600 		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
1601 		LIST_REMOVE(bucket, ub_link);
1602 	} else {
1603 		int bflags;
1604 
1605 		bflags = (flags & ~M_ZERO);
1606 		if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
1607 			bflags |= M_NOVM;
1608 
1609 		ZONE_UNLOCK(zone);
1610 		bucket = bucket_alloc(zone->uz_count, bflags);
1611 		ZONE_LOCK(zone);
1612 	}
1613 
1614 	if (bucket == NULL)
1615 		return (0);
1616 
1617 #ifdef SMP
1618 	/*
1619 	 * This code is here to limit the number of simultaneous bucket fills
1620 	 * for any given zone to the number of per cpu caches in this zone. This
1621 	 * is done so that we don't allocate more memory than we really need.
1622 	 */
1623 	if (zone->uz_fills >= mp_ncpus)
1624 		goto done;
1625 
1626 #endif
1627 	zone->uz_fills++;
1628 
1629 	max = MIN(bucket->ub_entries, zone->uz_count);
1630 	/* Try to keep the buckets totally full */
1631 	while (bucket->ub_cnt < max &&
1632 	    (slab = uma_zone_slab(zone, flags)) != NULL) {
1633 		while (slab->us_freecount && bucket->ub_cnt < max) {
1634 			bucket->ub_bucket[bucket->ub_cnt++] =
1635 			    uma_slab_alloc(zone, slab);
1636 		}
1637 		/* Don't block on the next fill */
1638 		flags |= M_NOWAIT;
1639 	}
1640 
1641 	zone->uz_fills--;
1642 
1643 	if (bucket->ub_cnt != 0) {
1644 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1645 		    bucket, ub_link);
1646 		return (1);
1647 	}
1648 #ifdef SMP
1649 done:
1650 #endif
1651 	bucket_free(bucket);
1652 
1653 	return (0);
1654 }
1655 /*
1656  * Allocates an item for an internal zone
1657  *
1658  * Arguments
1659  *	zone   The zone to alloc for.
1660  *	udata  The data to be passed to the constructor.
1661  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
1662  *
1663  * Returns
1664  *	NULL if there is no memory and M_NOWAIT is set
1665  *	An item if successful
1666  */
1667 
1668 static void *
1669 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
1670 {
1671 	uma_slab_t slab;
1672 	void *item;
1673 
1674 	item = NULL;
1675 
1676 #ifdef UMA_DEBUG_ALLOC
1677 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1678 #endif
1679 	ZONE_LOCK(zone);
1680 
1681 	slab = uma_zone_slab(zone, flags);
1682 	if (slab == NULL) {
1683 		ZONE_UNLOCK(zone);
1684 		return (NULL);
1685 	}
1686 
1687 	item = uma_slab_alloc(zone, slab);
1688 
1689 	ZONE_UNLOCK(zone);
1690 
1691 	if (zone->uz_ctor != NULL)
1692 		zone->uz_ctor(item, zone->uz_size, udata);
1693 	if (flags & M_ZERO)
1694 		bzero(item, zone->uz_size);
1695 
1696 	return (item);
1697 }
1698 
1699 /* See uma.h */
1700 void
1701 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1702 {
1703 	uma_cache_t cache;
1704 	uma_bucket_t bucket;
1705 	int bflags;
1706 	int cpu;
1707 	int skip;
1708 
1709 	/* This is the fast path free */
1710 	skip = 0;
1711 #ifdef UMA_DEBUG_ALLOC_1
1712 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1713 #endif
1714 	/*
1715 	 * The race here is acceptable.  If we miss it we'll just have to wait
1716 	 * a little longer for the limits to be reset.
1717 	 */
1718 
1719 	if (zone->uz_flags & UMA_ZFLAG_FULL)
1720 		goto zfree_internal;
1721 
1722 	if (zone->uz_dtor) {
1723 		zone->uz_dtor(item, zone->uz_size, udata);
1724 		skip = 1;
1725 	}
1726 
1727 zfree_restart:
1728 	cpu = PCPU_GET(cpuid);
1729 	CPU_LOCK(cpu);
1730 	cache = &zone->uz_cpu[cpu];
1731 
1732 zfree_start:
1733 	bucket = cache->uc_freebucket;
1734 
1735 	if (bucket) {
1736 		/*
1737 		 * Do we have room in our bucket? It is OK for this uz count
1738 		 * check to be slightly out of sync.
1739 		 */
1740 
1741 		if (bucket->ub_cnt < bucket->ub_entries) {
1742 			KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
1743 			    ("uma_zfree: Freeing to non free bucket index."));
1744 			bucket->ub_bucket[bucket->ub_cnt] = item;
1745 			bucket->ub_cnt++;
1746 #ifdef INVARIANTS
1747 			ZONE_LOCK(zone);
1748 			if (zone->uz_flags & UMA_ZONE_MALLOC)
1749 				uma_dbg_free(zone, udata, item);
1750 			else
1751 				uma_dbg_free(zone, NULL, item);
1752 			ZONE_UNLOCK(zone);
1753 #endif
1754 			CPU_UNLOCK(cpu);
1755 			return;
1756 		} else if (cache->uc_allocbucket) {
1757 #ifdef UMA_DEBUG_ALLOC
1758 			printf("uma_zfree: Swapping buckets.\n");
1759 #endif
1760 			/*
1761 			 * We have run out of space in our freebucket.
1762 			 * See if we can switch with our alloc bucket.
1763 			 */
1764 			if (cache->uc_allocbucket->ub_cnt <
1765 			    cache->uc_freebucket->ub_cnt) {
1766 				bucket = cache->uc_freebucket;
1767 				cache->uc_freebucket = cache->uc_allocbucket;
1768 				cache->uc_allocbucket = bucket;
1769 				goto zfree_start;
1770 			}
1771 		}
1772 	}
1773 	/*
1774 	 * We can get here for two reasons:
1775 	 *
1776 	 * 1) The buckets are NULL
1777 	 * 2) The alloc and free buckets are both somewhat full.
1778 	 */
1779 
1780 	ZONE_LOCK(zone);
1781 
1782 	bucket = cache->uc_freebucket;
1783 	cache->uc_freebucket = NULL;
1784 
1785 	/* Can we throw this on the zone full list? */
1786 	if (bucket != NULL) {
1787 #ifdef UMA_DEBUG_ALLOC
1788 		printf("uma_zfree: Putting old bucket on the free list.\n");
1789 #endif
1790 		/* ub_cnt is pointing to the last free item */
1791 		KASSERT(bucket->ub_cnt != 0,
1792 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1793 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1794 		    bucket, ub_link);
1795 	}
1796 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1797 		LIST_REMOVE(bucket, ub_link);
1798 		ZONE_UNLOCK(zone);
1799 		cache->uc_freebucket = bucket;
1800 		goto zfree_start;
1801 	}
1802 	/* We're done with this CPU now */
1803 	CPU_UNLOCK(cpu);
1804 
1805 	/* And the zone.. */
1806 	ZONE_UNLOCK(zone);
1807 
1808 #ifdef UMA_DEBUG_ALLOC
1809 	printf("uma_zfree: Allocating new free bucket.\n");
1810 #endif
1811 	bflags = M_NOWAIT;
1812 
1813 	if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
1814 		bflags |= M_NOVM;
1815 	bucket = bucket_alloc(zone->uz_count, bflags);
1816 	if (bucket) {
1817 		ZONE_LOCK(zone);
1818 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1819 		    bucket, ub_link);
1820 		ZONE_UNLOCK(zone);
1821 		goto zfree_restart;
1822 	}
1823 
1824 	/*
1825 	 * If nothing else caught this, we'll just do an internal free.
1826 	 */
1827 
1828 zfree_internal:
1829 
1830 #ifdef INVARIANTS
1831 	/*
1832 	 * If we need to skip the dtor and the uma_dbg_free in
1833 	 * uma_zfree_internal because we've already called the dtor
1834 	 * above, but we ended up here, then we need to make sure
1835 	 * that we take care of the uma_dbg_free immediately.
1836 	 */
1837 	if (skip) {
1838 		ZONE_LOCK(zone);
1839 		if (zone->uz_flags & UMA_ZONE_MALLOC)
1840 			uma_dbg_free(zone, udata, item);
1841 		else
1842 			uma_dbg_free(zone, NULL, item);
1843 		ZONE_UNLOCK(zone);
1844 	}
1845 #endif
1846 	uma_zfree_internal(zone, item, udata, skip);
1847 
1848 	return;
1849 
1850 }
1851 
1852 /*
1853  * Frees an item to an INTERNAL zone or allocates a free bucket
1854  *
1855  * Arguments:
1856  *	zone   The zone to free to
1857  *	item   The item we're freeing
1858  *	udata  User supplied data for the dtor
1859  *	skip   Skip the dtor, it was done in uma_zfree_arg
1860  */
1861 static void
1862 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1863 {
1864 	uma_slab_t slab;
1865 	u_int8_t *mem;
1866 	u_int8_t freei;
1867 
1868 	if (!skip && zone->uz_dtor)
1869 		zone->uz_dtor(item, zone->uz_size, udata);
1870 
1871 	ZONE_LOCK(zone);
1872 
1873 	if (!(zone->uz_flags & UMA_ZONE_MALLOC)) {
1874 		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1875 		if (zone->uz_flags & UMA_ZONE_HASH)
1876 			slab = hash_sfind(&zone->uz_hash, mem);
1877 		else {
1878 			mem += zone->uz_pgoff;
1879 			slab = (uma_slab_t)mem;
1880 		}
1881 	} else {
1882 		slab = (uma_slab_t)udata;
1883 	}
1884 
1885 	/* Do we need to remove from any lists? */
1886 	if (slab->us_freecount+1 == zone->uz_ipers) {
1887 		LIST_REMOVE(slab, us_link);
1888 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1889 	} else if (slab->us_freecount == 0) {
1890 		LIST_REMOVE(slab, us_link);
1891 		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1892 	}
1893 
1894 	/* Slab management stuff */
1895 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
1896 		/ zone->uz_rsize;
1897 
1898 #ifdef INVARIANTS
1899 	if (!skip)
1900 		uma_dbg_free(zone, slab, item);
1901 #endif
1902 
1903 	slab->us_freelist[freei] = slab->us_firstfree;
1904 	slab->us_firstfree = freei;
1905 	slab->us_freecount++;
1906 
1907 	/* Zone statistics */
1908 	zone->uz_free++;
1909 
1910 	if (zone->uz_flags & UMA_ZFLAG_FULL) {
1911 		if (zone->uz_pages < zone->uz_maxpages)
1912 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
1913 
1914 		/* We can handle one more allocation */
1915 		wakeup_one(zone);
1916 	}
1917 
1918 	ZONE_UNLOCK(zone);
1919 }
1920 
1921 /* See uma.h */
1922 void
1923 uma_zone_set_max(uma_zone_t zone, int nitems)
1924 {
1925 	ZONE_LOCK(zone);
1926 	if (zone->uz_ppera > 1)
1927 		zone->uz_maxpages = nitems * zone->uz_ppera;
1928 	else
1929 		zone->uz_maxpages = nitems / zone->uz_ipers;
1930 
1931 	if (zone->uz_maxpages * zone->uz_ipers < nitems)
1932 		zone->uz_maxpages++;
1933 
1934 	ZONE_UNLOCK(zone);
1935 }
1936 
1937 /* See uma.h */
1938 void
1939 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1940 {
1941 	ZONE_LOCK(zone);
1942 	zone->uz_freef = freef;
1943 	ZONE_UNLOCK(zone);
1944 }
1945 
1946 /* See uma.h */
1947 void
1948 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1949 {
1950 	ZONE_LOCK(zone);
1951 	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1952 	zone->uz_allocf = allocf;
1953 	ZONE_UNLOCK(zone);
1954 }
1955 
1956 /* See uma.h */
1957 int
1958 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1959 {
1960 	int pages;
1961 	vm_offset_t kva;
1962 
1963 	pages = count / zone->uz_ipers;
1964 
1965 	if (pages * zone->uz_ipers < count)
1966 		pages++;
1967 
1968 	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1969 
1970 	if (kva == 0)
1971 		return (0);
1972 	if (obj == NULL) {
1973 		obj = vm_object_allocate(OBJT_DEFAULT,
1974 		    pages);
1975 	} else {
1976 		VM_OBJECT_LOCK_INIT(obj);
1977 		_vm_object_allocate(OBJT_DEFAULT,
1978 		    pages, obj);
1979 	}
1980 	ZONE_LOCK(zone);
1981 	zone->uz_kva = kva;
1982 	zone->uz_obj = obj;
1983 	zone->uz_maxpages = pages;
1984 	zone->uz_allocf = obj_alloc;
1985 	zone->uz_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
1986 	ZONE_UNLOCK(zone);
1987 	return (1);
1988 }
1989 
1990 /* See uma.h */
1991 void
1992 uma_prealloc(uma_zone_t zone, int items)
1993 {
1994 	int slabs;
1995 	uma_slab_t slab;
1996 
1997 	ZONE_LOCK(zone);
1998 	slabs = items / zone->uz_ipers;
1999 	if (slabs * zone->uz_ipers < items)
2000 		slabs++;
2001 	while (slabs > 0) {
2002 		slab = slab_zalloc(zone, M_WAITOK);
2003 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
2004 		slabs--;
2005 	}
2006 	ZONE_UNLOCK(zone);
2007 }
2008 
2009 /* See uma.h */
2010 void
2011 uma_reclaim(void)
2012 {
2013 #ifdef UMA_DEBUG
2014 	printf("UMA: vm asked us to release pages!\n");
2015 #endif
2016 	bucket_enable();
2017 	zone_foreach(zone_drain);
2018 	/*
2019 	 * Some slabs may have been freed but this zone will be visited early
2020 	 * we visit again so that we can free pages that are empty once other
2021 	 * zones are drained.  We have to do the same for buckets.
2022 	 */
2023 	zone_drain(slabzone);
2024 	bucket_zone_drain();
2025 }
2026 
2027 void *
2028 uma_large_malloc(int size, int wait)
2029 {
2030 	void *mem;
2031 	uma_slab_t slab;
2032 	u_int8_t flags;
2033 
2034 	slab = uma_zalloc_internal(slabzone, NULL, wait);
2035 	if (slab == NULL)
2036 		return (NULL);
2037 	mem = page_alloc(NULL, size, &flags, wait);
2038 	if (mem) {
2039 		vsetslab((vm_offset_t)mem, slab);
2040 		slab->us_data = mem;
2041 		slab->us_flags = flags | UMA_SLAB_MALLOC;
2042 		slab->us_size = size;
2043 	} else {
2044 		uma_zfree_internal(slabzone, slab, NULL, 0);
2045 	}
2046 
2047 
2048 	return (mem);
2049 }
2050 
2051 void
2052 uma_large_free(uma_slab_t slab)
2053 {
2054 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
2055 	page_free(slab->us_data, slab->us_size, slab->us_flags);
2056 	uma_zfree_internal(slabzone, slab, NULL, 0);
2057 }
2058 
2059 void
2060 uma_print_stats(void)
2061 {
2062 	zone_foreach(uma_print_zone);
2063 }
2064 
2065 static void
2066 slab_print(uma_slab_t slab)
2067 {
2068 	printf("slab: zone %p, data %p, freecount %d, firstfree %d\n",
2069 		slab->us_zone, slab->us_data, slab->us_freecount,
2070 		slab->us_firstfree);
2071 }
2072 
2073 static void
2074 cache_print(uma_cache_t cache)
2075 {
2076 	printf("alloc: %p(%d), free: %p(%d)\n",
2077 		cache->uc_allocbucket,
2078 		cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2079 		cache->uc_freebucket,
2080 		cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2081 }
2082 
2083 void
2084 uma_print_zone(uma_zone_t zone)
2085 {
2086 	uma_cache_t cache;
2087 	uma_slab_t slab;
2088 	int i;
2089 
2090 	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2091 	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2092 	    zone->uz_ipers, zone->uz_ppera,
2093 	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2094 	printf("Part slabs:\n");
2095 	LIST_FOREACH(slab, &zone->uz_part_slab, us_link)
2096 		slab_print(slab);
2097 	printf("Free slabs:\n");
2098 	LIST_FOREACH(slab, &zone->uz_free_slab, us_link)
2099 		slab_print(slab);
2100 	printf("Full slabs:\n");
2101 	LIST_FOREACH(slab, &zone->uz_full_slab, us_link)
2102 		slab_print(slab);
2103 	for (i = 0; i <= mp_maxid; i++) {
2104 		if (CPU_ABSENT(i))
2105 			continue;
2106 		cache = &zone->uz_cpu[i];
2107 		printf("CPU %d Cache:\n", i);
2108 		cache_print(cache);
2109 	}
2110 }
2111 
2112 /*
2113  * Sysctl handler for vm.zone
2114  *
2115  * stolen from vm_zone.c
2116  */
2117 static int
2118 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2119 {
2120 	int error, len, cnt;
2121 	const int linesize = 128;	/* conservative */
2122 	int totalfree;
2123 	char *tmpbuf, *offset;
2124 	uma_zone_t z;
2125 	char *p;
2126 	int cpu;
2127 	int cachefree;
2128 	uma_bucket_t bucket;
2129 	uma_cache_t cache;
2130 
2131 	cnt = 0;
2132 	mtx_lock(&uma_mtx);
2133 	LIST_FOREACH(z, &uma_zones, uz_link)
2134 		cnt++;
2135 	mtx_unlock(&uma_mtx);
2136 	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2137 			M_TEMP, M_WAITOK);
2138 	len = snprintf(tmpbuf, linesize,
2139 	    "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2140 	if (cnt == 0)
2141 		tmpbuf[len - 1] = '\0';
2142 	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2143 	if (error || cnt == 0)
2144 		goto out;
2145 	offset = tmpbuf;
2146 	mtx_lock(&uma_mtx);
2147 	LIST_FOREACH(z, &uma_zones, uz_link) {
2148 		if (cnt == 0)	/* list may have changed size */
2149 			break;
2150 		if (!(z->uz_flags & UMA_ZFLAG_INTERNAL)) {
2151 			for (cpu = 0; cpu <= mp_maxid; cpu++) {
2152 				if (CPU_ABSENT(cpu))
2153 					continue;
2154 				CPU_LOCK(cpu);
2155 			}
2156 		}
2157 		ZONE_LOCK(z);
2158 		cachefree = 0;
2159 		if (!(z->uz_flags & UMA_ZFLAG_INTERNAL)) {
2160 			for (cpu = 0; cpu <= mp_maxid; cpu++) {
2161 				if (CPU_ABSENT(cpu))
2162 					continue;
2163 				cache = &z->uz_cpu[cpu];
2164 				if (cache->uc_allocbucket != NULL)
2165 					cachefree += cache->uc_allocbucket->ub_cnt;
2166 				if (cache->uc_freebucket != NULL)
2167 					cachefree += cache->uc_freebucket->ub_cnt;
2168 				CPU_UNLOCK(cpu);
2169 			}
2170 		}
2171 		LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2172 			cachefree += bucket->ub_cnt;
2173 		}
2174 		totalfree = z->uz_free + cachefree;
2175 		len = snprintf(offset, linesize,
2176 		    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2177 		    z->uz_name, z->uz_size,
2178 		    z->uz_maxpages * z->uz_ipers,
2179 		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2180 		    totalfree,
2181 		    (unsigned long long)z->uz_allocs);
2182 		ZONE_UNLOCK(z);
2183 		for (p = offset + 12; p > offset && *p == ' '; --p)
2184 			/* nothing */ ;
2185 		p[1] = ':';
2186 		cnt--;
2187 		offset += len;
2188 	}
2189 	mtx_unlock(&uma_mtx);
2190 	*offset++ = '\0';
2191 	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2192 out:
2193 	FREE(tmpbuf, M_TEMP);
2194 	return (error);
2195 }
2196