xref: /freebsd/sys/vm/uma_core.c (revision 52ec752989b2e6d4e9a59a8ff25d8ff596d85e62)
1 /*
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * uma_core.c  Implementation of the Universal Memory allocator
29  *
30  * This allocator is intended to replace the multitude of similar object caches
31  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
32  * efficient.  A primary design goal is to return unused memory to the rest of
33  * the system.  This will make the system as a whole more flexible due to the
34  * ability to move memory to subsystems which most need it instead of leaving
35  * pools of reserved memory unused.
36  *
37  * The basic ideas stem from similar slab/zone based allocators whose algorithms
38  * are well known.
39  *
40  */
41 
42 /*
43  * TODO:
44  *	- Improve memory usage for large allocations
45  *	- Investigate cache size adjustments
46  */
47 
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
50 
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57 
58 #include "opt_param.h"
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/kernel.h>
62 #include <sys/types.h>
63 #include <sys/queue.h>
64 #include <sys/malloc.h>
65 #include <sys/lock.h>
66 #include <sys/sysctl.h>
67 #include <sys/mutex.h>
68 #include <sys/proc.h>
69 #include <sys/smp.h>
70 #include <sys/vmmeter.h>
71 #include <sys/mbuf.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_extern.h>
80 #include <vm/uma.h>
81 #include <vm/uma_int.h>
82 #include <vm/uma_dbg.h>
83 
84 #include <machine/vmparam.h>
85 
86 /*
87  * This is the zone from which all zones are spawned.  The idea is that even
88  * the zone heads are allocated from the allocator, so we use the bss section
89  * to bootstrap us.
90  */
91 static struct uma_zone masterzone;
92 static uma_zone_t zones = &masterzone;
93 
94 /* This is the zone from which all of uma_slab_t's are allocated. */
95 static uma_zone_t slabzone;
96 
97 /*
98  * The initial hash tables come out of this zone so they can be allocated
99  * prior to malloc coming up.
100  */
101 static uma_zone_t hashzone;
102 
103 static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
104 
105 /*
106  * Are we allowed to allocate buckets?
107  */
108 static int bucketdisable = 1;
109 
110 /* Linked list of all zones in the system */
111 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
112 
113 /* This mutex protects the zone list */
114 static struct mtx uma_mtx;
115 
116 /* These are the pcpu cache locks */
117 static struct mtx uma_pcpu_mtx[MAXCPU];
118 
119 /* Linked list of boot time pages */
120 static LIST_HEAD(,uma_slab) uma_boot_pages =
121     LIST_HEAD_INITIALIZER(&uma_boot_pages);
122 
123 /* Count of free boottime pages */
124 static int uma_boot_free = 0;
125 
126 /* Is the VM done starting up? */
127 static int booted = 0;
128 
129 /*
130  * This is the handle used to schedule events that need to happen
131  * outside of the allocation fast path.
132  */
133 static struct callout uma_callout;
134 #define	UMA_TIMEOUT	20		/* Seconds for callout interval. */
135 
136 /*
137  * This structure is passed as the zone ctor arg so that I don't have to create
138  * a special allocation function just for zones.
139  */
140 struct uma_zctor_args {
141 	char *name;
142 	size_t size;
143 	uma_ctor ctor;
144 	uma_dtor dtor;
145 	uma_init uminit;
146 	uma_fini fini;
147 	int align;
148 	u_int16_t flags;
149 };
150 
151 struct uma_bucket_zone {
152 	uma_zone_t	ubz_zone;
153 	char		*ubz_name;
154 	int		ubz_entries;
155 };
156 
157 #define	BUCKET_MAX	128
158 
159 struct uma_bucket_zone bucket_zones[] = {
160 	{ NULL, "16 Bucket", 16 },
161 	{ NULL, "32 Bucket", 32 },
162 	{ NULL, "64 Bucket", 64 },
163 	{ NULL, "128 Bucket", 128 },
164 	{ NULL, NULL, 0}
165 };
166 
167 #define	BUCKET_SHIFT	4
168 #define	BUCKET_ZONES	((BUCKET_MAX >> BUCKET_SHIFT) + 1)
169 
170 uint8_t bucket_size[BUCKET_ZONES];
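/*
 * bucket_size[] maps a requested entry count (in units of 1 << BUCKET_SHIFT)
 * to an index into bucket_zones[]; bucket_init() below fills it in.  As a
 * worked example with the table above: a request for 20 entries gives
 * howmany(20, 16) == 2, and bucket_size[2] is set to the index of the
 * "32 Bucket" zone, so the request is rounded up to the next bucket size.
 */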
171 
172 /* Prototypes.. */
173 
174 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
175 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
176 static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
177 static void page_free(void *, int, u_int8_t);
178 static uma_slab_t slab_zalloc(uma_zone_t, int);
179 static void cache_drain(uma_zone_t);
180 static void bucket_drain(uma_zone_t, uma_bucket_t);
181 static void zone_ctor(void *, int, void *);
182 static void zone_dtor(void *, int, void *);
183 static void zero_init(void *, int);
184 static void zone_small_init(uma_zone_t zone);
185 static void zone_large_init(uma_zone_t zone);
186 static void zone_foreach(void (*zfunc)(uma_zone_t));
187 static void zone_timeout(uma_zone_t zone);
188 static int hash_alloc(struct uma_hash *);
189 static int hash_expand(struct uma_hash *, struct uma_hash *);
190 static void hash_free(struct uma_hash *hash);
191 static void uma_timeout(void *);
192 static void uma_startup3(void);
193 static void *uma_zalloc_internal(uma_zone_t, void *, int);
194 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
195 static void bucket_enable(void);
196 static void bucket_init(void);
197 static uma_bucket_t bucket_alloc(int, int);
198 static void bucket_free(uma_bucket_t);
199 static void bucket_zone_drain(void);
200 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
201 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
202 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
203 static void zone_drain(uma_zone_t);
204 
205 void uma_print_zone(uma_zone_t);
206 void uma_print_stats(void);
207 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
208 
209 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
210     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
211 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
212 
213 /*
214  * This routine checks to see whether or not it's safe to enable buckets.
215  */
216 
217 static void
218 bucket_enable(void)
219 {
220 	if (cnt.v_free_count < cnt.v_free_min)
221 		bucketdisable = 1;
222 	else
223 		bucketdisable = 0;
224 }
225 
226 static void
227 bucket_init(void)
228 {
229 	struct uma_bucket_zone *ubz;
230 	int i;
231 	int j;
232 
233 	for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
234 		int size;
235 
236 		ubz = &bucket_zones[j];
237 		size = roundup(sizeof(struct uma_bucket), sizeof(void *));
238 		size += sizeof(void *) * ubz->ubz_entries;
239 		ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
240 	    	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
241 		for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
242 			bucket_size[i >> BUCKET_SHIFT] = j;
243 	}
244 }
245 
246 static uma_bucket_t
247 bucket_alloc(int entries, int bflags)
248 {
249 	struct uma_bucket_zone *ubz;
250 	uma_bucket_t bucket;
251 	int idx;
252 
253 	/*
254 	 * This is to stop us from allocating per cpu buckets while we're
255 	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
256 	 * boot pages.  This also prevents us from allocating buckets in
257 	 * low memory situations.
258 	 */
259 
260 	if (bucketdisable)
261 		return (NULL);
262 	idx = howmany(entries, 1 << BUCKET_SHIFT);
263 	ubz = &bucket_zones[bucket_size[idx]];
264 	bucket = uma_zalloc_internal(ubz->ubz_zone, NULL, bflags);
265 	if (bucket) {
266 #ifdef INVARIANTS
267 		bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
268 #endif
269 		bucket->ub_cnt = 0;
270 		bucket->ub_entries = ubz->ubz_entries;
271 	}
272 
273 	return (bucket);
274 }
275 
276 static void
277 bucket_free(uma_bucket_t bucket)
278 {
279 	struct uma_bucket_zone *ubz;
280 	int idx;
281 
282 	idx = howmany(bucket->ub_entries, 1 << BUCKET_SHIFT);
283 	ubz = &bucket_zones[bucket_size[idx]];
284 	uma_zfree_internal(ubz->ubz_zone, bucket, NULL, 0);
285 }
286 
287 static void
288 bucket_zone_drain(void)
289 {
290 	struct uma_bucket_zone *ubz;
291 
292 	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
293 		zone_drain(ubz->ubz_zone);
294 }
295 
296 
297 /*
298  * Routine called by timeout which is used to fire off some time interval
299  * based calculations.  (stats, hash size, etc.)
300  *
301  * Arguments:
302  *	arg   Unused
303  *
304  * Returns:
305  *	Nothing
306  */
307 static void
308 uma_timeout(void *unused)
309 {
310 	bucket_enable();
311 	zone_foreach(zone_timeout);
312 
313 	/* Reschedule this event */
314 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
315 }
316 
317 /*
318  * Routine to perform timeout driven calculations.  This expands the
319  * hashes and does per cpu statistics aggregation.
320  *
321  *  Arguments:
322  *	zone  The zone to operate on
323  *
324  *  Returns:
325  *	Nothing
326  */
327 static void
328 zone_timeout(uma_zone_t zone)
329 {
330 	uma_cache_t cache;
331 	u_int64_t alloc;
332 	int cpu;
333 
334 	alloc = 0;
335 
336 	/*
337 	 * Aggregate per cpu cache statistics back to the zone.
338 	 *
339 	 * I may rewrite this to set a flag in the per cpu cache instead of
340 	 * locking.  If the flag is not cleared on the next round I will have
341 	 * to lock and do it here instead so that the statistics don't get too
342 	 * far out of sync.
343 	 */
344 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
345 		for (cpu = 0; cpu <= mp_maxid; cpu++) {
346 			if (CPU_ABSENT(cpu))
347 				continue;
348 			CPU_LOCK(cpu);
349 			cache = &zone->uz_cpu[cpu];
350 			/* Add them up, and reset */
351 			alloc += cache->uc_allocs;
352 			cache->uc_allocs = 0;
353 			CPU_UNLOCK(cpu);
354 		}
355 	}
356 
357 	/* Now push these stats back into the zone.. */
358 	ZONE_LOCK(zone);
359 	zone->uz_allocs += alloc;
360 
361 	/*
362 	 * Expand the zone hash table.
363 	 *
364 	 * This is done if the number of slabs is larger than the hash size.
365 	 * What I'm trying to do here is eliminate collisions entirely.  This
366 	 * may be a little aggressive.  Should I allow for two collisions max?
367 	 */
368 
369 	if (zone->uz_flags & UMA_ZONE_HASH &&
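	/*
	 * Taking the check below at face value, a concrete example: a zone
	 * with one page per slab (uz_ppera == 1) and a 32-entry hash is
	 * expanded on the first timeout after it has grown to 32 slabs,
	 * i.e. roughly one hash chain head per slab.
	 */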
370 	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
371 		struct uma_hash newhash;
372 		struct uma_hash oldhash;
373 		int ret;
374 
375 		/*
376 		 * This is so involved because allocating and freeing
377 		 * while the zone lock is held will lead to deadlock.
378 		 * I have to do everything in stages and check for
379 		 * races.
380 		 */
381 		newhash = zone->uz_hash;
382 		ZONE_UNLOCK(zone);
383 		ret = hash_alloc(&newhash);
384 		ZONE_LOCK(zone);
385 		if (ret) {
386 			if (hash_expand(&zone->uz_hash, &newhash)) {
387 				oldhash = zone->uz_hash;
388 				zone->uz_hash = newhash;
389 			} else
390 				oldhash = newhash;
391 
392 			ZONE_UNLOCK(zone);
393 			hash_free(&oldhash);
394 			ZONE_LOCK(zone);
395 		}
396 	}
397 	ZONE_UNLOCK(zone);
398 }
399 
400 /*
401  * Allocate and zero fill the next sized hash table from the appropriate
402  * backing store.
403  *
404  * Arguments:
405  *	hash  A new hash structure with the old hash size in uh_hashsize
406  *
407  * Returns:
408  *	1 on success and 0 on failure.
409  */
410 static int
411 hash_alloc(struct uma_hash *hash)
412 {
413 	int oldsize;
414 	int alloc;
415 
416 	oldsize = hash->uh_hashsize;
417 
418 	/* We're just going to grow to the next power of two */
419 	if (oldsize)  {
420 		hash->uh_hashsize = oldsize * 2;
421 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
422 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
423 		    M_UMAHASH, M_NOWAIT);
424 	} else {
425 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
426 		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
427 		    M_WAITOK);
428 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
429 	}
430 	if (hash->uh_slab_hash) {
431 		bzero(hash->uh_slab_hash, alloc);
432 		hash->uh_hashmask = hash->uh_hashsize - 1;
433 		return (1);
434 	}
435 
436 	return (0);
437 }
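/*
 * Note on growth: only the very first table (UMA_HASH_SIZE_INIT entries)
 * comes out of hashzone, so that it can be set up before malloc(9) is
 * available; every later doubling is malloc'd from M_UMAHASH.  This is why
 * hash_free() below checks uh_hashsize to pick how to release a table.
 */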
438 
439 /*
440  * Expands the hash table for HASH zones.  This is done from zone_timeout
441  * to reduce collisions.  This must not be done in the regular allocation
442  * path, otherwise, we can recurse on the vm while allocating pages.
443  *
444  * Arguments:
445  *	oldhash  The hash you want to expand
446  *	newhash  The hash structure for the new table
447  *
448  * Returns:
449  * 	1 on success and 0 on failure.
450  *
451  * Discussion:
452  */
453 static int
454 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
455 {
456 	uma_slab_t slab;
457 	int hval;
458 	int i;
459 
460 	if (!newhash->uh_slab_hash)
461 		return (0);
462 
463 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
464 		return (0);
465 
466 	/*
467 	 * I need to investigate hash algorithms for resizing without a
468 	 * full rehash.
469 	 */
470 
471 	for (i = 0; i < oldhash->uh_hashsize; i++)
472 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
473 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
474 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
475 			hval = UMA_HASH(newhash, slab->us_data);
476 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
477 			    slab, us_hlink);
478 		}
479 
480 	return (1);
481 }
482 
483 /*
484  * Free the hash bucket to the appropriate backing store.
485  *
486  * Arguments:
487  *	hash  The hash structure whose slab_hash bucket array we're freeing;
488  *	      uh_hashsize gives the number of entries in it
489  *
490  * Returns:
491  *	Nothing
492  */
493 static void
494 hash_free(struct uma_hash *hash)
495 {
496 	if (hash->uh_slab_hash == NULL)
497 		return;
498 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
499 		uma_zfree_internal(hashzone,
500 		    hash->uh_slab_hash, NULL, 0);
501 	else
502 		free(hash->uh_slab_hash, M_UMAHASH);
503 }
504 
505 /*
506  * Frees all outstanding items in a bucket
507  *
508  * Arguments:
509  *	zone   The zone to free to, must be unlocked.
510  *	bucket The free/alloc bucket with items, cpu queue must be locked.
511  *
512  * Returns:
513  *	Nothing
514  */
515 
516 static void
517 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
518 {
519 	uma_slab_t slab;
520 	int mzone;
521 	void *item;
522 
523 	if (bucket == NULL)
524 		return;
525 
526 	slab = NULL;
527 	mzone = 0;
528 
529 	/* We have to look up the slab again for malloc zones. */
530 	if (zone->uz_flags & UMA_ZONE_MALLOC)
531 		mzone = 1;
532 
533 	while (bucket->ub_cnt > 0)  {
534 		bucket->ub_cnt--;
535 		item = bucket->ub_bucket[bucket->ub_cnt];
536 #ifdef INVARIANTS
537 		bucket->ub_bucket[bucket->ub_cnt] = NULL;
538 		KASSERT(item != NULL,
539 		    ("bucket_drain: botched ptr, item is NULL"));
540 #endif
541 		/*
542 		 * This is extremely inefficient.  The slab pointer was passed
543 		 * to uma_zfree_arg, but we lost it because the buckets don't
544 		 * hold it.  This will go away when free() gets a size passed
545 		 * to it.
546 		 */
547 		if (mzone)
548 			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
549 		uma_zfree_internal(zone, item, slab, 1);
550 	}
551 }
552 
553 /*
554  * Drains the per cpu caches for a zone.
555  *
556  * Arguments:
557  *	zone     The zone to drain, must be unlocked.
558  *
559  * Returns:
560  *	Nothing
561  */
562 static void
563 cache_drain(uma_zone_t zone)
564 {
565 	uma_bucket_t bucket;
566 	uma_cache_t cache;
567 	int cpu;
568 
569 	/*
570 	 * We have to lock each cpu cache before locking the zone
571 	 */
572 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
573 		if (CPU_ABSENT(cpu))
574 			continue;
575 		CPU_LOCK(cpu);
576 		cache = &zone->uz_cpu[cpu];
577 		bucket_drain(zone, cache->uc_allocbucket);
578 		bucket_drain(zone, cache->uc_freebucket);
579 		if (cache->uc_allocbucket != NULL)
580 			bucket_free(cache->uc_allocbucket);
581 		if (cache->uc_freebucket != NULL)
582 			bucket_free(cache->uc_freebucket);
583 		cache->uc_allocbucket = cache->uc_freebucket = NULL;
584 	}
585 
586 	/*
587 	 * Drain the bucket queues and free the buckets; we just keep two per
588 	 * cpu (alloc/free).
589 	 */
590 	ZONE_LOCK(zone);
591 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
592 		LIST_REMOVE(bucket, ub_link);
593 		ZONE_UNLOCK(zone);
594 		bucket_drain(zone, bucket);
595 		bucket_free(bucket);
596 		ZONE_LOCK(zone);
597 	}
598 
599 	/* Now we do the free queue.. */
600 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
601 		LIST_REMOVE(bucket, ub_link);
602 		bucket_free(bucket);
603 	}
604 	for (cpu = 0; cpu <= mp_maxid; cpu++) {
605 		if (CPU_ABSENT(cpu))
606 			continue;
607 		CPU_UNLOCK(cpu);
608 	}
609 	ZONE_UNLOCK(zone);
610 }
611 
612 /*
613  * Frees pages from a zone back to the system.  This is done on demand from
614  * the pageout daemon.
615  *
616  * Arguments:
617  *	zone  The zone to free pages from
619  *
620  * Returns:
621  *	Nothing.
622  */
623 static void
624 zone_drain(uma_zone_t zone)
625 {
626 	struct slabhead freeslabs = {};
627 	uma_slab_t slab;
628 	uma_slab_t n;
629 	u_int8_t flags;
630 	u_int8_t *mem;
631 	int i;
632 
633 	/*
634 	 * We don't want to take pages from statically allocated zones at this
635 	 * time.
636 	 */
637 	if (zone->uz_flags & UMA_ZONE_NOFREE || zone->uz_freef == NULL)
638 		return;
639 
640 	ZONE_LOCK(zone);
641 
642 #ifdef UMA_DEBUG
643 	printf("%s free items: %u\n", zone->uz_name, zone->uz_free);
644 #endif
645 	if (zone->uz_free == 0)
646 		goto finished;
647 
648 	slab = LIST_FIRST(&zone->uz_free_slab);
649 	while (slab) {
650 		n = LIST_NEXT(slab, us_link);
651 
652 		/* We have nowhere to free these to */
653 		if (slab->us_flags & UMA_SLAB_BOOT) {
654 			slab = n;
655 			continue;
656 		}
657 
658 		LIST_REMOVE(slab, us_link);
659 		zone->uz_pages -= zone->uz_ppera;
660 		zone->uz_free -= zone->uz_ipers;
661 
662 		if (zone->uz_flags & UMA_ZONE_HASH)
663 			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
664 
665 		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
666 
667 		slab = n;
668 	}
669 finished:
670 	ZONE_UNLOCK(zone);
671 
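	/*
	 * The slabs were unlinked above with the zone lock held; the actual
	 * teardown and page frees happen here, after ZONE_UNLOCK, most
	 * likely for the same reason noted in zone_timeout(): calling back
	 * into the VM while holding the zone lock can deadlock.
	 */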
672 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
673 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
674 		if (zone->uz_fini)
675 			for (i = 0; i < zone->uz_ipers; i++)
676 				zone->uz_fini(
677 				    slab->us_data + (zone->uz_rsize * i),
678 				    zone->uz_size);
679 		flags = slab->us_flags;
680 		mem = slab->us_data;
681 
682 		if (zone->uz_flags & UMA_ZONE_OFFPAGE)
683 			uma_zfree_internal(slabzone, slab, NULL, 0);
684 		if (zone->uz_flags & UMA_ZONE_MALLOC) {
685 			vm_object_t obj;
686 
687 			if (flags & UMA_SLAB_KMEM)
688 				obj = kmem_object;
689 			else
690 				obj = NULL;
691 			for (i = 0; i < zone->uz_ppera; i++)
692 				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
693 				    obj);
694 		}
695 #ifdef UMA_DEBUG
696 		printf("%s: Returning %d bytes.\n",
697 		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
698 #endif
699 		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
700 	}
701 
702 }
703 
704 /*
705  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
706  *
707  * Arguments:
708  *	zone  The zone to allocate slabs for
709  *	wait  Shall we wait?
710  *
711  * Returns:
712  *	The slab that was allocated or NULL if there is no memory and the
713  *	caller specified M_NOWAIT.
714  */
715 static uma_slab_t
716 slab_zalloc(uma_zone_t zone, int wait)
717 {
718 	uma_slab_t slab;	/* Starting slab */
719 	u_int8_t *mem;
720 	u_int8_t flags;
721 	int i;
722 
723 	slab = NULL;
724 
725 #ifdef UMA_DEBUG
726 	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
727 #endif
728 	ZONE_UNLOCK(zone);
729 
730 	if (zone->uz_flags & UMA_ZONE_OFFPAGE) {
731 		slab = uma_zalloc_internal(slabzone, NULL, wait);
732 		if (slab == NULL) {
733 			ZONE_LOCK(zone);
734 			return NULL;
735 		}
736 	}
737 
738 	/*
739 	 * This reproduces the old vm_zone behavior of zero filling pages the
740 	 * first time they are added to a zone.
741 	 *
742 	 * Malloced items are zeroed in uma_zalloc.
743 	 */
744 
745 	if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0)
746 		wait |= M_ZERO;
747 	else
748 		wait &= ~M_ZERO;
749 
750 	mem = zone->uz_allocf(zone, zone->uz_ppera * UMA_SLAB_SIZE,
751 	    &flags, wait);
752 	if (mem == NULL) {
753 		ZONE_LOCK(zone);
754 		return (NULL);
755 	}
756 
757 	/* Point the slab into the allocated memory */
758 	if (!(zone->uz_flags & UMA_ZONE_OFFPAGE))
759 		slab = (uma_slab_t )(mem + zone->uz_pgoff);
760 
761 	if (zone->uz_flags & UMA_ZONE_MALLOC)
762 		for (i = 0; i < zone->uz_ppera; i++)
763 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
764 
765 	slab->us_zone = zone;
766 	slab->us_data = mem;
767 	slab->us_freecount = zone->uz_ipers;
768 	slab->us_firstfree = 0;
769 	slab->us_flags = flags;
770 	for (i = 0; i < zone->uz_ipers; i++)
771 		slab->us_freelist[i] = i+1;
772 
773 	if (zone->uz_init)
774 		for (i = 0; i < zone->uz_ipers; i++)
775 			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
776 			    zone->uz_size);
777 	ZONE_LOCK(zone);
778 
779 	if (zone->uz_flags & UMA_ZONE_HASH)
780 		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
781 
782 	zone->uz_pages += zone->uz_ppera;
783 	zone->uz_free += zone->uz_ipers;
784 
785 	return (slab);
786 }
787 
788 /*
789  * This function is intended to be used early on in place of page_alloc() so
790  * that we may use the boot time page cache to satisfy allocations before
791  * the VM is ready.
792  */
793 static void *
794 startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
795 {
796 	/*
797 	 * Check our small startup cache to see if it has pages remaining.
798 	 */
799 	mtx_lock(&uma_mtx);
800 	if (uma_boot_free != 0) {
801 		uma_slab_t tmps;
802 
803 		tmps = LIST_FIRST(&uma_boot_pages);
804 		LIST_REMOVE(tmps, us_link);
805 		uma_boot_free--;
806 		mtx_unlock(&uma_mtx);
807 		*pflag = tmps->us_flags;
808 		return (tmps->us_data);
809 	}
810 	mtx_unlock(&uma_mtx);
811 	if (booted == 0)
812 		panic("UMA: Increase UMA_BOOT_PAGES");
813 	/*
814 	 * Now that we've booted, reset these users to their real allocator.
815 	 */
816 #ifdef UMA_MD_SMALL_ALLOC
817 	zone->uz_allocf = uma_small_alloc;
818 #else
819 	zone->uz_allocf = page_alloc;
820 #endif
821 	return zone->uz_allocf(zone, bytes, pflag, wait);
822 }
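/*
 * uma_startup() seeds uma_boot_pages with UMA_BOOT_PAGES slabs carved out of
 * the bootmem handed to it.  Once that cache is empty and booted has been
 * set, the zone's uz_allocf is rewritten above so this path is never taken
 * again for that zone; running dry before booted is set panics with a
 * request to raise UMA_BOOT_PAGES.
 */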
823 
824 /*
825  * Allocates a number of pages from the system
826  *
827  * Arguments:
828  *	zone  Unused
829  *	bytes  The number of bytes requested
830  *	wait  Shall we wait?
831  *
832  * Returns:
833  *	A pointer to the allocated memory or possibly
834  *	NULL if M_NOWAIT is set.
835  */
836 static void *
837 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
838 {
839 	void *p;	/* Returned page */
840 
841 	*pflag = UMA_SLAB_KMEM;
842 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
843 
844 	return (p);
845 }
846 
847 /*
848  * Allocates a number of pages from within an object
849  *
850  * Arguments:
851  *	zone   Unused
852  *	bytes  The number of bytes requested
853  *	wait   Shall we wait?
854  *
855  * Returns:
856  *	A pointer to the allocated memory or possibly
857  *	NULL if M_NOWAIT is set.
858  */
859 static void *
860 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
861 {
862 	vm_object_t object;
863 	vm_offset_t retkva, zkva;
864 	vm_page_t p;
865 	int pages, startpages;
866 
867 	object = zone->uz_obj;
868 	retkva = 0;
869 
870 	/*
871 	 * This looks a little weird since we're getting one page at a time.
872 	 */
873 	VM_OBJECT_LOCK(object);
874 	p = TAILQ_LAST(&object->memq, pglist);
875 	pages = p != NULL ? p->pindex + 1 : 0;
876 	startpages = pages;
877 	zkva = zone->uz_kva + pages * PAGE_SIZE;
878 	for (; bytes > 0; bytes -= PAGE_SIZE) {
879 		p = vm_page_alloc(object, pages,
880 		    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
881 		if (p == NULL) {
882 			if (pages != startpages)
883 				pmap_qremove(retkva, pages - startpages);
884 			while (pages != startpages) {
885 				pages--;
886 				p = TAILQ_LAST(&object->memq, pglist);
887 				vm_page_lock_queues();
888 				vm_page_unwire(p, 0);
889 				vm_page_free(p);
890 				vm_page_unlock_queues();
891 			}
892 			retkva = 0;
893 			goto done;
894 		}
895 		pmap_qenter(zkva, &p, 1);
896 		if (retkva == 0)
897 			retkva = zkva;
898 		zkva += PAGE_SIZE;
899 		pages += 1;
900 	}
901 done:
902 	VM_OBJECT_UNLOCK(object);
903 	*flags = UMA_SLAB_PRIV;
904 
905 	return ((void *)retkva);
906 }
907 
908 /*
909  * Frees a number of pages to the system
910  *
911  * Arguments:
912  *	mem   A pointer to the memory to be freed
913  *	size  The size of the memory being freed
914  *	flags The original p->us_flags field
915  *
916  * Returns:
917  *	Nothing
918  */
919 static void
920 page_free(void *mem, int size, u_int8_t flags)
921 {
922 	vm_map_t map;
923 
924 	if (flags & UMA_SLAB_KMEM)
925 		map = kmem_map;
926 	else
927 		panic("UMA: page_free used with invalid flags %d\n", flags);
928 
929 	kmem_free(map, (vm_offset_t)mem, size);
930 }
931 
932 /*
933  * Zero fill initializer
934  *
935  * Arguments/Returns follow uma_init specifications
936  */
937 static void
938 zero_init(void *mem, int size)
939 {
940 	bzero(mem, size);
941 }
942 
943 /*
944  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
945  *
946  * Arguments
947  *	zone  The zone we should initialize
948  *
949  * Returns
950  *	Nothing
951  */
952 static void
953 zone_small_init(uma_zone_t zone)
954 {
955 	int rsize;
956 	int memused;
957 	int ipers;
958 
959 	rsize = zone->uz_size;
960 
961 	if (rsize < UMA_SMALLEST_UNIT)
962 		rsize = UMA_SMALLEST_UNIT;
963 
964 	if (rsize & zone->uz_align)
965 		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
966 
967 	zone->uz_rsize = rsize;
968 
969 	rsize += 1;	/* Account for the byte of linkage */
970 	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
971 	zone->uz_ppera = 1;
972 
973 	KASSERT(zone->uz_ipers != 0, ("zone_small_init: ipers is 0, uh-oh!"));
974 	memused = zone->uz_ipers * zone->uz_rsize;
975 
976 	/* Can we do any better? */
977 	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
978 		/*
979 		 * We can't do this if we're internal or if we've been
980 		 * asked to not go to the VM for buckets.  If we do this we
981 		 * may end up going to the VM (kmem_map) for slabs which we
982 		 * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
983 		 * result of UMA_ZONE_VM, which clearly forbids it.
984 		 */
985 		if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) ||
986 		    (zone->uz_flags & UMA_ZFLAG_CACHEONLY))
987 			return;
988 		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
989 		if (ipers > zone->uz_ipers) {
990 			zone->uz_flags |= UMA_ZONE_OFFPAGE;
991 			if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0)
992 				zone->uz_flags |= UMA_ZONE_HASH;
993 			zone->uz_ipers = ipers;
994 		}
995 	}
996 }
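/*
 * A rough worked example of the sizing above (illustrative numbers only,
 * assuming UMA_SLAB_SIZE is 4096 and the in-page slab header, struct
 * uma_slab plus its free list, takes on the order of 100 bytes): for a
 * 256-byte pointer-aligned item, rsize is 256 and each item costs 257 bytes
 * with its byte of linkage, so about 15 items fit on-page, using 3840 bytes.
 * If the roughly 256 bytes left over reach UMA_MAX_WASTE and the zone may go
 * OFFPAGE, the header is moved off-page and 4096 / 256 = 16 items fit.
 */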
997 
998 /*
999  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
1000  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1001  * more complicated.
1002  *
1003  * Arguments
1004  *	zone  The zone we should initialize
1005  *
1006  * Returns
1007  *	Nothing
1008  */
1009 static void
1010 zone_large_init(uma_zone_t zone)
1011 {
1012 	int pages;
1013 
1014 	KASSERT((zone->uz_flags & UMA_ZFLAG_CACHEONLY) == 0,
1015 	    ("zone_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY zone"));
1016 
1017 	pages = zone->uz_size / UMA_SLAB_SIZE;
1018 
1019 	/* Account for remainder */
1020 	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
1021 		pages++;
1022 
1023 	zone->uz_ppera = pages;
1024 	zone->uz_ipers = 1;
1025 
1026 	zone->uz_flags |= UMA_ZONE_OFFPAGE;
1027 	if ((zone->uz_flags & UMA_ZONE_MALLOC) == 0)
1028 		zone->uz_flags |= UMA_ZONE_HASH;
1029 
1030 	zone->uz_rsize = zone->uz_size;
1031 }
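/*
 * For example (again assuming UMA_SLAB_SIZE is one 4 KB page): a zone of
 * 20 KB items gets uz_ppera = 5 and uz_ipers = 1, with the slab header kept
 * off-page since large zones always go OFFPAGE for now.
 */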
1032 
1033 /*
1034  * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
1035  * the zone onto the global zone list.
1036  *
1037  * Arguments/Returns follow uma_ctor specifications
1038  *	udata  Actually a struct uma_zctor_args
1039  */
1040 
1041 static void
1042 zone_ctor(void *mem, int size, void *udata)
1043 {
1044 	struct uma_zctor_args *arg = udata;
1045 	uma_zone_t zone = mem;
1046 	int privlc;
1047 
1048 	bzero(zone, size);
1049 	zone->uz_name = arg->name;
1050 	zone->uz_size = arg->size;
1051 	zone->uz_ctor = arg->ctor;
1052 	zone->uz_dtor = arg->dtor;
1053 	zone->uz_init = arg->uminit;
1054 	zone->uz_fini = arg->fini;
1055 	zone->uz_align = arg->align;
1056 	zone->uz_free = 0;
1057 	zone->uz_pages = 0;
1058 	zone->uz_flags = arg->flags;
1059 	zone->uz_allocf = page_alloc;
1060 	zone->uz_freef = page_free;
1061 
1062 	if (arg->flags & UMA_ZONE_ZINIT)
1063 		zone->uz_init = zero_init;
1064 
1065 	if (arg->flags & UMA_ZONE_VM)
1066 		zone->uz_flags |= UMA_ZFLAG_CACHEONLY;
1067 
1068 	/*
1069 	 * XXX:
1070 	 * The +1 byte added to uz_size is to account for the byte of
1071 	 * linkage that is added to the size in zone_small_init().  If
1072 	 * we don't account for this here then we may end up in
1073 	 * zone_small_init() with a calculated 'ipers' of 0.
1074 	 */
1075 	if ((zone->uz_size+1) > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1076 		zone_large_init(zone);
1077 	else
1078 		zone_small_init(zone);
1079 	/*
1080 	 * If we haven't booted yet we need allocations to go through the
1081 	 * startup cache until the vm is ready.
1082 	 */
1083 	if (zone->uz_ppera == 1) {
1084 #ifdef UMA_MD_SMALL_ALLOC
1085 		zone->uz_allocf = uma_small_alloc;
1086 		zone->uz_freef = uma_small_free;
1087 #endif
1088 		if (booted == 0)
1089 			zone->uz_allocf = startup_alloc;
1090 	}
1091 	if (arg->flags & UMA_ZONE_MTXCLASS)
1092 		privlc = 1;
1093 	else
1094 		privlc = 0;
1095 
1096 	/*
1097 	 * If we're putting the slab header in the actual page we need to
1098 	 * figure out where in each page it goes.  This calculates a right
1099 	 * justified offset into the memory on an ALIGN_PTR boundary.
1100 	 */
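	/*
	 * Illustrative numbers only: with a 4 KB slab, a hypothetical 64-byte
	 * struct uma_slab and 30 items, totsize is 64 + 30 = 94, rounded up
	 * to 96 on the pointer boundary, so uz_pgoff becomes 4096 - 96 = 4000
	 * and the slab header sits flush against the end of the page.
	 */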
1101 	if (!(zone->uz_flags & UMA_ZONE_OFFPAGE)) {
1102 		int totsize;
1103 
1104 		/* Size of the slab struct and free list */
1105 		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1106 		if (totsize & UMA_ALIGN_PTR)
1107 			totsize = (totsize & ~UMA_ALIGN_PTR) +
1108 			    (UMA_ALIGN_PTR + 1);
1109 		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1110 		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1111 		    + zone->uz_ipers;
1112 		/* I don't think it's possible, but I'll make sure anyway */
1113 		if (totsize > UMA_SLAB_SIZE) {
1114 			printf("zone %s ipers %d rsize %d size %d\n",
1115 			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1116 			    zone->uz_size);
1117 			panic("UMA slab won't fit.\n");
1118 		}
1119 	}
1120 
1121 	if (zone->uz_flags & UMA_ZONE_HASH)
1122 		hash_alloc(&zone->uz_hash);
1123 
1124 #ifdef UMA_DEBUG
1125 	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1126 	    zone->uz_name, zone,
1127 	    zone->uz_size, zone->uz_ipers,
1128 	    zone->uz_ppera, zone->uz_pgoff);
1129 #endif
1130 	ZONE_LOCK_INIT(zone, privlc);
1131 
1132 	mtx_lock(&uma_mtx);
1133 	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1134 	mtx_unlock(&uma_mtx);
1135 
1136 	/*
1137 	 * Some internal zones don't have room allocated for the per cpu
1138 	 * caches.  If we're internal, bail out here.
1139 	 */
1140 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1141 		return;
1142 
1143 	if (zone->uz_ipers <= BUCKET_MAX)
1144 		zone->uz_count = zone->uz_ipers;
1145 	else
1146 		zone->uz_count = BUCKET_MAX;
1147 }
1148 
1149 /*
1150  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
1151  * and removes the zone from the global list.
1152  *
1153  * Arguments/Returns follow uma_dtor specifications
1154  *	udata  unused
1155  */
1156 
1157 static void
1158 zone_dtor(void *arg, int size, void *udata)
1159 {
1160 	uma_zone_t zone;
1161 
1162 	zone = (uma_zone_t)arg;
1163 
1164 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1165 		cache_drain(zone);
1166 	mtx_lock(&uma_mtx);
1167 	LIST_REMOVE(zone, uz_link);
1168 	zone_drain(zone);
1169 	mtx_unlock(&uma_mtx);
1170 
1171 	ZONE_LOCK(zone);
1172 	if (zone->uz_free != 0) {
1173 		printf("Zone %s was not empty (%d items). "
1174 		    " Lost %d pages of memory.\n",
1175 		    zone->uz_name, zone->uz_free, zone->uz_pages);
1176 		uma_print_zone(zone);
1177 	}
1178 
1179 	ZONE_UNLOCK(zone);
1180 	if (zone->uz_flags & UMA_ZONE_HASH)
1181 		hash_free(&zone->uz_hash);
1182 
1183 	ZONE_LOCK_FINI(zone);
1184 }
1185 /*
1186  * Traverses every zone in the system and calls a callback
1187  *
1188  * Arguments:
1189  *	zfunc  A pointer to a function which accepts a zone
1190  *		as an argument.
1191  *
1192  * Returns:
1193  *	Nothing
1194  */
1195 static void
1196 zone_foreach(void (*zfunc)(uma_zone_t))
1197 {
1198 	uma_zone_t zone;
1199 
1200 	mtx_lock(&uma_mtx);
1201 	LIST_FOREACH(zone, &uma_zones, uz_link)
1202 		zfunc(zone);
1203 	mtx_unlock(&uma_mtx);
1204 }
1205 
1206 /* Public functions */
1207 /* See uma.h */
1208 void
1209 uma_startup(void *bootmem)
1210 {
1211 	struct uma_zctor_args args;
1212 	uma_slab_t slab;
1213 	int slabsize;
1214 	int i;
1215 
1216 #ifdef UMA_DEBUG
1217 	printf("Creating uma zone headers zone.\n");
1218 #endif
1219 	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1220 	/* "Manually" create the initial zone */
1221 	args.name = "UMA Zones";
1222 	args.size = sizeof(struct uma_zone) +
1223 	    (sizeof(struct uma_cache) * (mp_maxid + 1));
1224 	args.ctor = zone_ctor;
1225 	args.dtor = zone_dtor;
1226 	args.uminit = zero_init;
1227 	args.fini = NULL;
1228 	args.align = 32 - 1;
1229 	args.flags = UMA_ZFLAG_INTERNAL;
1230 	/* The initial zone has no per-cpu queues so it's smaller */
1231 	zone_ctor(zones, sizeof(struct uma_zone), &args);
1232 
1233 	/* Initialize the pcpu cache lock set once and for all */
1234 	for (i = 0; i <= mp_maxid; i++)
1235 		CPU_LOCK_INIT(i);
1236 #ifdef UMA_DEBUG
1237 	printf("Filling boot free list.\n");
1238 #endif
1239 	for (i = 0; i < UMA_BOOT_PAGES; i++) {
1240 		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1241 		slab->us_data = (u_int8_t *)slab;
1242 		slab->us_flags = UMA_SLAB_BOOT;
1243 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1244 		uma_boot_free++;
1245 	}
1246 
1247 #ifdef UMA_DEBUG
1248 	printf("Creating slab zone.\n");
1249 #endif
1250 
1251 	/*
1252 	 * This is the max number of free list items we'll have with
1253 	 * offpage slabs.
1254 	 */
1255 	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1256 	slabsize /= UMA_MAX_WASTE;
1257 	slabsize++;			/* In case the division rounded down */
1258 	slabsize += sizeof(struct uma_slab);
1259 
1260 	/* Now make a zone for slab headers */
1261 	slabzone = uma_zcreate("UMA Slabs",
1262 				slabsize,
1263 				NULL, NULL, NULL, NULL,
1264 				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1265 
1266 	hashzone = uma_zcreate("UMA Hash",
1267 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1268 	    NULL, NULL, NULL, NULL,
1269 	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1270 
1271 	bucket_init();
1272 
1273 #ifdef UMA_MD_SMALL_ALLOC
1274 	booted = 1;
1275 #endif
1276 
1277 #ifdef UMA_DEBUG
1278 	printf("UMA startup complete.\n");
1279 #endif
1280 }
1281 
1282 /* see uma.h */
1283 void
1284 uma_startup2(void)
1285 {
1286 	booted = 1;
1287 	bucket_enable();
1288 #ifdef UMA_DEBUG
1289 	printf("UMA startup2 complete.\n");
1290 #endif
1291 }
1292 
1293 /*
1294  * Initialize our callout handle
1295  *
1296  */
1297 
1298 static void
1299 uma_startup3(void)
1300 {
1301 #ifdef UMA_DEBUG
1302 	printf("Starting callout.\n");
1303 #endif
1304 	callout_init(&uma_callout, 0);
1305 	callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1306 #ifdef UMA_DEBUG
1307 	printf("UMA startup3 complete.\n");
1308 #endif
1309 }
1310 
1311 /* See uma.h */
1312 uma_zone_t
1313 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1314 		uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1315 
1316 {
1317 	struct uma_zctor_args args;
1318 
1319 	/* This stuff is essential for the zone ctor */
1320 	args.name = name;
1321 	args.size = size;
1322 	args.ctor = ctor;
1323 	args.dtor = dtor;
1324 	args.uminit = uminit;
1325 	args.fini = fini;
1326 	args.align = align;
1327 	args.flags = flags;
1328 
1329 	return (uma_zalloc_internal(zones, &args, M_WAITOK));
1330 }
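/*
 * Illustrative only (see uma.h for the real API and the uma_zalloc/uma_zfree
 * wrappers): a typical consumer with its own struct foo would do roughly
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
 *	    NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, p);
 *	uma_zdestroy(foo_zone);
 */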
1331 
1332 /* See uma.h */
1333 void
1334 uma_zdestroy(uma_zone_t zone)
1335 {
1336 	uma_zfree_internal(zones, zone, NULL, 0);
1337 }
1338 
1339 /* See uma.h */
1340 void *
1341 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1342 {
1343 	void *item;
1344 	uma_cache_t cache;
1345 	uma_bucket_t bucket;
1346 	int cpu;
1347 
1348 	/* This is the fast path allocation */
1349 #ifdef UMA_DEBUG_ALLOC_1
1350 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1351 #endif
1352 
1353 #ifdef INVARIANTS
1354 	/*
1355 	 * Make sure that exactly one of M_WAITOK and M_NOWAIT is set, and
1356 	 * check for the API botches that are common.
1357 	 * The uma code implies M_WAITOK if M_NOWAIT is not set, so
1358 	 * we default to waiting if none of the flags is set.
1359 	 */
1360 	cpu = flags & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT);
1361 	if (cpu != M_NOWAIT && cpu != M_WAITOK) {
1362 		static	struct timeval lasterr;
1363 		static	int curerr, once;
1364 		if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
1365 			printf("Bad uma_zalloc flags: %x\n", cpu);
1366 			backtrace();
1367 			once++;
1368 		}
1369 	}
1370 #endif
1371 	if (!(flags & M_NOWAIT)) {
1372 		KASSERT(curthread->td_intr_nesting_level == 0,
1373 		   ("malloc(M_WAITOK) in interrupt context"));
1374 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1375 		    "malloc() of \"%s\"", zone->uz_name);
1376 	}
1377 
1378 zalloc_restart:
1379 	cpu = PCPU_GET(cpuid);
1380 	CPU_LOCK(cpu);
1381 	cache = &zone->uz_cpu[cpu];
1382 
1383 zalloc_start:
1384 	bucket = cache->uc_allocbucket;
1385 
1386 	if (bucket) {
1387 		if (bucket->ub_cnt > 0) {
1388 			bucket->ub_cnt--;
1389 			item = bucket->ub_bucket[bucket->ub_cnt];
1390 #ifdef INVARIANTS
1391 			bucket->ub_bucket[bucket->ub_cnt] = NULL;
1392 #endif
1393 			KASSERT(item != NULL,
1394 			    ("uma_zalloc: Bucket pointer mangled."));
1395 			cache->uc_allocs++;
1396 #ifdef INVARIANTS
1397 			ZONE_LOCK(zone);
1398 			uma_dbg_alloc(zone, NULL, item);
1399 			ZONE_UNLOCK(zone);
1400 #endif
1401 			CPU_UNLOCK(cpu);
1402 			if (zone->uz_ctor)
1403 				zone->uz_ctor(item, zone->uz_size, udata);
1404 			if (flags & M_ZERO)
1405 				bzero(item, zone->uz_size);
1406 			return (item);
1407 		} else if (cache->uc_freebucket) {
1408 			/*
1409 			 * We have run out of items in our allocbucket.
1410 			 * See if we can switch with our free bucket.
1411 			 */
1412 			if (cache->uc_freebucket->ub_cnt > 0) {
1413 #ifdef UMA_DEBUG_ALLOC
1414 				printf("uma_zalloc: Swapping empty with"
1415 				    " alloc.\n");
1416 #endif
1417 				bucket = cache->uc_freebucket;
1418 				cache->uc_freebucket = cache->uc_allocbucket;
1419 				cache->uc_allocbucket = bucket;
1420 
1421 				goto zalloc_start;
1422 			}
1423 		}
1424 	}
1425 	ZONE_LOCK(zone);
1426 	/* Since we have locked the zone we may as well send back our stats */
1427 	zone->uz_allocs += cache->uc_allocs;
1428 	cache->uc_allocs = 0;
1429 
1430 	/* Our old one is now a free bucket */
1431 	if (cache->uc_allocbucket) {
1432 		KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1433 		    ("uma_zalloc_arg: Freeing a non free bucket."));
1434 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1435 		    cache->uc_allocbucket, ub_link);
1436 		cache->uc_allocbucket = NULL;
1437 	}
1438 
1439 	/* Check the free list for a new alloc bucket */
1440 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1441 		KASSERT(bucket->ub_cnt != 0,
1442 		    ("uma_zalloc_arg: Returning an empty bucket."));
1443 
1444 		LIST_REMOVE(bucket, ub_link);
1445 		cache->uc_allocbucket = bucket;
1446 		ZONE_UNLOCK(zone);
1447 		goto zalloc_start;
1448 	}
1449 	/* We are no longer associated with this cpu!!! */
1450 	CPU_UNLOCK(cpu);
1451 
1452 	/* Bump up our uz_count so we get here less often */
1453 	if (zone->uz_count < BUCKET_MAX)
1454 		zone->uz_count++;
1455 	/*
1456 	 * Now let's just fill a bucket and put it on the free list.  If that
1457 	 * works we'll restart the allocation from the beginning.
1458 	 */
1459 	if (uma_zalloc_bucket(zone, flags)) {
1460 		ZONE_UNLOCK(zone);
1461 		goto zalloc_restart;
1462 	}
1463 	ZONE_UNLOCK(zone);
1464 	/*
1465 	 * We may not be able to get a bucket so return an actual item.
1466 	 */
1467 #ifdef UMA_DEBUG
1468 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1469 #endif
1470 
1471 	return (uma_zalloc_internal(zone, udata, flags));
1472 }
1473 
1474 static uma_slab_t
1475 uma_zone_slab(uma_zone_t zone, int flags)
1476 {
1477 	uma_slab_t slab;
1478 
1479 	/*
1480 	 * This is to prevent us from recursively trying to allocate
1481 	 * buckets.  The problem is that if an allocation forces us to
1482 	 * grab a new bucket we will call page_alloc, which will go off
1483 	 * and cause the vm to allocate vm_map_entries.  If we need new
1484 	 * buckets there too we will recurse in kmem_alloc and bad
1485 	 * things happen.  So instead we return a NULL bucket, and make
1486 	 * the code that allocates buckets smart enough to deal with it.
1487 	 */
1488 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL && zone->uz_recurse != 0)
1489 		return (NULL);
1490 
1491 	slab = NULL;
1492 
1493 	for (;;) {
1494 		/*
1495 		 * Find a slab with some space.  Prefer slabs that are partially
1496 		 * used over those that are totally free.  This helps to reduce
1497 		 * fragmentation.
1498 		 */
1499 		if (zone->uz_free != 0) {
1500 			if (!LIST_EMPTY(&zone->uz_part_slab)) {
1501 				slab = LIST_FIRST(&zone->uz_part_slab);
1502 			} else {
1503 				slab = LIST_FIRST(&zone->uz_free_slab);
1504 				LIST_REMOVE(slab, us_link);
1505 				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
1506 				us_link);
1507 			}
1508 			return (slab);
1509 		}
1510 
1511 		/*
1512 		 * M_NOVM means don't ask at all!
1513 		 */
1514 		if (flags & M_NOVM)
1515 			break;
1516 
1517 		if (zone->uz_maxpages &&
1518 		    zone->uz_pages >= zone->uz_maxpages) {
1519 			zone->uz_flags |= UMA_ZFLAG_FULL;
1520 
1521 			if (flags & M_NOWAIT)
1522 				break;
1523 			else
1524 				msleep(zone, &zone->uz_lock, PVM,
1525 				    "zonelimit", 0);
1526 			continue;
1527 		}
1528 		zone->uz_recurse++;
1529 		slab = slab_zalloc(zone, flags);
1530 		zone->uz_recurse--;
1531 		/*
1532 		 * If we got a slab here it's safe to mark it partially used
1533 		 * and return.  We assume that the caller is going to remove
1534 		 * at least one item.
1535 		 */
1536 		if (slab) {
1537 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1538 			return (slab);
1539 		}
1540 		/*
1541 		 * We might not have been able to get a slab but another cpu
1542 		 * could have while we were unlocked.  Check again before we
1543 		 * fail.
1544 		 */
1545 		if (flags & M_NOWAIT)
1546 			flags |= M_NOVM;
1547 	}
1548 	return (slab);
1549 }
1550 
1551 static void *
1552 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
1553 {
1554 	void *item;
1555 	u_int8_t freei;
1556 
1557 	freei = slab->us_firstfree;
1558 	slab->us_firstfree = slab->us_freelist[freei];
1559 	item = slab->us_data + (zone->uz_rsize * freei);
1560 
1561 	slab->us_freecount--;
1562 	zone->uz_free--;
1563 #ifdef INVARIANTS
1564 	uma_dbg_alloc(zone, slab, item);
1565 #endif
1566 	/* Move this slab to the full list */
1567 	if (slab->us_freecount == 0) {
1568 		LIST_REMOVE(slab, us_link);
1569 		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1570 	}
1571 
1572 	return (item);
1573 }
1574 
1575 static int
1576 uma_zalloc_bucket(uma_zone_t zone, int flags)
1577 {
1578 	uma_bucket_t bucket;
1579 	uma_slab_t slab;
1580 	int max;
1581 
1582 	/*
1583 	 * Try this zone's free list first so we don't allocate extra buckets.
1584 	 */
1585 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1586 		KASSERT(bucket->ub_cnt == 0,
1587 		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
1588 		LIST_REMOVE(bucket, ub_link);
1589 	} else {
1590 		int bflags;
1591 
1592 		bflags = (flags & ~M_ZERO);
1593 		if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
1594 			bflags |= M_NOVM;
1595 
1596 		ZONE_UNLOCK(zone);
1597 		bucket = bucket_alloc(zone->uz_count, bflags);
1598 		ZONE_LOCK(zone);
1599 	}
1600 
1601 	if (bucket == NULL)
1602 		return (0);
1603 
1604 #ifdef SMP
1605 	/*
1606 	 * This code is here to limit the number of simultaneous bucket fills
1607 	 * for any given zone to the number of per cpu caches in this zone. This
1608 	 * is done so that we don't allocate more memory than we really need.
1609 	 */
1610 	if (zone->uz_fills >= mp_ncpus)
1611 		goto done;
1612 
1613 #endif
1614 	zone->uz_fills++;
1615 
1616 	max = MIN(bucket->ub_entries, zone->uz_count);
1617 	/* Try to keep the buckets totally full */
1618 	while (bucket->ub_cnt < max &&
1619 	    (slab = uma_zone_slab(zone, flags)) != NULL) {
1620 		while (slab->us_freecount && bucket->ub_cnt < max) {
1621 			bucket->ub_bucket[bucket->ub_cnt++] =
1622 			    uma_slab_alloc(zone, slab);
1623 		}
1624 		/* Don't block on the next fill */
1625 		flags |= M_NOWAIT;
1626 	}
1627 
1628 	zone->uz_fills--;
1629 
1630 	if (bucket->ub_cnt != 0) {
1631 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1632 		    bucket, ub_link);
1633 		return (1);
1634 	}
1635 #ifdef SMP
1636 done:
1637 #endif
1638 	bucket_free(bucket);
1639 
1640 	return (0);
1641 }
1642 /*
1643  * Allocates an item for an internal zone
1644  *
1645  * Arguments
1646  *	zone   The zone to alloc for.
1647  *	udata  The data to be passed to the constructor.
1648  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
1649  *
1650  * Returns
1651  *	NULL if there is no memory and M_NOWAIT is set
1652  *	An item if successful
1653  */
1654 
1655 static void *
1656 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
1657 {
1658 	uma_slab_t slab;
1659 	void *item;
1660 
1661 	item = NULL;
1662 
1663 #ifdef UMA_DEBUG_ALLOC
1664 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1665 #endif
1666 	ZONE_LOCK(zone);
1667 
1668 	slab = uma_zone_slab(zone, flags);
1669 	if (slab == NULL) {
1670 		ZONE_UNLOCK(zone);
1671 		return (NULL);
1672 	}
1673 
1674 	item = uma_slab_alloc(zone, slab);
1675 
1676 	ZONE_UNLOCK(zone);
1677 
1678 	if (zone->uz_ctor != NULL)
1679 		zone->uz_ctor(item, zone->uz_size, udata);
1680 	if (flags & M_ZERO)
1681 		bzero(item, zone->uz_size);
1682 
1683 	return (item);
1684 }
1685 
1686 /* See uma.h */
1687 void
1688 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1689 {
1690 	uma_cache_t cache;
1691 	uma_bucket_t bucket;
1692 	int bflags;
1693 	int cpu;
1694 	int skip;
1695 
1696 	/* This is the fast path free */
1697 	skip = 0;
1698 #ifdef UMA_DEBUG_ALLOC_1
1699 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1700 #endif
1701 	/*
1702 	 * The race here is acceptable.  If we miss it we'll just have to wait
1703 	 * a little longer for the limits to be reset.
1704 	 */
1705 
1706 	if (zone->uz_flags & UMA_ZFLAG_FULL)
1707 		goto zfree_internal;
1708 
1709 	if (zone->uz_dtor) {
1710 		zone->uz_dtor(item, zone->uz_size, udata);
1711 		skip = 1;
1712 	}
1713 
1714 zfree_restart:
1715 	cpu = PCPU_GET(cpuid);
1716 	CPU_LOCK(cpu);
1717 	cache = &zone->uz_cpu[cpu];
1718 
1719 zfree_start:
1720 	bucket = cache->uc_freebucket;
1721 
1722 	if (bucket) {
1723 		/*
1724 		 * Do we have room in our bucket? It is OK for this uz count
1725 		 * check to be slightly out of sync.
1726 		 */
1727 
1728 		if (bucket->ub_cnt < bucket->ub_entries) {
1729 			KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
1730 			    ("uma_zfree: Freeing to non free bucket index."));
1731 			bucket->ub_bucket[bucket->ub_cnt] = item;
1732 			bucket->ub_cnt++;
1733 #ifdef INVARIANTS
1734 			ZONE_LOCK(zone);
1735 			if (zone->uz_flags & UMA_ZONE_MALLOC)
1736 				uma_dbg_free(zone, udata, item);
1737 			else
1738 				uma_dbg_free(zone, NULL, item);
1739 			ZONE_UNLOCK(zone);
1740 #endif
1741 			CPU_UNLOCK(cpu);
1742 			return;
1743 		} else if (cache->uc_allocbucket) {
1744 #ifdef UMA_DEBUG_ALLOC
1745 			printf("uma_zfree: Swapping buckets.\n");
1746 #endif
1747 			/*
1748 			 * We have run out of space in our freebucket.
1749 			 * See if we can switch with our alloc bucket.
1750 			 */
1751 			if (cache->uc_allocbucket->ub_cnt <
1752 			    cache->uc_freebucket->ub_cnt) {
1753 				bucket = cache->uc_freebucket;
1754 				cache->uc_freebucket = cache->uc_allocbucket;
1755 				cache->uc_allocbucket = bucket;
1756 				goto zfree_start;
1757 			}
1758 		}
1759 	}
1760 	/*
1761 	 * We can get here for two reasons:
1762 	 *
1763 	 * 1) The buckets are NULL
1764 	 * 2) The alloc and free buckets are both somewhat full.
1765 	 */
1766 
1767 	ZONE_LOCK(zone);
1768 
1769 	bucket = cache->uc_freebucket;
1770 	cache->uc_freebucket = NULL;
1771 
1772 	/* Can we throw this on the zone full list? */
1773 	if (bucket != NULL) {
1774 #ifdef UMA_DEBUG_ALLOC
1775 		printf("uma_zfree: Putting old bucket on the free list.\n");
1776 #endif
1777 		/* ub_cnt is pointing to the last free item */
1778 		KASSERT(bucket->ub_cnt != 0,
1779 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1780 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1781 		    bucket, ub_link);
1782 	}
1783 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1784 		LIST_REMOVE(bucket, ub_link);
1785 		ZONE_UNLOCK(zone);
1786 		cache->uc_freebucket = bucket;
1787 		goto zfree_start;
1788 	}
1789 	/* We're done with this CPU now */
1790 	CPU_UNLOCK(cpu);
1791 
1792 	/* And the zone.. */
1793 	ZONE_UNLOCK(zone);
1794 
1795 #ifdef UMA_DEBUG_ALLOC
1796 	printf("uma_zfree: Allocating new free bucket.\n");
1797 #endif
1798 	bflags = M_NOWAIT;
1799 
1800 	if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
1801 		bflags |= M_NOVM;
1802 	bucket = bucket_alloc(zone->uz_count, bflags);
1803 	if (bucket) {
1804 		ZONE_LOCK(zone);
1805 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1806 		    bucket, ub_link);
1807 		ZONE_UNLOCK(zone);
1808 		goto zfree_restart;
1809 	}
1810 
1811 	/*
1812 	 * If nothing else caught this, we'll just do an internal free.
1813 	 */
1814 
1815 zfree_internal:
1816 
1817 #ifdef INVARIANTS
1818 	/*
1819 	 * If we need to skip the dtor and the uma_dbg_free in
1820 	 * uma_zfree_internal because we've already called the dtor
1821 	 * above, but we ended up here, then we need to make sure
1822 	 * that we take care of the uma_dbg_free immediately.
1823 	 */
1824 	if (skip) {
1825 		ZONE_LOCK(zone);
1826 		if (zone->uz_flags & UMA_ZONE_MALLOC)
1827 			uma_dbg_free(zone, udata, item);
1828 		else
1829 			uma_dbg_free(zone, NULL, item);
1830 		ZONE_UNLOCK(zone);
1831 	}
1832 #endif
1833 	uma_zfree_internal(zone, item, udata, skip);
1834 
1835 	return;
1836 
1837 }
1838 
1839 /*
1840  * Frees an item to an INTERNAL zone or allocates a free bucket
1841  *
1842  * Arguments:
1843  *	zone   The zone to free to
1844  *	item   The item we're freeing
1845  *	udata  User supplied data for the dtor
1846  *	skip   Skip the dtor, it was done in uma_zfree_arg
1847  */
1848 static void
1849 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1850 {
1851 	uma_slab_t slab;
1852 	u_int8_t *mem;
1853 	u_int8_t freei;
1854 
1855 	if (!skip && zone->uz_dtor)
1856 		zone->uz_dtor(item, zone->uz_size, udata);
1857 
1858 	ZONE_LOCK(zone);
1859 
1860 	if (!(zone->uz_flags & UMA_ZONE_MALLOC)) {
1861 		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1862 		if (zone->uz_flags & UMA_ZONE_HASH)
1863 			slab = hash_sfind(&zone->uz_hash, mem);
1864 		else {
1865 			mem += zone->uz_pgoff;
1866 			slab = (uma_slab_t)mem;
1867 		}
1868 	} else {
1869 		slab = (uma_slab_t)udata;
1870 	}
1871 
1872 	/* Do we need to remove from any lists? */
1873 	if (slab->us_freecount+1 == zone->uz_ipers) {
1874 		LIST_REMOVE(slab, us_link);
1875 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1876 	} else if (slab->us_freecount == 0) {
1877 		LIST_REMOVE(slab, us_link);
1878 		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1879 	}
1880 
1881 	/* Slab management stuff */
1882 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
1883 		/ zone->uz_rsize;
1884 
1885 #ifdef INVARIANTS
1886 	if (!skip)
1887 		uma_dbg_free(zone, slab, item);
1888 #endif
1889 
1890 	slab->us_freelist[freei] = slab->us_firstfree;
1891 	slab->us_firstfree = freei;
1892 	slab->us_freecount++;
1893 
1894 	/* Zone statistics */
1895 	zone->uz_free++;
1896 
1897 	if (zone->uz_flags & UMA_ZFLAG_FULL) {
1898 		if (zone->uz_pages < zone->uz_maxpages)
1899 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
1900 
1901 		/* We can handle one more allocation */
1902 		wakeup_one(zone);
1903 	}
1904 
1905 	ZONE_UNLOCK(zone);
1906 }
1907 
1908 /* See uma.h */
1909 void
1910 uma_zone_set_max(uma_zone_t zone, int nitems)
1911 {
1912 	ZONE_LOCK(zone);
1913 	if (zone->uz_ppera > 1)
1914 		zone->uz_maxpages = nitems * zone->uz_ppera;
1915 	else
1916 		zone->uz_maxpages = nitems / zone->uz_ipers;
1917 
1918 	if (zone->uz_maxpages * zone->uz_ipers < nitems)
1919 		zone->uz_maxpages++;
1920 
1921 	ZONE_UNLOCK(zone);
1922 }
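/*
 * Worked example of the rounding above for a one-page-per-slab zone with 30
 * items per slab: asking for 100 items gives uz_maxpages = 100 / 30 = 3,
 * bumped to 4 because 3 * 30 < 100, so the effective ceiling is 120 items;
 * the limit is enforced in whole slabs and never rounds below nitems.
 */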
1923 
1924 /* See uma.h */
1925 void
1926 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1927 {
1928 	ZONE_LOCK(zone);
1929 	zone->uz_freef = freef;
1930 	ZONE_UNLOCK(zone);
1931 }
1932 
1933 /* See uma.h */
1934 void
1935 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1936 {
1937 	ZONE_LOCK(zone);
1938 	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1939 	zone->uz_allocf = allocf;
1940 	ZONE_UNLOCK(zone);
1941 }
1942 
1943 /* See uma.h */
1944 int
1945 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1946 {
1947 	int pages;
1948 	vm_offset_t kva;
1949 
1950 	pages = count / zone->uz_ipers;
1951 
1952 	if (pages * zone->uz_ipers < count)
1953 		pages++;
1954 
1955 	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1956 
1957 	if (kva == 0)
1958 		return (0);
1959 	if (obj == NULL) {
1960 		obj = vm_object_allocate(OBJT_DEFAULT,
1961 		    pages);
1962 	} else {
1963 		VM_OBJECT_LOCK_INIT(obj);
1964 		_vm_object_allocate(OBJT_DEFAULT,
1965 		    pages, obj);
1966 	}
1967 	ZONE_LOCK(zone);
1968 	zone->uz_kva = kva;
1969 	zone->uz_obj = obj;
1970 	zone->uz_maxpages = pages;
1971 	zone->uz_allocf = obj_alloc;
1972 	zone->uz_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
1973 	ZONE_UNLOCK(zone);
1974 	return (1);
1975 }
1976 
1977 /* See uma.h */
1978 void
1979 uma_prealloc(uma_zone_t zone, int items)
1980 {
1981 	int slabs;
1982 	uma_slab_t slab;
1983 
1984 	ZONE_LOCK(zone);
1985 	slabs = items / zone->uz_ipers;
1986 	if (slabs * zone->uz_ipers < items)
1987 		slabs++;
1988 	while (slabs > 0) {
1989 		slab = slab_zalloc(zone, M_WAITOK);
1990 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1991 		slabs--;
1992 	}
1993 	ZONE_UNLOCK(zone);
1994 }
1995 
1996 /* See uma.h */
1997 void
1998 uma_reclaim(void)
1999 {
2000 #ifdef UMA_DEBUG
2001 	printf("UMA: vm asked us to release pages!\n");
2002 #endif
2003 	bucket_enable();
2004 	zone_foreach(zone_drain);
2005 	/*
2006 	 * Some slabs may have been freed but this zone will be visited early;
2007 	 * we visit it again so that we can free pages that are empty once other
2008 	 * zones are drained.  We have to do the same for buckets.
2009 	 */
2010 	zone_drain(slabzone);
2011 	bucket_zone_drain();
2012 }
2013 
2014 void *
2015 uma_large_malloc(int size, int wait)
2016 {
2017 	void *mem;
2018 	uma_slab_t slab;
2019 	u_int8_t flags;
2020 
2021 	slab = uma_zalloc_internal(slabzone, NULL, wait);
2022 	if (slab == NULL)
2023 		return (NULL);
2024 	mem = page_alloc(NULL, size, &flags, wait);
2025 	if (mem) {
2026 		vsetslab((vm_offset_t)mem, slab);
2027 		slab->us_data = mem;
2028 		slab->us_flags = flags | UMA_SLAB_MALLOC;
2029 		slab->us_size = size;
2030 	} else {
2031 		uma_zfree_internal(slabzone, slab, NULL, 0);
2032 	}
2033 
2034 
2035 	return (mem);
2036 }
2037 
2038 void
2039 uma_large_free(uma_slab_t slab)
2040 {
2041 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
2042 	/*
2043 	 * XXX: We get a lock order reversal if we don't have Giant:
2044 	 * vm_map_remove (locks system map) -> vm_map_delete ->
2045 	 *    vm_map_entry_unwire -> vm_fault_unwire -> mtx_lock(&Giant)
2046 	 */
2047 	if (!mtx_owned(&Giant)) {
2048 		mtx_lock(&Giant);
2049 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2050 		mtx_unlock(&Giant);
2051 	} else
2052 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2053 	uma_zfree_internal(slabzone, slab, NULL, 0);
2054 }
2055 
2056 void
2057 uma_print_stats(void)
2058 {
2059 	zone_foreach(uma_print_zone);
2060 }
2061 
2062 static void
2063 slab_print(uma_slab_t slab)
2064 {
2065 	printf("slab: zone %p, data %p, freecount %d, firstfree %d\n",
2066 		slab->us_zone, slab->us_data, slab->us_freecount,
2067 		slab->us_firstfree);
2068 }
2069 
2070 static void
2071 cache_print(uma_cache_t cache)
2072 {
2073 	printf("alloc: %p(%d), free: %p(%d)\n",
2074 		cache->uc_allocbucket,
2075 		cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2076 		cache->uc_freebucket,
2077 		cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2078 }
2079 
2080 void
2081 uma_print_zone(uma_zone_t zone)
2082 {
2083 	uma_cache_t cache;
2084 	uma_slab_t slab;
2085 	int i;
2086 
2087 	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2088 	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2089 	    zone->uz_ipers, zone->uz_ppera,
2090 	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2091 	printf("Part slabs:\n");
2092 	LIST_FOREACH(slab, &zone->uz_part_slab, us_link)
2093 		slab_print(slab);
2094 	printf("Free slabs:\n");
2095 	LIST_FOREACH(slab, &zone->uz_free_slab, us_link)
2096 		slab_print(slab);
2097 	printf("Full slabs:\n");
2098 	LIST_FOREACH(slab, &zone->uz_full_slab, us_link)
2099 		slab_print(slab);
2100 	for (i = 0; i <= mp_maxid; i++) {
2101 		if (CPU_ABSENT(i))
2102 			continue;
2103 		cache = &zone->uz_cpu[i];
2104 		printf("CPU %d Cache:\n", i);
2105 		cache_print(cache);
2106 	}
2107 }
2108 
2109 /*
2110  * Sysctl handler for vm.zone
2111  *
2112  * stolen from vm_zone.c
2113  */
2114 static int
2115 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2116 {
2117 	int error, len, cnt;
2118 	const int linesize = 128;	/* conservative */
2119 	int totalfree;
2120 	char *tmpbuf, *offset;
2121 	uma_zone_t z;
2122 	char *p;
2123 	int cpu;
2124 	int cachefree;
2125 	uma_bucket_t bucket;
2126 	uma_cache_t cache;
2127 
2128 	cnt = 0;
2129 	mtx_lock(&uma_mtx);
2130 	LIST_FOREACH(z, &uma_zones, uz_link)
2131 		cnt++;
2132 	mtx_unlock(&uma_mtx);
2133 	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2134 			M_TEMP, M_WAITOK);
2135 	len = snprintf(tmpbuf, linesize,
2136 	    "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2137 	if (cnt == 0)
2138 		tmpbuf[len - 1] = '\0';
2139 	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2140 	if (error || cnt == 0)
2141 		goto out;
2142 	offset = tmpbuf;
2143 	mtx_lock(&uma_mtx);
2144 	LIST_FOREACH(z, &uma_zones, uz_link) {
2145 		if (cnt == 0)	/* list may have changed size */
2146 			break;
2147 		if (!(z->uz_flags & UMA_ZFLAG_INTERNAL)) {
2148 			for (cpu = 0; cpu <= mp_maxid; cpu++) {
2149 				if (CPU_ABSENT(cpu))
2150 					continue;
2151 				CPU_LOCK(cpu);
2152 			}
2153 		}
2154 		ZONE_LOCK(z);
2155 		cachefree = 0;
2156 		if (!(z->uz_flags & UMA_ZFLAG_INTERNAL)) {
2157 			for (cpu = 0; cpu <= mp_maxid; cpu++) {
2158 				if (CPU_ABSENT(cpu))
2159 					continue;
2160 				cache = &z->uz_cpu[cpu];
2161 				if (cache->uc_allocbucket != NULL)
2162 					cachefree += cache->uc_allocbucket->ub_cnt;
2163 				if (cache->uc_freebucket != NULL)
2164 					cachefree += cache->uc_freebucket->ub_cnt;
2165 				CPU_UNLOCK(cpu);
2166 			}
2167 		}
2168 		LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) {
2169 			cachefree += bucket->ub_cnt;
2170 		}
2171 		totalfree = z->uz_free + cachefree;
2172 		len = snprintf(offset, linesize,
2173 		    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2174 		    z->uz_name, z->uz_size,
2175 		    z->uz_maxpages * z->uz_ipers,
2176 		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2177 		    totalfree,
2178 		    (unsigned long long)z->uz_allocs);
2179 		ZONE_UNLOCK(z);
2180 		for (p = offset + 12; p > offset && *p == ' '; --p)
2181 			/* nothing */ ;
2182 		p[1] = ':';
2183 		cnt--;
2184 		offset += len;
2185 	}
2186 	mtx_unlock(&uma_mtx);
2187 	*offset++ = '\0';
2188 	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2189 out:
2190 	FREE(tmpbuf, M_TEMP);
2191 	return (error);
2192 }
2193