xref: /freebsd/sys/vm/uma_core.c (revision b52f49a9a0f22207ad5130ad8faba08de3ed23d8)
1 /*
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * uma_core.c  Implementation of the Universal Memory allocator
29  *
30  * This allocator is intended to replace the multitude of similar object caches
31  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
32  * efficient.  A primary design goal is to return unused memory to the rest of
33  * the system.  This will make the system as a whole more flexible due to the
34  * ability to move memory to subsystems which most need it instead of leaving
35  * pools of reserved memory unused.
36  *
37  * The basic ideas stem from similar slab/zone based allocators whose algorithms
38  * are well known.
39  *
40  */
41 
42 /*
43  * TODO:
44  *	- Improve memory usage for large allocations
45  *	- Investigate cache size adjustments
46  */
47 
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
50 
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57 
58 #include "opt_param.h"
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/kernel.h>
62 #include <sys/types.h>
63 #include <sys/queue.h>
64 #include <sys/malloc.h>
65 #include <sys/lock.h>
66 #include <sys/sysctl.h>
67 #include <sys/mutex.h>
68 #include <sys/proc.h>
69 #include <sys/smp.h>
70 #include <sys/vmmeter.h>
71 
72 #include <vm/vm.h>
73 #include <vm/vm_object.h>
74 #include <vm/vm_page.h>
75 #include <vm/vm_param.h>
76 #include <vm/vm_map.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_extern.h>
79 #include <vm/uma.h>
80 #include <vm/uma_int.h>
81 #include <vm/uma_dbg.h>
82 
83 #include <machine/vmparam.h>
84 
85 /*
86  * This is the zone from which all zones are spawned.  The idea is that even
87  * the zone heads are allocated from the allocator, so we use the bss section
88  * to bootstrap us.
89  */
90 static struct uma_zone masterzone;
91 static uma_zone_t zones = &masterzone;
92 
93 /* This is the zone from which all uma_slab_t structures are allocated. */
94 static uma_zone_t slabzone;
95 
96 /*
97  * The initial hash tables come out of this zone so they can be allocated
98  * prior to malloc coming up.
99  */
100 static uma_zone_t hashzone;
101 
102 /*
103  * Zone that buckets come from.
104  */
105 static uma_zone_t bucketzone;
106 
107 /*
108  * Are we allowed to allocate buckets?
109  */
110 static int bucketdisable = 1;
111 
112 /* Linked list of all zones in the system */
113 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
114 
115 /* This mutex protects the zone list */
116 static struct mtx uma_mtx;
117 
118 /* Linked list of boot time pages */
119 static LIST_HEAD(,uma_slab) uma_boot_pages =
120     LIST_HEAD_INITIALIZER(&uma_boot_pages);
121 
122 /* Count of free boottime pages */
123 static int uma_boot_free = 0;
124 
125 /* Is the VM done starting up? */
126 static int booted = 0;
127 
128 /* This is the handle used to schedule our working set calculator */
129 static struct callout uma_callout;
130 
131 /* This is mp_maxid + 1, for use while looping over each cpu */
132 static int maxcpu;
133 
134 /*
135  * This structure is passed as the zone ctor arg so that I don't have to create
136  * a special allocation function just for zones.
137  */
138 struct uma_zctor_args {
139 	char *name;
140 	size_t size;
141 	uma_ctor ctor;
142 	uma_dtor dtor;
143 	uma_init uminit;
144 	uma_fini fini;
145 	int align;
146 	u_int16_t flags;
147 };
148 
149 /* Prototypes.. */
150 
151 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
152 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
153 static void page_free(void *, int, u_int8_t);
154 static uma_slab_t slab_zalloc(uma_zone_t, int);
155 static void cache_drain(uma_zone_t);
156 static void bucket_drain(uma_zone_t, uma_bucket_t);
157 static void zone_drain(uma_zone_t);
158 static void zone_ctor(void *, int, void *);
159 static void zone_dtor(void *, int, void *);
160 static void zero_init(void *, int);
161 static void zone_small_init(uma_zone_t zone);
162 static void zone_large_init(uma_zone_t zone);
163 static void zone_foreach(void (*zfunc)(uma_zone_t));
164 static void zone_timeout(uma_zone_t zone);
165 static int hash_alloc(struct uma_hash *);
166 static int hash_expand(struct uma_hash *, struct uma_hash *);
167 static void hash_free(struct uma_hash *hash);
168 static void uma_timeout(void *);
169 static void uma_startup3(void);
170 static void *uma_zalloc_internal(uma_zone_t, void *, int);
171 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
172 static void bucket_enable(void);
173 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
174 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
175 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
176 
177 void uma_print_zone(uma_zone_t);
178 void uma_print_stats(void);
179 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
180 
181 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
182     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
183 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
184 
185 /*
186  * This routine checks to see whether or not it's safe to enable buckets.
187  */
188 
189 static void
190 bucket_enable(void)
191 {
192 	if (cnt.v_free_count < cnt.v_free_min)
193 		bucketdisable = 1;
194 	else
195 		bucketdisable = 0;
196 }
197 
198 
199 /*
200  * Routine called by timeout which is used to fire off some time interval
201  * based calculations.  (working set, stats, etc.)
202  *
203  * Arguments:
204  *	arg   Unused
205  *
206  * Returns:
207  *	Nothing
208  */
209 static void
210 uma_timeout(void *unused)
211 {
212 	bucket_enable();
213 	zone_foreach(zone_timeout);
214 
215 	/* Reschedule this event */
216 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
217 }
218 
219 /*
220  * Routine to perform timeout driven calculations.  This does the working set
221  * calculation as well as hash expansion and per cpu statistics aggregation.
222  *
223  *  Arguments:
224  *	zone  The zone to operate on
225  *
226  *  Returns:
227  *	Nothing
228  */
229 static void
230 zone_timeout(uma_zone_t zone)
231 {
232 	uma_cache_t cache;
233 	u_int64_t alloc;
234 	int free;
235 	int cpu;
236 
237 	alloc = 0;
238 	free = 0;
239 
240 	/*
241 	 * Aggregate per cpu cache statistics back to the zone.
242 	 *
243 	 * I may rewrite this to set a flag in the per cpu cache instead of
244 	 * locking.  If the flag is not cleared on the next round I will have
245 	 * to lock and do it here instead so that the statistics don't get too
246 	 * far out of sync.
247 	 */
248 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
249 		for (cpu = 0; cpu < maxcpu; cpu++) {
250 			if (CPU_ABSENT(cpu))
251 				continue;
252 			CPU_LOCK(zone, cpu);
253 			cache = &zone->uz_cpu[cpu];
254 			/* Add them up, and reset */
255 			alloc += cache->uc_allocs;
256 			cache->uc_allocs = 0;
257 			if (cache->uc_allocbucket)
258 				free += cache->uc_allocbucket->ub_ptr + 1;
259 			if (cache->uc_freebucket)
260 				free += cache->uc_freebucket->ub_ptr + 1;
261 			CPU_UNLOCK(zone, cpu);
262 		}
263 	}
264 
265 	/* Now push these stats back into the zone.. */
266 	ZONE_LOCK(zone);
267 	zone->uz_allocs += alloc;
268 
269 	/*
270 	 * cachefree is an instantaneous snapshot of what is in the per cpu
271 	 * caches, not an accurate counter
272 	 */
273 	zone->uz_cachefree = free;
274 
275 	/*
276 	 * Expand the zone hash table.
277 	 *
278 	 * This is done if the number of slabs is larger than the hash size.
279 	 * What I'm trying to do here is eliminate collisions entirely.  This
280 	 * may be a little aggressive.  Should I allow for two collisions max?
281 	 */
282 
283 	if (zone->uz_flags & UMA_ZFLAG_HASH &&
284 	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
285 		struct uma_hash newhash;
286 		struct uma_hash oldhash;
287 		int ret;
288 
289 		/*
290 		 * This is so involved because allocating and freeing
291 		 * while the zone lock is held will lead to deadlock.
292 		 * I have to do everything in stages and check for
293 		 * races.
294 		 */
295 		newhash = zone->uz_hash;
296 		ZONE_UNLOCK(zone);
297 		ret = hash_alloc(&newhash);
298 		ZONE_LOCK(zone);
299 		if (ret) {
300 			if (hash_expand(&zone->uz_hash, &newhash)) {
301 				oldhash = zone->uz_hash;
302 				zone->uz_hash = newhash;
303 			} else
304 				oldhash = newhash;
305 
306 			ZONE_UNLOCK(zone);
307 			hash_free(&oldhash);
308 			ZONE_LOCK(zone);
309 		}
310 	}
311 
312 	/*
313 	 * Here we compute the working set size as the total number of items
314 	 * left outstanding since the last time interval.  This is slightly
315 	 * suboptimal. What we really want is the highest number of outstanding
316 	 * items during the last time quantum.  This should be close enough.
317 	 *
318 	 * The working set size is used to throttle the zone_drain function.
319 	 * We don't want to return memory that we may need again immediately.
320 	 */
321 	alloc = zone->uz_allocs - zone->uz_oallocs;
322 	zone->uz_oallocs = zone->uz_allocs;
323 	zone->uz_wssize = alloc;
324 
325 	ZONE_UNLOCK(zone);
326 }
327 
328 /*
329  * Allocate and zero fill the next sized hash table from the appropriate
330  * backing store.
331  *
332  * Arguments:
333  *	hash  A new hash structure with the old hash size in uh_hashsize
334  *
335  * Returns:
336  *	1 on success and 0 on failure.
337  */
338 static int
339 hash_alloc(struct uma_hash *hash)
340 {
341 	int oldsize;
342 	int alloc;
343 
344 	oldsize = hash->uh_hashsize;
345 
346 	/* We're just going to go to the next power of two. */
347 	if (oldsize)  {
348 		hash->uh_hashsize = oldsize * 2;
349 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
350 		/* XXX Shouldn't be abusing DEVBUF here */
351 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
352 		    M_DEVBUF, M_NOWAIT);
353 	} else {
354 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
355 		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
356 		    M_WAITOK);
357 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
358 	}
359 	if (hash->uh_slab_hash) {
360 		bzero(hash->uh_slab_hash, alloc);
361 		hash->uh_hashmask = hash->uh_hashsize - 1;
362 		return (1);
363 	}
364 
365 	return (0);
366 }
367 
368 /*
369  * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
370  * to reduce collisions.  This must not be done in the regular allocation path,
371  * otherwise, we can recurse on the vm while allocating pages.
372  *
373  * Arguments:
374  *	oldhash  The hash you want to expand
375  *	newhash  The hash structure for the new table
376  *
377  * Returns:
378  * 	1 if the table was expanded and 0 otherwise.
381  */
382 static int
383 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
384 {
385 	uma_slab_t slab;
386 	int hval;
387 	int i;
388 
389 	if (!newhash->uh_slab_hash)
390 		return (0);
391 
392 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
393 		return (0);
394 
395 	/*
396 	 * I need to investigate hash algorithms for resizing without a
397 	 * full rehash.
398 	 */
399 
400 	for (i = 0; i < oldhash->uh_hashsize; i++)
401 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
402 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
403 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
404 			hval = UMA_HASH(newhash, slab->us_data);
405 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
406 			    slab, us_hlink);
407 		}
408 
409 	return (1);
410 }
411 
412 /*
413  * Free the hash table's bucket array to the appropriate backing store.
414  *
415  * Arguments:
416  *	hash  The hash table whose bucket array we're freeing; its
417  *	      uh_hashsize determines which backing store it came from.
418  *
419  * Returns:
420  *	Nothing
421  */
422 static void
423 hash_free(struct uma_hash *hash)
424 {
425 	if (hash->uh_slab_hash == NULL)
426 		return;
427 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
428 		uma_zfree_internal(hashzone,
429 		    hash->uh_slab_hash, NULL, 0);
430 	else
431 		free(hash->uh_slab_hash, M_DEVBUF);
432 }
433 
434 /*
435  * Frees all outstanding items in a bucket
436  *
437  * Arguments:
438  *	zone   The zone to free to, must be unlocked.
439  *	bucket The free/alloc bucket with items, cpu queue must be locked.
440  *
441  * Returns:
442  *	Nothing
443  */
444 
445 static void
446 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
447 {
448 	uma_slab_t slab;
449 	int mzone;
450 	void *item;
451 
452 	if (bucket == NULL)
453 		return;
454 
455 	slab = NULL;
456 	mzone = 0;
457 
458 	/* We have to look up the slab again for malloc zones. */
459 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
460 		mzone = 1;
461 
462 	while (bucket->ub_ptr > -1)  {
463 		item = bucket->ub_bucket[bucket->ub_ptr];
464 #ifdef INVARIANTS
465 		bucket->ub_bucket[bucket->ub_ptr] = NULL;
466 		KASSERT(item != NULL,
467 		    ("bucket_drain: botched ptr, item is NULL"));
468 #endif
469 		bucket->ub_ptr--;
470 		/*
471 		 * This is extremely inefficient.  The slab pointer was passed
472 		 * to uma_zfree_arg, but we lost it because the buckets don't
473 		 * hold them.  This will go away when free() gets a size passed
474 		 * to it.
475 		 */
476 		if (mzone)
477 			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
478 		uma_zfree_internal(zone, item, slab, 1);
479 	}
480 }
481 
482 /*
483  * Drains the per cpu caches for a zone.
484  *
485  * Arguments:
486  *	zone  The zone to drain, must be locked.
487  *
488  * Returns:
489  *	Nothing
490  *
491  * This function returns with the zone locked so that the per cpu queues can
492  * not be filled until zone_drain is finished.
493  *
494  */
495 static void
496 cache_drain(uma_zone_t zone)
497 {
498 	uma_bucket_t bucket;
499 	uma_cache_t cache;
500 	int cpu;
501 
502 	/*
503 	 * Flush out the per cpu queues.
504 	 *
505 	 * XXX This causes unnecessary thrashing due to immediately having
506 	 * empty per cpu queues.  I need to improve this.
507 	 */
508 
509 	/*
510 	 * We have to lock each cpu cache before locking the zone
511 	 */
512 	ZONE_UNLOCK(zone);
513 
514 	for (cpu = 0; cpu < maxcpu; cpu++) {
515 		if (CPU_ABSENT(cpu))
516 			continue;
517 		CPU_LOCK(zone, cpu);
518 		cache = &zone->uz_cpu[cpu];
519 		bucket_drain(zone, cache->uc_allocbucket);
520 		bucket_drain(zone, cache->uc_freebucket);
521 	}
522 
523 	/*
524 	 * Drain the bucket queues and free the buckets, we just keep two per
525 	 * cpu (alloc/free).
526 	 */
527 	ZONE_LOCK(zone);
528 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
529 		LIST_REMOVE(bucket, ub_link);
530 		ZONE_UNLOCK(zone);
531 		bucket_drain(zone, bucket);
532 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
533 		ZONE_LOCK(zone);
534 	}
535 
536 	/* Now we do the free queue.. */
537 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
538 		LIST_REMOVE(bucket, ub_link);
539 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
540 	}
541 
542 	/* We unlock here, but they will all block until the zone is unlocked */
543 	for (cpu = 0; cpu < maxcpu; cpu++) {
544 		if (CPU_ABSENT(cpu))
545 			continue;
546 		CPU_UNLOCK(zone, cpu);
547 	}
548 
549 	zone->uz_cachefree = 0;
550 }
551 
552 /*
553  * Frees pages from a zone back to the system.  This is done on demand from
554  * the pageout daemon.
555  *
556  * Arguments:
557  *	zone  The zone to free pages from
559  *
560  * Returns:
561  *	Nothing.
562  */
563 static void
564 zone_drain(uma_zone_t zone)
565 {
566 	struct slabhead freeslabs = {};
567 	uma_slab_t slab;
568 	uma_slab_t n;
569 	u_int64_t extra;
570 	u_int8_t flags;
571 	u_int8_t *mem;
572 	int i;
573 
574 	/*
575 	 * We don't want to take pages from statically allocated zones at this
576 	 * time
577 	 */
578 	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
579 		return;
580 
581 	ZONE_LOCK(zone);
582 
583 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
584 		cache_drain(zone);
585 
586 	if (zone->uz_free < zone->uz_wssize)
587 		goto finished;
588 #ifdef UMA_DEBUG
589 	printf("%s working set size: %llu free items: %u\n",
590 	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
591 #endif
592 	extra = zone->uz_free - zone->uz_wssize;
593 	extra /= zone->uz_ipers;
594 
595 	/* extra is now the number of extra slabs that we can free */
596 
597 	if (extra == 0)
598 		goto finished;
599 
600 	slab = LIST_FIRST(&zone->uz_free_slab);
601 	while (slab && extra) {
602 		n = LIST_NEXT(slab, us_link);
603 
604 		/* We have nowhere to free these to */
605 		if (slab->us_flags & UMA_SLAB_BOOT) {
606 			slab = n;
607 			continue;
608 		}
609 
610 		LIST_REMOVE(slab, us_link);
611 		zone->uz_pages -= zone->uz_ppera;
612 		zone->uz_free -= zone->uz_ipers;
613 
614 		if (zone->uz_flags & UMA_ZFLAG_HASH)
615 			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
616 
617 		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
618 
619 		slab = n;
620 		extra--;
621 	}
622 finished:
623 	ZONE_UNLOCK(zone);
624 
625 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
626 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
627 		if (zone->uz_fini)
628 			for (i = 0; i < zone->uz_ipers; i++)
629 				zone->uz_fini(
630 				    slab->us_data + (zone->uz_rsize * i),
631 				    zone->uz_size);
632 		flags = slab->us_flags;
633 		mem = slab->us_data;
634 
635 		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
636 			uma_zfree_internal(slabzone, slab, NULL, 0);
637 		if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
638 			vm_object_t obj;
639 
640 			if (flags & UMA_SLAB_KMEM)
641 				obj = kmem_object;
642 			else
643 				obj = NULL;
644 			for (i = 0; i < zone->uz_ppera; i++)
645 				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
646 				    obj);
647 		}
648 #ifdef UMA_DEBUG
649 		printf("%s: Returning %d bytes.\n",
650 		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
651 #endif
652 		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
653 	}
654 
655 }
656 
657 /*
658  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
659  *
660  * Arguments:
661  *	zone  The zone to allocate slabs for
662  *	wait  Shall we wait?
663  *
664  * Returns:
665  *	The slab that was allocated or NULL if there is no memory and the
666  *	caller specified M_NOWAIT.
667  *
668  */
669 static uma_slab_t
670 slab_zalloc(uma_zone_t zone, int wait)
671 {
672 	uma_slab_t slab;	/* Starting slab */
673 	u_int8_t *mem;
674 	u_int8_t flags;
675 	int i;
676 
677 	slab = NULL;
678 
679 #ifdef UMA_DEBUG
680 	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
681 #endif
682 	ZONE_UNLOCK(zone);
683 
684 	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
685 		slab = uma_zalloc_internal(slabzone, NULL, wait);
686 		if (slab == NULL) {
687 			ZONE_LOCK(zone);
688 			return NULL;
689 		}
690 	}
691 
692 	/*
693 	 * This reproduces the old vm_zone behavior of zero filling pages the
694 	 * first time they are added to a zone.
695 	 *
696 	 * Malloced items are zeroed in uma_zalloc.
697 	 */
698 
699 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
700 		wait |= M_ZERO;
701 	else
702 		wait &= ~M_ZERO;
703 
704 	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
705 		if ((wait & M_NOWAIT) == 0) {
706 			mtx_lock(&Giant);
707 			mem = zone->uz_allocf(zone,
708 			    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
709 			mtx_unlock(&Giant);
710 		} else {
711 			mem = zone->uz_allocf(zone,
712 			    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
713 		}
714 		if (mem == NULL) {
715 			ZONE_LOCK(zone);
716 			return (NULL);
717 		}
718 	} else {
719 		uma_slab_t tmps;
720 
721 		if (zone->uz_ppera > 1)
722 			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
723 		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
724 			panic("Mallocing before uma_startup2 has been called.\n");
725 		if (uma_boot_free == 0)
726 			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
727 		tmps = LIST_FIRST(&uma_boot_pages);
728 		LIST_REMOVE(tmps, us_link);
729 		uma_boot_free--;
730 		mem = tmps->us_data;
731 		flags = tmps->us_flags;
732 	}
733 
734 	/* Point the slab into the allocated memory */
735 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
736 		slab = (uma_slab_t)(mem + zone->uz_pgoff);
737 
738 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
739 		for (i = 0; i < zone->uz_ppera; i++)
740 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
741 
742 	slab->us_zone = zone;
743 	slab->us_data = mem;
744 
745 	/*
746 	 * This is intended to spread data out across cache lines.
747 	 *
748 	 * This code doesn't seem to work properly on x86, and on alpha
749 	 * it makes absolutely no performance difference. I'm sure it could
750 	 * use some tuning, but Sun makes outrageous claims about its
751 	 * performance.
752 	 */
753 #if 0
754 	if (zone->uz_cachemax) {
755 		slab->us_data += zone->uz_cacheoff;
756 		zone->uz_cacheoff += UMA_CACHE_INC;
757 		if (zone->uz_cacheoff > zone->uz_cachemax)
758 			zone->uz_cacheoff = 0;
759 	}
760 #endif
761 
762 	slab->us_freecount = zone->uz_ipers;
763 	slab->us_firstfree = 0;
764 	slab->us_flags = flags;
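	/* Chain the free list: each free item index points at the next one. */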
765 	for (i = 0; i < zone->uz_ipers; i++)
766 		slab->us_freelist[i] = i+1;
767 
768 	if (zone->uz_init)
769 		for (i = 0; i < zone->uz_ipers; i++)
770 			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
771 			    zone->uz_size);
772 	ZONE_LOCK(zone);
773 
774 	if (zone->uz_flags & UMA_ZFLAG_HASH)
775 		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
776 
777 	zone->uz_pages += zone->uz_ppera;
778 	zone->uz_free += zone->uz_ipers;
779 
780 
781 	return (slab);
782 }
783 
784 /*
785  * Allocates a number of pages from the system
786  *
787  * Arguments:
788  *	zone  Unused
789  *	bytes  The number of bytes requested
790  *	wait  Shall we wait?
791  *
792  * Returns:
793  *	A pointer to the allocated memory or possibly
794  *	NULL if M_NOWAIT is set.
795  */
796 static void *
797 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
798 {
799 	void *p;	/* Returned page */
800 
801 	*pflag = UMA_SLAB_KMEM;
802 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
803 
804 	return (p);
805 }
806 
807 /*
808  * Allocates a number of pages from within an object
809  *
810  * Arguments:
811  *	zone   The zone whose object and KVA back this allocation
812  *	bytes  The number of bytes requested
813  *	wait   Shall we wait?
814  *
815  * Returns:
816  *	A pointer to the allocated memory or possibly
817  *	NULL if M_NOWAIT is set.
818  *
819  * TODO: If we fail during a multi-page allocation release the pages that have
820  *	 already been allocated.
821  */
822 static void *
823 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
824 {
825 	vm_offset_t zkva;
826 	vm_offset_t retkva;
827 	vm_page_t p;
828 	int pages;
829 
830 	retkva = 0;
831 	pages = zone->uz_pages;
832 
833 	/*
834 	 * This looks a little weird since we're getting one page at a time
835 	 */
836 	while (bytes > 0) {
837 		VM_OBJECT_LOCK(zone->uz_obj);
838 		p = vm_page_alloc(zone->uz_obj, pages,
839 		    VM_ALLOC_INTERRUPT);
840 		VM_OBJECT_UNLOCK(zone->uz_obj);
841 		if (p == NULL)
842 			return (NULL);
843 
844 		zkva = zone->uz_kva + pages * PAGE_SIZE;
845 		if (retkva == 0)
846 			retkva = zkva;
847 		pmap_qenter(zkva, &p, 1);
848 		bytes -= PAGE_SIZE;
849 		pages += 1;
850 	}
851 
852 	*flags = UMA_SLAB_PRIV;
853 
854 	return ((void *)retkva);
855 }
856 
857 /*
858  * Frees a number of pages to the system
859  *
860  * Arguments:
861  *	mem   A pointer to the memory to be freed
862  *	size  The size of the memory being freed
863  *	flags The original slab us_flags field
864  *
865  * Returns:
866  *	Nothing
867  *
868  */
869 static void
870 page_free(void *mem, int size, u_int8_t flags)
871 {
872 	vm_map_t map;
873 
874 	if (flags & UMA_SLAB_KMEM)
875 		map = kmem_map;
876 	else
877 		panic("UMA: page_free used with invalid flags %d\n", flags);
878 
879 	kmem_free(map, (vm_offset_t)mem, size);
880 }
881 
882 /*
883  * Zero fill initializer
884  *
885  * Arguments/Returns follow uma_init specifications
886  *
887  */
888 static void
889 zero_init(void *mem, int size)
890 {
891 	bzero(mem, size);
892 }
893 
894 /*
895  * Finish creating a small uma zone.  This calculates ipers and the real item size.
896  *
897  * Arguments
898  *	zone  The zone we should initialize
899  *
900  * Returns
901  *	Nothing
902  */
903 static void
904 zone_small_init(uma_zone_t zone)
905 {
906 	int rsize;
907 	int memused;
908 	int ipers;
909 
910 	rsize = zone->uz_size;
911 
912 	if (rsize < UMA_SMALLEST_UNIT)
913 		rsize = UMA_SMALLEST_UNIT;
914 
915 	if (rsize & zone->uz_align)
916 		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
917 
918 	zone->uz_rsize = rsize;
919 
920 	rsize += 1;	/* Account for the byte of linkage */
921 	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
922 	zone->uz_ppera = 1;
923 
924 	memused = zone->uz_ipers * zone->uz_rsize;
925 
926 	/* Can we do any better? */
927 	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
928 		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
929 			return;
930 		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
931 		if (ipers > zone->uz_ipers) {
932 			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
933 			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
934 				zone->uz_flags |= UMA_ZFLAG_HASH;
935 			zone->uz_ipers = ipers;
936 		}
937 	}
938 
939 }
940 
941 /*
942  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
943  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
944  * more complicated.
945  *
946  * Arguments
947  *	zone  The zone we should initialize
948  *
949  * Returns
950  *	Nothing
951  */
952 static void
953 zone_large_init(uma_zone_t zone)
954 {
955 	int pages;
956 
957 	pages = zone->uz_size / UMA_SLAB_SIZE;
958 
959 	/* Account for remainder */
960 	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
961 		pages++;
962 
963 	zone->uz_ppera = pages;
964 	zone->uz_ipers = 1;
965 
966 	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
967 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
968 		zone->uz_flags |= UMA_ZFLAG_HASH;
969 
970 	zone->uz_rsize = zone->uz_size;
971 }
972 
973 /*
974  * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
975  * the zone onto the global zone list.
976  *
977  * Arguments/Returns follow uma_ctor specifications
978  *	udata  Actually uma_zctor_args
979  *
980  */
981 
982 static void
983 zone_ctor(void *mem, int size, void *udata)
984 {
985 	struct uma_zctor_args *arg = udata;
986 	uma_zone_t zone = mem;
987 	int privlc;
988 	int cplen;
989 	int cpu;
990 
991 	bzero(zone, size);
992 	zone->uz_name = arg->name;
993 	zone->uz_size = arg->size;
994 	zone->uz_ctor = arg->ctor;
995 	zone->uz_dtor = arg->dtor;
996 	zone->uz_init = arg->uminit;
997 	zone->uz_fini = arg->fini;
998 	zone->uz_align = arg->align;
999 	zone->uz_free = 0;
1000 	zone->uz_pages = 0;
1001 	zone->uz_flags = 0;
1002 	zone->uz_allocf = page_alloc;
1003 	zone->uz_freef = page_free;
1004 
1005 	if (arg->flags & UMA_ZONE_ZINIT)
1006 		zone->uz_init = zero_init;
1007 
1008 	if (arg->flags & UMA_ZONE_INTERNAL)
1009 		zone->uz_flags |= UMA_ZFLAG_INTERNAL;
1010 
1011 	if (arg->flags & UMA_ZONE_MALLOC)
1012 		zone->uz_flags |= UMA_ZFLAG_MALLOC;
1013 
1014 	if (arg->flags & UMA_ZONE_NOFREE)
1015 		zone->uz_flags |= UMA_ZFLAG_NOFREE;
1016 
1017 	if (arg->flags & UMA_ZONE_VM)
1018 		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
1019 
1020 	if (zone->uz_size > UMA_SLAB_SIZE)
1021 		zone_large_init(zone);
1022 	else
1023 		zone_small_init(zone);
1024 #ifdef UMA_MD_SMALL_ALLOC
1025 	if (zone->uz_ppera == 1) {
1026 		zone->uz_allocf = uma_small_alloc;
1027 		zone->uz_freef = uma_small_free;
1028 	}
1029 #endif	/* UMA_MD_SMALL_ALLOC */
1030 
1031 	if (arg->flags & UMA_ZONE_MTXCLASS)
1032 		privlc = 1;
1033 	else
1034 		privlc = 0;
1035 
1036 	/* We do this so that the per cpu lock name is unique for each zone */
1037 	memcpy(zone->uz_lname, "PCPU ", 5);
1038 	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1039 	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1040 	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1041 
1042 	/*
1043 	 * If we're putting the slab header in the actual page we need to
1044 	 * figure out where in each page it goes.  This calculates a right
1045 	 * justified offset into the memory on an ALIGN_PTR boundary.
1046 	 */
1047 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1048 		int totsize;
1049 		int waste;
1050 
1051 		/* Size of the slab struct and free list */
1052 		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1053 		if (totsize & UMA_ALIGN_PTR)
1054 			totsize = (totsize & ~UMA_ALIGN_PTR) +
1055 			    (UMA_ALIGN_PTR + 1);
1056 		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1057 
1058 		waste = zone->uz_pgoff;
1059 		waste -= (zone->uz_ipers * zone->uz_rsize);
1060 
1061 		/*
1062 		 * This calculates how much space we have for cache line size
1063 		 * optimizations.  It works by offsetting each slab slightly.
1064 		 * Currently it breaks on x86, and so it is disabled.
1065 		 */
1066 
1067 		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1068 			zone->uz_cachemax = waste - UMA_CACHE_INC;
1069 			zone->uz_cacheoff = 0;
1070 		}
1071 
1072 		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1073 		    + zone->uz_ipers;
1074 		/* I don't think it's possible, but I'll make sure anyway */
1075 		if (totsize > UMA_SLAB_SIZE) {
1076 			printf("zone %s ipers %d rsize %d size %d\n",
1077 			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1078 			    zone->uz_size);
1079 			panic("UMA slab won't fit.\n");
1080 		}
1081 	}
1082 
1083 	if (zone->uz_flags & UMA_ZFLAG_HASH)
1084 		hash_alloc(&zone->uz_hash);
1085 
1086 #ifdef UMA_DEBUG
1087 	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1088 	    zone->uz_name, zone,
1089 	    zone->uz_size, zone->uz_ipers,
1090 	    zone->uz_ppera, zone->uz_pgoff);
1091 #endif
1092 	ZONE_LOCK_INIT(zone, privlc);
1093 
1094 	mtx_lock(&uma_mtx);
1095 	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1096 	mtx_unlock(&uma_mtx);
1097 
1098 	/*
1099 	 * Some internal zones don't have room allocated for the per cpu
1100 	 * caches.  If we're internal, bail out here.
1101 	 */
1102 
1103 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1104 		return;
1105 
1106 	if (zone->uz_ipers < UMA_BUCKET_SIZE)
1107 		zone->uz_count = zone->uz_ipers - 1;
1108 	else
1109 		zone->uz_count = UMA_BUCKET_SIZE - 1;
1110 
1111 	for (cpu = 0; cpu < maxcpu; cpu++)
1112 		CPU_LOCK_INIT(zone, cpu, privlc);
1113 }
1114 
1115 /*
1116  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
1117  * and removes the zone from the global list.
1118  *
1119  * Arguments/Returns follow uma_dtor specifications
1120  *	udata  unused
1121  */
1122 
1123 static void
1124 zone_dtor(void *arg, int size, void *udata)
1125 {
1126 	uma_zone_t zone;
1127 	int cpu;
1128 
1129 	zone = (uma_zone_t)arg;
1130 
1131 	ZONE_LOCK(zone);
1132 	zone->uz_wssize = 0;
1133 	ZONE_UNLOCK(zone);
1134 
1135 	mtx_lock(&uma_mtx);
1136 	LIST_REMOVE(zone, uz_link);
1137 	zone_drain(zone);
1138 	mtx_unlock(&uma_mtx);
1139 
1140 	ZONE_LOCK(zone);
1141 	if (zone->uz_free != 0)
1142 		printf("Zone %s was not empty (%d items).  Lost %d pages of memory.\n",
1143 		    zone->uz_name, zone->uz_free, zone->uz_pages);
1144 
1145 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1146 		for (cpu = 0; cpu < maxcpu; cpu++)
1147 			CPU_LOCK_FINI(zone, cpu);
1148 
1149 	ZONE_UNLOCK(zone);
1150 	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1151 		hash_free(&zone->uz_hash);
1152 
1153 	ZONE_LOCK_FINI(zone);
1154 }
1155 /*
1156  * Traverses every zone in the system and calls a callback
1157  *
1158  * Arguments:
1159  *	zfunc  A pointer to a function which accepts a zone
1160  *		as an argument.
1161  *
1162  * Returns:
1163  *	Nothing
1164  */
1165 static void
1166 zone_foreach(void (*zfunc)(uma_zone_t))
1167 {
1168 	uma_zone_t zone;
1169 
1170 	mtx_lock(&uma_mtx);
1171 	LIST_FOREACH(zone, &uma_zones, uz_link) {
1172 		zfunc(zone);
1173 	}
1174 	mtx_unlock(&uma_mtx);
1175 }
1176 
1177 /* Public functions */
1178 /* See uma.h */
1179 void
1180 uma_startup(void *bootmem)
1181 {
1182 	struct uma_zctor_args args;
1183 	uma_slab_t slab;
1184 	int slabsize;
1185 	int i;
1186 
1187 #ifdef UMA_DEBUG
1188 	printf("Creating uma zone headers zone.\n");
1189 #endif
1190 #ifdef SMP
1191 	maxcpu = mp_maxid + 1;
1192 #else
1193 	maxcpu = 1;
1194 #endif
1195 #ifdef UMA_DEBUG
1196 	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1197 	Debugger("stop");
1198 #endif
1199 	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1200 	/* "Manually" create the initial zone. */
1201 	args.name = "UMA Zones";
1202 	args.size = sizeof(struct uma_zone) +
1203 	    (sizeof(struct uma_cache) * (maxcpu - 1));
1204 	args.ctor = zone_ctor;
1205 	args.dtor = zone_dtor;
1206 	args.uminit = zero_init;
1207 	args.fini = NULL;
1208 	args.align = 32 - 1;
1209 	args.flags = UMA_ZONE_INTERNAL;
1210 	/* The initial zone has no per cpu queues so it's smaller */
1211 	zone_ctor(zones, sizeof(struct uma_zone), &args);
1212 
1213 #ifdef UMA_DEBUG
1214 	printf("Filling boot free list.\n");
1215 #endif
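	/* Carve the caller-supplied boot memory into UMA_BOOT_PAGES boot slabs. */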
1216 	for (i = 0; i < UMA_BOOT_PAGES; i++) {
1217 		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1218 		slab->us_data = (u_int8_t *)slab;
1219 		slab->us_flags = UMA_SLAB_BOOT;
1220 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1221 		uma_boot_free++;
1222 	}
1223 
1224 #ifdef UMA_DEBUG
1225 	printf("Creating slab zone.\n");
1226 #endif
1227 
1228 	/*
1229 	 * This is the max number of free list items we'll have with
1230 	 * offpage slabs.
1231 	 */
1232 
1233 	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1234 	slabsize /= UMA_MAX_WASTE;
1235 	slabsize++;			/* In case the division rounded down */
1236 	slabsize += sizeof(struct uma_slab);
1237 
1238 	/* Now make a zone for slab headers */
1239 	slabzone = uma_zcreate("UMA Slabs",
1240 				slabsize,
1241 				NULL, NULL, NULL, NULL,
1242 				UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1243 
1244 	hashzone = uma_zcreate("UMA Hash",
1245 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1246 	    NULL, NULL, NULL, NULL,
1247 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1248 
1249 	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1250 	    NULL, NULL, NULL, NULL,
1251 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1252 
1253 #ifdef UMA_MD_SMALL_ALLOC
1254 	booted = 1;
1255 #endif
1256 
1257 #ifdef UMA_DEBUG
1258 	printf("UMA startup complete.\n");
1259 #endif
1260 }
1261 
1262 /* see uma.h */
1263 void
1264 uma_startup2(void)
1265 {
1266 	booted = 1;
1267 	bucket_enable();
1268 #ifdef UMA_DEBUG
1269 	printf("UMA startup2 complete.\n");
1270 #endif
1271 }
1272 
1273 /*
1274  * Initialize our callout handle
1275  *
1276  */
1277 
1278 static void
1279 uma_startup3(void)
1280 {
1281 #ifdef UMA_DEBUG
1282 	printf("Starting callout.\n");
1283 #endif
1284 	callout_init(&uma_callout, 0);
1285 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1286 #ifdef UMA_DEBUG
1287 	printf("UMA startup3 complete.\n");
1288 #endif
1289 }
1290 
1291 /* See uma.h */
1292 uma_zone_t
1293 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1294 		uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1295 
1296 {
1297 	struct uma_zctor_args args;
1298 
1299 	/* This stuff is essential for the zone ctor */
1300 	args.name = name;
1301 	args.size = size;
1302 	args.ctor = ctor;
1303 	args.dtor = dtor;
1304 	args.uminit = uminit;
1305 	args.fini = fini;
1306 	args.align = align;
1307 	args.flags = flags;
1308 
1309 	return (uma_zalloc_internal(zones, &args, M_WAITOK));
1310 }
1311 
1312 /* See uma.h */
1313 void
1314 uma_zdestroy(uma_zone_t zone)
1315 {
1316 	uma_zfree_internal(zones, zone, NULL, 0);
1317 }
1318 
1319 /* See uma.h */
1320 void *
1321 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1322 {
1323 	void *item;
1324 	uma_cache_t cache;
1325 	uma_bucket_t bucket;
1326 	int cpu;
1327 
1328 	/* This is the fast path allocation */
1329 #ifdef UMA_DEBUG_ALLOC_1
1330 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1331 #endif
1332 
1333 	if (!(flags & M_NOWAIT)) {
1334 		KASSERT(curthread->td_intr_nesting_level == 0,
1335 		   ("malloc(M_WAITOK) in interrupt context"));
1336 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1337 		    "malloc() of \"%s\"", zone->uz_name);
1338 	}
1339 
1340 zalloc_restart:
1341 	cpu = PCPU_GET(cpuid);
1342 	CPU_LOCK(zone, cpu);
1343 	cache = &zone->uz_cpu[cpu];
1344 
1345 zalloc_start:
1346 	bucket = cache->uc_allocbucket;
1347 
1348 	if (bucket) {
1349 		if (bucket->ub_ptr > -1) {
1350 			item = bucket->ub_bucket[bucket->ub_ptr];
1351 #ifdef INVARIANTS
1352 			bucket->ub_bucket[bucket->ub_ptr] = NULL;
1353 #endif
1354 			bucket->ub_ptr--;
1355 			KASSERT(item != NULL,
1356 			    ("uma_zalloc: Bucket pointer mangled."));
1357 			cache->uc_allocs++;
1358 #ifdef INVARIANTS
1359 			ZONE_LOCK(zone);
1360 			uma_dbg_alloc(zone, NULL, item);
1361 			ZONE_UNLOCK(zone);
1362 #endif
1363 			CPU_UNLOCK(zone, cpu);
1364 			if (zone->uz_ctor)
1365 				zone->uz_ctor(item, zone->uz_size, udata);
1366 			if (flags & M_ZERO)
1367 				bzero(item, zone->uz_size);
1368 			return (item);
1369 		} else if (cache->uc_freebucket) {
1370 			/*
1371 			 * We have run out of items in our allocbucket.
1372 			 * See if we can switch with our free bucket.
1373 			 */
1374 			if (cache->uc_freebucket->ub_ptr > -1) {
1375 				uma_bucket_t swap;
1376 
1377 #ifdef UMA_DEBUG_ALLOC
1378 				printf("uma_zalloc: Swapping empty with alloc.\n");
1379 #endif
1380 				swap = cache->uc_freebucket;
1381 				cache->uc_freebucket = cache->uc_allocbucket;
1382 				cache->uc_allocbucket = swap;
1383 
1384 				goto zalloc_start;
1385 			}
1386 		}
1387 	}
1388 	ZONE_LOCK(zone);
1389 	/* Since we have locked the zone we may as well send back our stats */
1390 	zone->uz_allocs += cache->uc_allocs;
1391 	cache->uc_allocs = 0;
1392 
1393 	/* Our old one is now a free bucket */
1394 	if (cache->uc_allocbucket) {
1395 		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1396 		    ("uma_zalloc_arg: Freeing a non free bucket."));
1397 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1398 		    cache->uc_allocbucket, ub_link);
1399 		cache->uc_allocbucket = NULL;
1400 	}
1401 
1402 	/* Check the free list for a new alloc bucket */
1403 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1404 		KASSERT(bucket->ub_ptr != -1,
1405 		    ("uma_zalloc_arg: Returning an empty bucket."));
1406 
1407 		LIST_REMOVE(bucket, ub_link);
1408 		cache->uc_allocbucket = bucket;
1409 		ZONE_UNLOCK(zone);
1410 		goto zalloc_start;
1411 	}
1412 	/* We are no longer associated with this cpu!!! */
1413 	CPU_UNLOCK(zone, cpu);
1414 
1415 	/* Bump up our uz_count so we get here less often */
1416 	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1417 		zone->uz_count++;
1418 
1419 	/*
1420 	 * Now let's just fill a bucket and put it on the zone's full bucket
1421 	 * list.  If that works we'll restart the allocation from the beginning.
1422 	 */
1423 
1424 	if (uma_zalloc_bucket(zone, flags)) {
1425 		ZONE_UNLOCK(zone);
1426 		goto zalloc_restart;
1427 	}
1428 	ZONE_UNLOCK(zone);
1429 	/*
1430 	 * We may not be able to get a bucket so return an actual item.
1431 	 */
1432 #ifdef UMA_DEBUG
1433 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1434 #endif
1435 
1436 	return (uma_zalloc_internal(zone, udata, flags));
1437 }
1438 
1439 static uma_slab_t
1440 uma_zone_slab(uma_zone_t zone, int flags)
1441 {
1442 	uma_slab_t slab;
1443 
1444 	/*
1445 	 * This is to prevent us from recursively trying to allocate
1446 	 * buckets.  The problem is that if an allocation forces us to
1447 	 * grab a new bucket we will call page_alloc, which will go off
1448 	 * and cause the vm to allocate vm_map_entries.  If we need new
1449 	 * buckets there too we will recurse in kmem_alloc and bad
1450 	 * things happen.  So instead we return a NULL bucket, and make
1451 	 * the code that allocates buckets smart enough to deal with it
1452 	 */
1453 	if (zone == bucketzone && zone->uz_recurse != 0)
1454 		return (NULL);
1455 
1456 	slab = NULL;
1457 
1458 	for (;;) {
1459 		/*
1460 		 * Find a slab with some space.  Prefer slabs that are partially
1461 		 * used over those that are totally full.  This helps to reduce
1462 		 * fragmentation.
1463 		 */
1464 		if (zone->uz_free != 0) {
1465 			if (!LIST_EMPTY(&zone->uz_part_slab)) {
1466 				slab = LIST_FIRST(&zone->uz_part_slab);
1467 			} else {
1468 				slab = LIST_FIRST(&zone->uz_free_slab);
1469 				LIST_REMOVE(slab, us_link);
1470 				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
1471 				    us_link);
1472 			}
1473 			return (slab);
1474 		}
1475 
1476 		/*
1477 		 * M_NOVM means don't ask at all!
1478 		 */
1479 		if (flags & M_NOVM)
1480 			break;
1481 
1482 		if (zone->uz_maxpages &&
1483 		    zone->uz_pages >= zone->uz_maxpages) {
1484 			zone->uz_flags |= UMA_ZFLAG_FULL;
1485 
1486 			if (flags & M_NOWAIT)
1487 				break;
1488 			else
1489 				msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1490 			continue;
1491 		}
1492 		zone->uz_recurse++;
1493 		slab = slab_zalloc(zone, flags);
1494 		zone->uz_recurse--;
1495 		/*
1496 		 * If we got a slab here it's safe to mark it partially used
1497 		 * and return.  We assume that the caller is going to remove
1498 		 * at least one item.
1499 		 */
1500 		if (slab) {
1501 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1502 			return (slab);
1503 		}
1504 		/*
1505 		 * We might not have been able to get a slab but another cpu
1506 		 * could have while we were unlocked.  Check again before we
1507 		 * fail.
1508 		 */
1509 		if (flags & M_NOWAIT)
1510 			flags |= M_NOVM;
1511 	}
1512 	return (slab);
1513 }
1514 
1515 static __inline void *
1516 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
1517 {
1518 	void *item;
1519 	u_int8_t freei;
1520 
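	/* Pop the first free item off the slab's embedded free list. */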
1521 	freei = slab->us_firstfree;
1522 	slab->us_firstfree = slab->us_freelist[freei];
1523 	item = slab->us_data + (zone->uz_rsize * freei);
1524 
1525 	slab->us_freecount--;
1526 	zone->uz_free--;
1527 #ifdef INVARIANTS
1528 	uma_dbg_alloc(zone, slab, item);
1529 #endif
1530 	/* Move this slab to the full list */
1531 	if (slab->us_freecount == 0) {
1532 		LIST_REMOVE(slab, us_link);
1533 		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1534 	}
1535 
1536 	return (item);
1537 }
1538 
1539 static int
1540 uma_zalloc_bucket(uma_zone_t zone, int flags)
1541 {
1542 	uma_bucket_t bucket;
1543 	uma_slab_t slab;
1544 
1545 	/*
1546 	 * Try this zone's free list first so we don't allocate extra buckets.
1547 	 */
1548 
1549 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1550 		KASSERT(bucket->ub_ptr == -1,
1551 		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
1552 		LIST_REMOVE(bucket, ub_link);
1553 	} else {
1554 		int bflags;
1555 
1556 		bflags = flags;
1557 		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1558 			bflags |= M_NOVM;
1559 
1560 		ZONE_UNLOCK(zone);
1561 		bucket = uma_zalloc_internal(bucketzone,
1562 		    NULL, bflags);
1563 		ZONE_LOCK(zone);
1564 		if (bucket != NULL) {
1565 #ifdef INVARIANTS
1566 			bzero(bucket, bucketzone->uz_size);
1567 #endif
1568 			bucket->ub_ptr = -1;
1569 		}
1570 	}
1571 
1572 	if (bucket == NULL)
1573 		return (0);
1574 
1575 #ifdef SMP
1576 	/*
1577 	 * This code is here to limit the number of simultaneous bucket fills
1578 	 * for any given zone to the number of per cpu caches in this zone. This
1579 	 * is done so that we don't allocate more memory than we really need.
1580 	 */
1581 	if (zone->uz_fills >= mp_ncpus)
1582 		goto done;
1583 
1584 #endif
1585 	zone->uz_fills++;
1586 
1587 	/* Try to keep the buckets totally full */
1588 	while ((slab = uma_zone_slab(zone, flags)) != NULL &&
1589 	    bucket->ub_ptr < zone->uz_count) {
1590 		while (slab->us_freecount &&
1591 		    bucket->ub_ptr < zone->uz_count) {
1592 			bucket->ub_bucket[++bucket->ub_ptr] =
1593 			    uma_slab_alloc(zone, slab);
1594 		}
1595 		/* Don't block on the next fill */
1596 		flags |= M_NOWAIT;
1597 	}
1598 
1599 	zone->uz_fills--;
1600 
1601 	if (bucket->ub_ptr != -1) {
1602 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1603 		    bucket, ub_link);
1604 		return (1);
1605 	}
1606 #ifdef SMP
1607 done:
1608 #endif
1609 	uma_zfree_internal(bucketzone, bucket, NULL, 0);
1610 
1611 	return (0);
1612 }
1613 /*
1614  * Allocates an item for an internal zone
1615  *
1616  * Arguments
1617  *	zone   The zone to alloc for.
1618  *	udata  The data to be passed to the constructor.
1619  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
1620  *
1621  * Returns
1622  *	NULL if there is no memory and M_NOWAIT is set
1623  *	An item if successful
1624  */
1625 
1626 static void *
1627 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
1628 {
1629 	uma_slab_t slab;
1630 	void *item;
1631 
1632 	item = NULL;
1633 
1634 	/*
1635 	 * This is to stop us from allocating per cpu buckets while we're
1636 	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
1637 	 * boot pages.
1638 	 */
1639 
1640 	if (bucketdisable && zone == bucketzone)
1641 		return (NULL);
1642 
1643 #ifdef UMA_DEBUG_ALLOC
1644 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1645 #endif
1646 	ZONE_LOCK(zone);
1647 
1648 	slab = uma_zone_slab(zone, flags);
1649 	if (slab == NULL) {
1650 		ZONE_UNLOCK(zone);
1651 		return (NULL);
1652 	}
1653 
1654 	item = uma_slab_alloc(zone, slab);
1655 
1656 	ZONE_UNLOCK(zone);
1657 
1658 	if (zone->uz_ctor != NULL)
1659 		zone->uz_ctor(item, zone->uz_size, udata);
1660 	if (flags & M_ZERO)
1661 		bzero(item, zone->uz_size);
1662 
1663 	return (item);
1664 }
1665 
1666 /* See uma.h */
1667 void
1668 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1669 {
1670 	uma_cache_t cache;
1671 	uma_bucket_t bucket;
1672 	int bflags;
1673 	int cpu;
1674 
1675 	/* This is the fast path free */
1676 #ifdef UMA_DEBUG_ALLOC_1
1677 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1678 #endif
1679 	/*
1680 	 * The race here is acceptable.  If we miss it we'll just have to wait
1681 	 * a little longer for the limits to be reset.
1682 	 */
1683 
1684 	if (zone->uz_flags & UMA_ZFLAG_FULL)
1685 		goto zfree_internal;
1686 
1687 	if (zone->uz_dtor)
1688 		zone->uz_dtor(item, zone->uz_size, udata);
1689 
1690 zfree_restart:
1691 	cpu = PCPU_GET(cpuid);
1692 	CPU_LOCK(zone, cpu);
1693 	cache = &zone->uz_cpu[cpu];
1694 
1695 zfree_start:
1696 	bucket = cache->uc_freebucket;
1697 
1698 	if (bucket) {
1699 		/*
1700 		 * Do we have room in our bucket? It is OK for this uz count
1701 		 * check to be slightly out of sync.
1702 		 */
1703 
1704 		if (bucket->ub_ptr < zone->uz_count) {
1705 			bucket->ub_ptr++;
1706 			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1707 			    ("uma_zfree: Freeing to non free bucket index."));
1708 			bucket->ub_bucket[bucket->ub_ptr] = item;
1709 #ifdef INVARIANTS
1710 			ZONE_LOCK(zone);
1711 			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1712 				uma_dbg_free(zone, udata, item);
1713 			else
1714 				uma_dbg_free(zone, NULL, item);
1715 			ZONE_UNLOCK(zone);
1716 #endif
1717 			CPU_UNLOCK(zone, cpu);
1718 			return;
1719 		} else if (cache->uc_allocbucket) {
1720 #ifdef UMA_DEBUG_ALLOC
1721 			printf("uma_zfree: Swapping buckets.\n");
1722 #endif
1723 			/*
1724 			 * We have run out of space in our freebucket.
1725 			 * See if we can switch with our alloc bucket.
1726 			 */
1727 			if (cache->uc_allocbucket->ub_ptr <
1728 			    cache->uc_freebucket->ub_ptr) {
1729 				uma_bucket_t swap;
1730 
1731 				swap = cache->uc_freebucket;
1732 				cache->uc_freebucket = cache->uc_allocbucket;
1733 				cache->uc_allocbucket = swap;
1734 
1735 				goto zfree_start;
1736 			}
1737 		}
1738 	}
1739 
1740 	/*
1741 	 * We can get here for two reasons:
1742 	 *
1743 	 * 1) The buckets are NULL
1744 	 * 2) The alloc and free buckets are both somewhat full.
1745 	 *
1746 	 */
1747 
1748 	ZONE_LOCK(zone);
1749 
1750 	bucket = cache->uc_freebucket;
1751 	cache->uc_freebucket = NULL;
1752 
1753 	/* Can we throw this on the zone full list? */
1754 	if (bucket != NULL) {
1755 #ifdef UMA_DEBUG_ALLOC
1756 		printf("uma_zfree: Putting old bucket on the free list.\n");
1757 #endif
1758 		/* ub_ptr is pointing to the last free item */
1759 		KASSERT(bucket->ub_ptr != -1,
1760 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1761 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1762 		    bucket, ub_link);
1763 	}
1764 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1765 		LIST_REMOVE(bucket, ub_link);
1766 		ZONE_UNLOCK(zone);
1767 		cache->uc_freebucket = bucket;
1768 		goto zfree_start;
1769 	}
1770 	/* We're done with this CPU now */
1771 	CPU_UNLOCK(zone, cpu);
1772 
1773 	/* And the zone.. */
1774 	ZONE_UNLOCK(zone);
1775 
1776 #ifdef UMA_DEBUG_ALLOC
1777 	printf("uma_zfree: Allocating new free bucket.\n");
1778 #endif
1779 	bflags = M_NOWAIT;
1780 
1781 	if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1782 		bflags |= M_NOVM;
1783 #ifdef INVARIANTS
1784 	bflags |= M_ZERO;
1785 #endif
1786 	bucket = uma_zalloc_internal(bucketzone,
1787 	    NULL, bflags);
1788 	if (bucket) {
1789 		bucket->ub_ptr = -1;
1790 		ZONE_LOCK(zone);
1791 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1792 		    bucket, ub_link);
1793 		ZONE_UNLOCK(zone);
1794 		goto zfree_restart;
1795 	}
1796 
1797 	/*
1798 	 * If nothing else caught this, we'll just do an internal free.
1799 	 */
1800 
1801 zfree_internal:
1802 
1803 	uma_zfree_internal(zone, item, udata, 0);
1804 
1805 	return;
1806 
1807 }
1808 
1809 /*
1810  * Frees an item to an INTERNAL zone or allocates a free bucket
1811  *
1812  * Arguments:
1813  *	zone   The zone to free to
1814  *	item   The item we're freeing
1815  *	udata  User supplied data for the dtor
1816  *	skip   Skip the dtor, it was done in uma_zfree_arg
1817  */
1818 
1819 static void
1820 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1821 {
1822 	uma_slab_t slab;
1823 	u_int8_t *mem;
1824 	u_int8_t freei;
1825 
1826 	if (!skip && zone->uz_dtor)
1827 		zone->uz_dtor(item, zone->uz_size, udata);
1828 
1829 	ZONE_LOCK(zone);
1830 
1831 	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1832 		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1833 		if (zone->uz_flags & UMA_ZFLAG_HASH)
1834 			slab = hash_sfind(&zone->uz_hash, mem);
1835 		else {
1836 			mem += zone->uz_pgoff;
1837 			slab = (uma_slab_t)mem;
1838 		}
1839 	} else {
1840 		slab = (uma_slab_t)udata;
1841 	}
1842 
1843 	/* Move the slab to another slab list if this free changes its state. */
1844 	if (slab->us_freecount+1 == zone->uz_ipers) {
1845 		LIST_REMOVE(slab, us_link);
1846 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1847 	} else if (slab->us_freecount == 0) {
1848 		LIST_REMOVE(slab, us_link);
1849 		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1850 	}
1851 
1852 	/* Slab management stuff */
1853 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
1854 		/ zone->uz_rsize;
1855 
1856 #ifdef INVARIANTS
1857 	if (!skip)
1858 		uma_dbg_free(zone, slab, item);
1859 #endif
1860 
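	/* Push the item back onto the slab's embedded free list. */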
1861 	slab->us_freelist[freei] = slab->us_firstfree;
1862 	slab->us_firstfree = freei;
1863 	slab->us_freecount++;
1864 
1865 	/* Zone statistics */
1866 	zone->uz_free++;
1867 
1868 	if (zone->uz_flags & UMA_ZFLAG_FULL) {
1869 		if (zone->uz_pages < zone->uz_maxpages)
1870 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
1871 
1872 		/* We can handle one more allocation */
1873 		wakeup_one(zone);
1874 	}
1875 
1876 	ZONE_UNLOCK(zone);
1877 }
1878 
1879 /* See uma.h */
1880 void
1881 uma_zone_set_max(uma_zone_t zone, int nitems)
1882 {
1883 	ZONE_LOCK(zone);
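	/* Convert the item limit into a page limit, rounding up so nitems fit. */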
1884 	if (zone->uz_ppera > 1)
1885 		zone->uz_maxpages = nitems * zone->uz_ppera;
1886 	else
1887 		zone->uz_maxpages = nitems / zone->uz_ipers;
1888 
1889 	if (zone->uz_maxpages * zone->uz_ipers < nitems)
1890 		zone->uz_maxpages++;
1891 
1892 	ZONE_UNLOCK(zone);
1893 }
1894 
1895 /* See uma.h */
1896 void
1897 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1898 {
1899 	ZONE_LOCK(zone);
1900 
1901 	zone->uz_freef = freef;
1902 
1903 	ZONE_UNLOCK(zone);
1904 }
1905 
1906 /* See uma.h */
1907 void
1908 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1909 {
1910 	ZONE_LOCK(zone);
1911 
1912 	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1913 	zone->uz_allocf = allocf;
1914 
1915 	ZONE_UNLOCK(zone);
1916 }
1917 
1918 /* See uma.h */
1919 int
1920 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1921 {
1922 	int pages;
1923 	vm_offset_t kva;
1924 
1925 	mtx_lock(&Giant);
1926 
1927 	pages = count / zone->uz_ipers;
1928 
1929 	if (pages * zone->uz_ipers < count)
1930 		pages++;
1931 
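	/* Reserve pageable KVA for the whole zone; obj_alloc backs it page by page. */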
1932 	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1933 
1934 	if (kva == 0) {
1935 		mtx_unlock(&Giant);
1936 		return (0);
1937 	}
1938 
1939 
1940 	if (obj == NULL)
1941 		obj = vm_object_allocate(OBJT_DEFAULT,
1942 		    pages);
1943 	else {
1944 		VM_OBJECT_LOCK_INIT(obj);
1945 		_vm_object_allocate(OBJT_DEFAULT,
1946 		    pages, obj);
1947 	}
1948 	ZONE_LOCK(zone);
1949 	zone->uz_kva = kva;
1950 	zone->uz_obj = obj;
1951 	zone->uz_maxpages = pages;
1952 
1953 	zone->uz_allocf = obj_alloc;
1954 	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1955 
1956 	ZONE_UNLOCK(zone);
1957 	mtx_unlock(&Giant);
1958 
1959 	return (1);
1960 }
1961 
1962 /* See uma.h */
1963 void
1964 uma_prealloc(uma_zone_t zone, int items)
1965 {
1966 	int slabs;
1967 	uma_slab_t slab;
1968 
1969 	ZONE_LOCK(zone);
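	/* Round up to enough whole slabs to hold at least 'items' items. */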
1970 	slabs = items / zone->uz_ipers;
1971 	if (slabs * zone->uz_ipers < items)
1972 		slabs++;
1973 
1974 	while (slabs > 0) {
1975 		slab = slab_zalloc(zone, M_WAITOK);
1976 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1977 		slabs--;
1978 	}
1979 	ZONE_UNLOCK(zone);
1980 }
1981 
1982 /* See uma.h */
1983 void
1984 uma_reclaim(void)
1985 {
1986 	/*
1987 	 * You might think that the delay below would improve performance since
1988 	 * the allocator will give away memory that it may ask for immediately.
1989 	 * Really, it makes things worse, since cpu cycles are so much cheaper
1990 	 * than disk activity.
1991 	 */
1992 #if 0
1993 	static struct timeval tv = {0};
1994 	struct timeval now;
1995 	getmicrouptime(&now);
1996 	if (now.tv_sec > tv.tv_sec + 30)
1997 		tv = now;
1998 	else
1999 		return;
2000 #endif
2001 #ifdef UMA_DEBUG
2002 	printf("UMA: vm asked us to release pages!\n");
2003 #endif
2004 	bucket_enable();
2005 	zone_foreach(zone_drain);
2006 
2007 	/*
2008 	 * Some slabs may have been freed but this zone will be visited early,
2009 	 * so visit it again to free pages that become empty once the other
2010 	 * zones are drained.  We have to do the same for buckets.
2011 	 */
2012 	zone_drain(slabzone);
2013 	zone_drain(bucketzone);
2014 }
2015 
2016 void *
2017 uma_large_malloc(int size, int wait)
2018 {
2019 	void *mem;
2020 	uma_slab_t slab;
2021 	u_int8_t flags;
2022 
2023 	slab = uma_zalloc_internal(slabzone, NULL, wait);
2024 	if (slab == NULL)
2025 		return (NULL);
2026 
2027 	/* XXX: kmem_malloc panics if Giant isn't held and sleep allowed */
2028 	if ((wait & M_NOWAIT) == 0 && !mtx_owned(&Giant)) {
2029 		mtx_lock(&Giant);
2030 		mem = page_alloc(NULL, size, &flags, wait);
2031 		mtx_unlock(&Giant);
2032 	} else
2033 		mem = page_alloc(NULL, size, &flags, wait);
2034 	if (mem) {
2035 		vsetslab((vm_offset_t)mem, slab);
2036 		slab->us_data = mem;
2037 		slab->us_flags = flags | UMA_SLAB_MALLOC;
2038 		slab->us_size = size;
2039 	} else {
2040 		uma_zfree_internal(slabzone, slab, NULL, 0);
2041 	}
2042 
2043 
2044 	return (mem);
2045 }
2046 
2047 void
2048 uma_large_free(uma_slab_t slab)
2049 {
2050 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
2051 	/*
2052 	 * XXX: We get a lock order reversal if we don't have Giant:
2053 	 * vm_map_remove (locks system map) -> vm_map_delete ->
2054 	 *    vm_map_entry_unwire -> vm_fault_unwire -> mtx_lock(&Giant)
2055 	 */
2056 	if (!mtx_owned(&Giant)) {
2057 		mtx_lock(&Giant);
2058 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2059 		mtx_unlock(&Giant);
2060 	} else
2061 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2062 	uma_zfree_internal(slabzone, slab, NULL, 0);
2063 }
2064 
2065 void
2066 uma_print_stats(void)
2067 {
2068 	zone_foreach(uma_print_zone);
2069 }
2070 
2071 void
2072 uma_print_zone(uma_zone_t zone)
2073 {
2074 	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2075 	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2076 	    zone->uz_ipers, zone->uz_ppera,
2077 	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2078 }
2079 
2080 /*
2081  * Sysctl handler for vm.zone
2082  *
2083  * stolen from vm_zone.c
2084  */
2085 static int
2086 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2087 {
2088 	int error, len, cnt;
2089 	const int linesize = 128;	/* conservative */
2090 	int totalfree;
2091 	char *tmpbuf, *offset;
2092 	uma_zone_t z;
2093 	char *p;
2094 
2095 	cnt = 0;
2096 	mtx_lock(&uma_mtx);
2097 	LIST_FOREACH(z, &uma_zones, uz_link)
2098 		cnt++;
2099 	mtx_unlock(&uma_mtx);
2100 	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2101 			M_TEMP, M_WAITOK);
2102 	len = snprintf(tmpbuf, linesize,
2103 	    "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2104 	if (cnt == 0)
2105 		tmpbuf[len - 1] = '\0';
2106 	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2107 	if (error || cnt == 0)
2108 		goto out;
2109 	offset = tmpbuf;
2110 	mtx_lock(&uma_mtx);
2111 	LIST_FOREACH(z, &uma_zones, uz_link) {
2112 		if (cnt == 0)	/* list may have changed size */
2113 			break;
2114 		ZONE_LOCK(z);
2115 		totalfree = z->uz_free + z->uz_cachefree;
2116 		len = snprintf(offset, linesize,
2117 		    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2118 		    z->uz_name, z->uz_size,
2119 		    z->uz_maxpages * z->uz_ipers,
2120 		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2121 		    totalfree,
2122 		    (unsigned long long)z->uz_allocs);
2123 		ZONE_UNLOCK(z);
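		/* Trim trailing spaces from the name column and append a colon. */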
2124 		for (p = offset + 12; p > offset && *p == ' '; --p)
2125 			/* nothing */ ;
2126 		p[1] = ':';
2127 		cnt--;
2128 		offset += len;
2129 	}
2130 	mtx_unlock(&uma_mtx);
2131 	*offset++ = '\0';
2132 	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2133 out:
2134 	FREE(tmpbuf, M_TEMP);
2135 	return (error);
2136 }
2137