xref: /freebsd/sys/vm/uma_core.c (revision 3d238d9e981227b3bf739a51281e5d200bff3f8c)
1 /*
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * uma_core.c  Implementation of the Universal Memory allocator
29  *
30  * This allocator is intended to replace the multitude of similar object caches
31  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
32  * efficient.  A primary design goal is to return unused memory to the rest of
33  * the system.  This will make the system as a whole more flexible due to the
34  * ability to move memory to subsystems which most need it instead of leaving
35  * pools of reserved memory unused.
36  *
37  * The basic ideas stem from similar slab/zone based allocators whose algorithms
38  * are well known.
39  *
40  */
41 
42 /*
43  * TODO:
44  *	- Improve memory usage for large allocations
45  *	- Investigate cache size adjustments
46  */
47 
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
50 
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57 
58 #include "opt_param.h"
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/kernel.h>
62 #include <sys/types.h>
63 #include <sys/queue.h>
64 #include <sys/malloc.h>
65 #include <sys/lock.h>
66 #include <sys/sysctl.h>
67 #include <sys/mutex.h>
68 #include <sys/proc.h>
69 #include <sys/smp.h>
70 #include <sys/vmmeter.h>
71 
72 #include <vm/vm.h>
73 #include <vm/vm_object.h>
74 #include <vm/vm_page.h>
75 #include <vm/vm_param.h>
76 #include <vm/vm_map.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_extern.h>
79 #include <vm/uma.h>
80 #include <vm/uma_int.h>
81 #include <vm/uma_dbg.h>
82 
83 #include <machine/vmparam.h>
84 
85 /*
86  * This is the zone from which all zones are spawned.  The idea is that even
87  * the zone heads are allocated from the allocator, so we use the bss section
88  * to bootstrap us.
89  */
90 static struct uma_zone masterzone;
91 static uma_zone_t zones = &masterzone;
92 
93 /* This is the zone from which all of uma_slab_t's are allocated. */
94 static uma_zone_t slabzone;
95 
96 /*
97  * The initial hash tables come out of this zone so they can be allocated
98  * prior to malloc coming up.
99  */
100 static uma_zone_t hashzone;
101 
102 /*
103  * Zone that buckets come from.
104  */
105 static uma_zone_t bucketzone;
106 
107 /*
108  * Are we allowed to allocate buckets?
109  */
110 static int bucketdisable = 1;
111 
112 /* Linked list of all zones in the system */
113 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
114 
115 /* This mutex protects the zone list */
116 static struct mtx uma_mtx;
117 
118 /* These are the pcpu cache locks */
119 static struct mtx uma_pcpu_mtx[MAXCPU];
120 
121 /* Linked list of boot time pages */
122 static LIST_HEAD(,uma_slab) uma_boot_pages =
123     LIST_HEAD_INITIALIZER(&uma_boot_pages);
124 
125 /* Count of free boottime pages */
126 static int uma_boot_free = 0;
127 
128 /* Is the VM done starting up? */
129 static int booted = 0;
130 
131 /* This is the handle used to schedule our working set calculator */
132 static struct callout uma_callout;
133 
134 /* This is mp_maxid + 1, for use while looping over each cpu */
135 static int maxcpu;
136 
137 /*
138  * This structure is passed as the zone ctor arg so that I don't have to create
139  * a special allocation function just for zones.
140  */
141 struct uma_zctor_args {
142 	char *name;
143 	size_t size;
144 	uma_ctor ctor;
145 	uma_dtor dtor;
146 	uma_init uminit;
147 	uma_fini fini;
148 	int align;
149 	u_int16_t flags;
150 };
151 
152 /* Prototypes.. */
153 
154 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
155 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
156 static void page_free(void *, int, u_int8_t);
157 static uma_slab_t slab_zalloc(uma_zone_t, int);
158 static void cache_drain(uma_zone_t);
159 static void bucket_drain(uma_zone_t, uma_bucket_t);
160 static void zone_drain(uma_zone_t);
161 static void zone_ctor(void *, int, void *);
162 static void zone_dtor(void *, int, void *);
163 static void zero_init(void *, int);
164 static void zone_small_init(uma_zone_t zone);
165 static void zone_large_init(uma_zone_t zone);
166 static void zone_foreach(void (*zfunc)(uma_zone_t));
167 static void zone_timeout(uma_zone_t zone);
168 static int hash_alloc(struct uma_hash *);
169 static int hash_expand(struct uma_hash *, struct uma_hash *);
170 static void hash_free(struct uma_hash *hash);
171 static void uma_timeout(void *);
172 static void uma_startup3(void);
173 static void *uma_zalloc_internal(uma_zone_t, void *, int);
174 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
175 static void bucket_enable(void);
176 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
177 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
178 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
179 
180 void uma_print_zone(uma_zone_t);
181 void uma_print_stats(void);
182 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
183 
184 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
185     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
186 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
187 
188 /*
189  * This routine checks to see whether or not it's safe to enable buckets.
190  */
191 
192 static void
193 bucket_enable(void)
194 {
195 	if (cnt.v_free_count < cnt.v_free_min)
196 		bucketdisable = 1;
197 	else
198 		bucketdisable = 0;
199 }
200 
201 
202 /*
203  * Routine called by timeout which is used to fire off some time interval
204  * based calculations.  (working set, stats, etc.)
205  *
206  * Arguments:
207  *	arg   Unused
208  *
209  * Returns:
210  *	Nothing
211  */
212 static void
213 uma_timeout(void *unused)
214 {
215 	bucket_enable();
216 	zone_foreach(zone_timeout);
217 
218 	/* Reschedule this event */
219 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
220 }
221 
222 /*
223  * Routine to perform timeout driven calculations.  This does the working set
224  * calculation as well as hash table expansion and per cpu statistics aggregation.
225  *
226  *  Arguments:
227  *	zone  The zone to operate on
228  *
229  *  Returns:
230  *	Nothing
231  */
232 static void
233 zone_timeout(uma_zone_t zone)
234 {
235 	uma_cache_t cache;
236 	u_int64_t alloc;
237 	int free;
238 	int cpu;
239 
240 	alloc = 0;
241 	free = 0;
242 
243 	/*
244 	 * Aggregate per cpu cache statistics back to the zone.
245 	 *
246 	 * I may rewrite this to set a flag in the per cpu cache instead of
247 	 * locking.  If the flag is not cleared on the next round I will have
248 	 * to lock and do it here instead so that the statistics don't get too
249 	 * far out of sync.
250 	 */
251 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
252 		for (cpu = 0; cpu < maxcpu; cpu++) {
253 			if (CPU_ABSENT(cpu))
254 				continue;
255 			CPU_LOCK(cpu);
256 			cache = &zone->uz_cpu[cpu];
257 			/* Add them up, and reset */
258 			alloc += cache->uc_allocs;
259 			cache->uc_allocs = 0;
260 			if (cache->uc_allocbucket)
261 				free += cache->uc_allocbucket->ub_ptr + 1;
262 			if (cache->uc_freebucket)
263 				free += cache->uc_freebucket->ub_ptr + 1;
264 			CPU_UNLOCK(cpu);
265 		}
266 	}
267 
268 	/* Now push these stats back into the zone.. */
269 	ZONE_LOCK(zone);
270 	zone->uz_allocs += alloc;
271 
272 	/*
273 	 * cachefree is an instantaneous snapshot of what is in the per cpu
274 	 * caches, not an accurate counter
275 	 */
276 	zone->uz_cachefree = free;
277 
278 	/*
279 	 * Expand the zone hash table.
280 	 *
281 	 * This is done if the number of slabs is larger than the hash size.
282 	 * What I'm trying to do here is eliminate collisions entirely.  This
283 	 * may be a little aggressive.  Should I allow for two collisions max?
284 	 */
285 
286 	if (zone->uz_flags & UMA_ZFLAG_HASH &&
287 	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
288 		struct uma_hash newhash;
289 		struct uma_hash oldhash;
290 		int ret;
291 
292 		/*
293 		 * This is so involved because allocating and freeing
294 		 * while the zone lock is held will lead to deadlock.
295 		 * I have to do everything in stages and check for
296 		 * races.
297 		 */
298 		newhash = zone->uz_hash;
299 		ZONE_UNLOCK(zone);
300 		ret = hash_alloc(&newhash);
301 		ZONE_LOCK(zone);
302 		if (ret) {
303 			if (hash_expand(&zone->uz_hash, &newhash)) {
304 				oldhash = zone->uz_hash;
305 				zone->uz_hash = newhash;
306 			} else
307 				oldhash = newhash;
308 
309 			ZONE_UNLOCK(zone);
310 			hash_free(&oldhash);
311 			ZONE_LOCK(zone);
312 		}
313 	}
314 
315 	/*
316 	 * Here we compute the working set size as the total number of items
317 	 * left outstanding since the last time interval.  This is slightly
318 	 * suboptimal. What we really want is the highest number of outstanding
319 	 * items during the last time quantum.  This should be close enough.
320 	 *
321 	 * The working set size is used to throttle the zone_drain function.
322 	 * We don't want to return memory that we may need again immediately.
323 	 */
324 	alloc = zone->uz_allocs - zone->uz_oallocs;
325 	zone->uz_oallocs = zone->uz_allocs;
326 	zone->uz_wssize = alloc;
327 
328 	ZONE_UNLOCK(zone);
329 }
330 
331 /*
332  * Allocate and zero fill the next sized hash table from the appropriate
333  * backing store.
334  *
335  * Arguments:
336  *	hash  A new hash structure with the old hash size in uh_hashsize
337  *
338  * Returns:
339  *	1 on success and 0 on failure.
340  */
341 static int
342 hash_alloc(struct uma_hash *hash)
343 {
344 	int oldsize;
345 	int alloc;
346 
347 	oldsize = hash->uh_hashsize;
348 
349 	/* We're just going to go to a power of two greater */
350 	if (oldsize)  {
351 		hash->uh_hashsize = oldsize * 2;
352 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
353 		/* XXX Shouldn't be abusing DEVBUF here */
354 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
355 		    M_DEVBUF, M_NOWAIT);
356 	} else {
357 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
358 		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
359 		    M_WAITOK);
360 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
361 	}
362 	if (hash->uh_slab_hash) {
363 		bzero(hash->uh_slab_hash, alloc);
364 		hash->uh_hashmask = hash->uh_hashsize - 1;
365 		return (1);
366 	}
367 
368 	return (0);
369 }
370 
371 /*
372  * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
373  * to reduce collisions.  This must not be done in the regular allocation path;
374  * otherwise we can recurse on the vm while allocating pages.
375  *
376  * Arguments:
377  *	oldhash  The hash you want to expand
378  *	newhash  The hash structure for the new table
379  *
380  * Returns:
381  * 	1 if the slabs were rehashed into the new table, 0 if the new table
382  * 	could not be used (not allocated or not larger than the old one).
384  */
385 static int
386 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
387 {
388 	uma_slab_t slab;
389 	int hval;
390 	int i;
391 
392 	if (!newhash->uh_slab_hash)
393 		return (0);
394 
395 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
396 		return (0);
397 
398 	/*
399 	 * I need to investigate hash algorithms for resizing without a
400 	 * full rehash.
401 	 */
402 
403 	for (i = 0; i < oldhash->uh_hashsize; i++)
404 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
405 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
406 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
407 			hval = UMA_HASH(newhash, slab->us_data);
408 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
409 			    slab, us_hlink);
410 		}
411 
412 	return (1);
413 }
414 
415 /*
416  * Free the hash bucket to the appropriate backing store.
417  *
418  * Arguments:
419  *	slab_hash  The hash bucket we're freeing
420  *	hash  The hash structure whose slab_hash array we're freeing
422  * Returns:
423  *	Nothing
424  */
425 static void
426 hash_free(struct uma_hash *hash)
427 {
428 	if (hash->uh_slab_hash == NULL)
429 		return;
430 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
431 		uma_zfree_internal(hashzone,
432 		    hash->uh_slab_hash, NULL, 0);
433 	else
434 		free(hash->uh_slab_hash, M_DEVBUF);
435 }
436 
437 /*
438  * Frees all outstanding items in a bucket
439  *
440  * Arguments:
441  *	zone   The zone to free to, must be unlocked.
442  *	bucket The free/alloc bucket with items, cpu queue must be locked.
443  *
444  * Returns:
445  *	Nothing
446  */
447 
448 static void
449 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
450 {
451 	uma_slab_t slab;
452 	int mzone;
453 	void *item;
454 
455 	if (bucket == NULL)
456 		return;
457 
458 	slab = NULL;
459 	mzone = 0;
460 
461 	/* We have to lookup the slab again for malloc.. */
462 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
463 		mzone = 1;
464 
465 	while (bucket->ub_ptr > -1)  {
466 		item = bucket->ub_bucket[bucket->ub_ptr];
467 #ifdef INVARIANTS
468 		bucket->ub_bucket[bucket->ub_ptr] = NULL;
469 		KASSERT(item != NULL,
470 		    ("bucket_drain: botched ptr, item is NULL"));
471 #endif
472 		bucket->ub_ptr--;
473 		/*
474 		 * This is extremely inefficient.  The slab pointer was passed
475 		 * to uma_zfree_arg, but we lost it because the buckets don't
476 		 * hold them.  This will go away when free() gets a size passed
477 		 * to it.
478 		 */
479 		if (mzone)
480 			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
481 		uma_zfree_internal(zone, item, slab, 1);
482 	}
483 }
484 
485 /*
486  * Drains the per cpu caches for a zone.
487  *
488  * Arguments:
489  *	zone  The zone to drain, must be locked.
490  *
491  * Returns:
492  *	Nothing
493  *
494  * This function returns with the zone locked so that the per cpu queues can
495  * not be filled until zone_drain is finished.
496  *
497  */
498 static void
499 cache_drain(uma_zone_t zone)
500 {
501 	uma_bucket_t bucket;
502 	uma_cache_t cache;
503 	int cpu;
504 
505 	/*
506 	 * Flush out the per cpu queues.
507 	 *
508 	 * XXX This causes unnecessary thrashing due to immediately having
509 	 * empty per cpu queues.  I need to improve this.
510 	 */
511 
512 	/*
513 	 * We have to lock each cpu cache before locking the zone
514 	 */
515 	ZONE_UNLOCK(zone);
516 
517 	for (cpu = 0; cpu < maxcpu; cpu++) {
518 		if (CPU_ABSENT(cpu))
519 			continue;
520 		CPU_LOCK(cpu);
521 		cache = &zone->uz_cpu[cpu];
522 		bucket_drain(zone, cache->uc_allocbucket);
523 		bucket_drain(zone, cache->uc_freebucket);
524 	}
525 
526 	/*
527 	 * Drain the bucket queues and free the buckets, we just keep two per
528 	 * cpu (alloc/free).
529 	 */
530 	ZONE_LOCK(zone);
531 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
532 		LIST_REMOVE(bucket, ub_link);
533 		ZONE_UNLOCK(zone);
534 		bucket_drain(zone, bucket);
535 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
536 		ZONE_LOCK(zone);
537 	}
538 
539 	/* Now we do the free queue.. */
540 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
541 		LIST_REMOVE(bucket, ub_link);
542 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
543 	}
544 
545 	/* We unlock here, but they will all block until the zone is unlocked */
546 	for (cpu = 0; cpu < maxcpu; cpu++) {
547 		if (CPU_ABSENT(cpu))
548 			continue;
549 		CPU_UNLOCK(cpu);
550 	}
551 
552 	zone->uz_cachefree = 0;
553 }
554 
555 /*
556  * Frees pages from a zone back to the system.  This is done on demand from
557  * the pageout daemon.
558  *
559  * Arguments:
560  *	zone  The zone to free pages from
562  *
563  * Returns:
564  *	Nothing.
565  */
566 static void
567 zone_drain(uma_zone_t zone)
568 {
569 	struct slabhead freeslabs = {};
570 	uma_slab_t slab;
571 	uma_slab_t n;
572 	u_int64_t extra;
573 	u_int8_t flags;
574 	u_int8_t *mem;
575 	int i;
576 
577 	/*
578 	 * We don't want to take pages from statically allocated zones at this
579 	 * time
580 	 */
581 	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
582 		return;
583 
584 	ZONE_LOCK(zone);
585 
586 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
587 		cache_drain(zone);
588 
589 	if (zone->uz_free < zone->uz_wssize)
590 		goto finished;
591 #ifdef UMA_DEBUG
592 	printf("%s working set size: %llu free items: %u\n",
593 	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
594 #endif
595 	extra = zone->uz_free - zone->uz_wssize;
596 	extra /= zone->uz_ipers;
597 
598 	/* extra is now the number of extra slabs that we can free */
599 
600 	if (extra == 0)
601 		goto finished;
602 
603 	slab = LIST_FIRST(&zone->uz_free_slab);
604 	while (slab && extra) {
605 		n = LIST_NEXT(slab, us_link);
606 
607 		/* We have nowhere to free these to */
608 		if (slab->us_flags & UMA_SLAB_BOOT) {
609 			slab = n;
610 			continue;
611 		}
612 
613 		LIST_REMOVE(slab, us_link);
614 		zone->uz_pages -= zone->uz_ppera;
615 		zone->uz_free -= zone->uz_ipers;
616 
617 		if (zone->uz_flags & UMA_ZFLAG_HASH)
618 			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
619 
620 		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
621 
622 		slab = n;
623 		extra--;
624 	}
625 finished:
626 	ZONE_UNLOCK(zone);
627 
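	/*
	 * The zone lock is no longer held here; run fini on each collected
	 * slab's items and hand the pages back via the zone's free routine.
	 */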
628 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
629 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
630 		if (zone->uz_fini)
631 			for (i = 0; i < zone->uz_ipers; i++)
632 				zone->uz_fini(
633 				    slab->us_data + (zone->uz_rsize * i),
634 				    zone->uz_size);
635 		flags = slab->us_flags;
636 		mem = slab->us_data;
637 
638 		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
639 			uma_zfree_internal(slabzone, slab, NULL, 0);
640 		if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
641 			vm_object_t obj;
642 
643 			if (flags & UMA_SLAB_KMEM)
644 				obj = kmem_object;
645 			else
646 				obj = NULL;
647 			for (i = 0; i < zone->uz_ppera; i++)
648 				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
649 				    obj);
650 		}
651 #ifdef UMA_DEBUG
652 		printf("%s: Returning %d bytes.\n",
653 		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
654 #endif
655 		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
656 	}
657 
658 }
659 
660 /*
661  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
662  *
663  * Arguments:
664  *	zone  The zone to allocate slabs for
665  *	wait  Shall we wait?
666  *
667  * Returns:
668  *	The slab that was allocated or NULL if there is no memory and the
669  *	caller specified M_NOWAIT.
670  *
671  */
672 static uma_slab_t
673 slab_zalloc(uma_zone_t zone, int wait)
674 {
675 	uma_slab_t slab;	/* Starting slab */
676 	u_int8_t *mem;
677 	u_int8_t flags;
678 	int i;
679 
680 	slab = NULL;
681 
682 #ifdef UMA_DEBUG
683 	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
684 #endif
685 	ZONE_UNLOCK(zone);
686 
687 	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
688 		slab = uma_zalloc_internal(slabzone, NULL, wait);
689 		if (slab == NULL) {
690 			ZONE_LOCK(zone);
691 			return NULL;
692 		}
693 	}
694 
695 	/*
696 	 * This reproduces the old vm_zone behavior of zero filling pages the
697 	 * first time they are added to a zone.
698 	 *
699 	 * Malloced items are zeroed in uma_zalloc.
700 	 */
701 
702 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
703 		wait |= M_ZERO;
704 	else
705 		wait &= ~M_ZERO;
706 
707 	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
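		/*
		 * For blocking allocations we take Giant around the backing
		 * allocator, since kmem_malloc may sleep and still expects
		 * Giant to be held (see the note in uma_large_malloc).
		 */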
708 		if ((wait & M_NOWAIT) == 0) {
709 			mtx_lock(&Giant);
710 			mem = zone->uz_allocf(zone,
711 			    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
712 			mtx_unlock(&Giant);
713 		} else {
714 			mem = zone->uz_allocf(zone,
715 			    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
716 		}
717 		if (mem == NULL) {
718 			ZONE_LOCK(zone);
719 			return (NULL);
720 		}
721 	} else {
722 		uma_slab_t tmps;
723 
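		/*
		 * The VM isn't fully up yet, so carve this slab out of the
		 * statically reserved boot pages instead of calling the page
		 * allocator.
		 */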
724 		if (zone->uz_ppera > 1)
725 			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
726 		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
727 			panic("Mallocing before uma_startup2 has been called.\n");
728 		if (uma_boot_free == 0)
729 			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
730 		tmps = LIST_FIRST(&uma_boot_pages);
731 		LIST_REMOVE(tmps, us_link);
732 		uma_boot_free--;
733 		mem = tmps->us_data;
734 		flags = tmps->us_flags;
735 	}
736 
737 	/* Point the slab into the allocated memory */
738 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
739 		slab = (uma_slab_t )(mem + zone->uz_pgoff);
740 
741 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
742 		for (i = 0; i < zone->uz_ppera; i++)
743 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
744 
745 	slab->us_zone = zone;
746 	slab->us_data = mem;
747 
748 	/*
749 	 * This is intended to spread data out across cache lines.
750 	 *
751 	 * This code doesn't seem to work properly on x86, and on alpha
752 	 * it makes absolutely no performance difference. I'm sure it could
753 	 * use some tuning, but Sun makes outrageous claims about its
754 	 * performance.
755 	 */
756 #if 0
757 	if (zone->uz_cachemax) {
758 		slab->us_data += zone->uz_cacheoff;
759 		zone->uz_cacheoff += UMA_CACHE_INC;
760 		if (zone->uz_cacheoff > zone->uz_cachemax)
761 			zone->uz_cacheoff = 0;
762 	}
763 #endif
764 
765 	slab->us_freecount = zone->uz_ipers;
766 	slab->us_firstfree = 0;
767 	slab->us_flags = flags;
768 	for (i = 0; i < zone->uz_ipers; i++)
769 		slab->us_freelist[i] = i+1;
770 
771 	if (zone->uz_init)
772 		for (i = 0; i < zone->uz_ipers; i++)
773 			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
774 			    zone->uz_size);
775 	ZONE_LOCK(zone);
776 
777 	if (zone->uz_flags & UMA_ZFLAG_HASH)
778 		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
779 
780 	zone->uz_pages += zone->uz_ppera;
781 	zone->uz_free += zone->uz_ipers;
782 
783 
784 	return (slab);
785 }
786 
787 /*
788  * Allocates a number of pages from the system
789  *
790  * Arguments:
791  *	zone  Unused
792  *	bytes  The number of bytes requested
793  *	wait  Shall we wait?
794  *
795  * Returns:
796  *	A pointer to the alloced memory or possibly
797  *	NULL if M_NOWAIT is set.
798  */
799 static void *
800 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
801 {
802 	void *p;	/* Returned page */
803 
804 	*pflag = UMA_SLAB_KMEM;
805 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
806 
807 	return (p);
808 }
809 
810 /*
811  * Allocates a number of pages from within an object
812  *
813  * Arguments:
814  *	zone   Unused
815  *	bytes  The number of bytes requested
816  *	wait   Shall we wait?
817  *
818  * Returns:
819  *	A pointer to the alloced memory or possibly
820  *	NULL if M_NOWAIT is set.
821  *
822  * TODO: If we fail during a multi-page allocation release the pages that have
823  *	 already been allocated.
824  */
825 static void *
826 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
827 {
828 	vm_offset_t zkva;
829 	vm_offset_t retkva;
830 	vm_page_t p;
831 	int pages;
832 
833 	retkva = 0;
834 	pages = zone->uz_pages;
835 
836 	/*
837 	 * This looks a little weird since we're getting one page at a time
838 	 */
839 	while (bytes > 0) {
840 		VM_OBJECT_LOCK(zone->uz_obj);
841 		p = vm_page_alloc(zone->uz_obj, pages,
842 		    VM_ALLOC_INTERRUPT);
843 		VM_OBJECT_UNLOCK(zone->uz_obj);
844 		if (p == NULL)
845 			return (NULL);
846 
847 		zkva = zone->uz_kva + pages * PAGE_SIZE;
848 		if (retkva == 0)
849 			retkva = zkva;
850 		pmap_qenter(zkva, &p, 1);
851 		bytes -= PAGE_SIZE;
852 		pages += 1;
853 	}
854 
855 	*flags = UMA_SLAB_PRIV;
856 
857 	return ((void *)retkva);
858 }
859 
860 /*
861  * Frees a number of pages to the system
862  *
863  * Arguments:
864  *	mem   A pointer to the memory to be freed
865  *	size  The size of the memory being freed
866  *	flags The original p->us_flags field
867  *
868  * Returns:
869  *	Nothing
870  *
871  */
872 static void
873 page_free(void *mem, int size, u_int8_t flags)
874 {
875 	vm_map_t map;
876 
877 	if (flags & UMA_SLAB_KMEM)
878 		map = kmem_map;
879 	else
880 		panic("UMA: page_free used with invalid flags %d\n", flags);
881 
882 	kmem_free(map, (vm_offset_t)mem, size);
883 }
884 
885 /*
886  * Zero fill initializer
887  *
888  * Arguments/Returns follow uma_init specifications
889  *
890  */
891 static void
892 zero_init(void *mem, int size)
893 {
894 	bzero(mem, size);
895 }
896 
897 /*
898  * Finish creating a small uma zone.  This calculates ipers and the real item size.
899  *
900  * Arguments
901  *	zone  The zone we should initialize
902  *
903  * Returns
904  *	Nothing
905  */
906 static void
907 zone_small_init(uma_zone_t zone)
908 {
909 	int rsize;
910 	int memused;
911 	int ipers;
912 
913 	rsize = zone->uz_size;
914 
915 	if (rsize < UMA_SMALLEST_UNIT)
916 		rsize = UMA_SMALLEST_UNIT;
917 
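	/* Round the item size up to the zone's alignment boundary. */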
918 	if (rsize & zone->uz_align)
919 		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
920 
921 	zone->uz_rsize = rsize;
922 
923 	rsize += 1;	/* Account for the byte of linkage */
924 	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
925 	zone->uz_ppera = 1;
926 
927 	memused = zone->uz_ipers * zone->uz_rsize;
928 
929 	/* Can we do any better? */
930 	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
931 		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
932 			return;
933 		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
934 		if (ipers > zone->uz_ipers) {
935 			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
936 			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
937 				zone->uz_flags |= UMA_ZFLAG_HASH;
938 			zone->uz_ipers = ipers;
939 		}
940 	}
941 
942 }
943 
944 /*
945  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
946  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
947  * more complicated.
948  *
949  * Arguments
950  *	zone  The zone we should initialize
951  *
952  * Returns
953  *	Nothing
954  */
955 static void
956 zone_large_init(uma_zone_t zone)
957 {
958 	int pages;
959 
960 	pages = zone->uz_size / UMA_SLAB_SIZE;
961 
962 	/* Account for remainder */
963 	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
964 		pages++;
965 
966 	zone->uz_ppera = pages;
967 	zone->uz_ipers = 1;
968 
969 	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
970 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
971 		zone->uz_flags |= UMA_ZFLAG_HASH;
972 
973 	zone->uz_rsize = zone->uz_size;
974 }
975 
976 /*
977  * Zone header ctor.  This initializes all fields and locks, and inserts
978  * the zone onto the global zone list.
979  *
980  * Arguments/Returns follow uma_ctor specifications
981  *	udata  Actually uma_zctor_args
982  *
983  */
984 
985 static void
986 zone_ctor(void *mem, int size, void *udata)
987 {
988 	struct uma_zctor_args *arg = udata;
989 	uma_zone_t zone = mem;
990 	int privlc;
991 
992 	bzero(zone, size);
993 	zone->uz_name = arg->name;
994 	zone->uz_size = arg->size;
995 	zone->uz_ctor = arg->ctor;
996 	zone->uz_dtor = arg->dtor;
997 	zone->uz_init = arg->uminit;
998 	zone->uz_fini = arg->fini;
999 	zone->uz_align = arg->align;
1000 	zone->uz_free = 0;
1001 	zone->uz_pages = 0;
1002 	zone->uz_flags = 0;
1003 	zone->uz_allocf = page_alloc;
1004 	zone->uz_freef = page_free;
1005 
1006 	if (arg->flags & UMA_ZONE_ZINIT)
1007 		zone->uz_init = zero_init;
1008 
1009 	if (arg->flags & UMA_ZONE_INTERNAL)
1010 		zone->uz_flags |= UMA_ZFLAG_INTERNAL;
1011 
1012 	if (arg->flags & UMA_ZONE_MALLOC)
1013 		zone->uz_flags |= UMA_ZFLAG_MALLOC;
1014 
1015 	if (arg->flags & UMA_ZONE_NOFREE)
1016 		zone->uz_flags |= UMA_ZFLAG_NOFREE;
1017 
1018 	if (arg->flags & UMA_ZONE_VM)
1019 		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
1020 
1021 	if (zone->uz_size > UMA_SLAB_SIZE)
1022 		zone_large_init(zone);
1023 	else
1024 		zone_small_init(zone);
1025 #ifdef UMA_MD_SMALL_ALLOC
1026 	if (zone->uz_ppera == 1) {
1027 		zone->uz_allocf = uma_small_alloc;
1028 		zone->uz_freef = uma_small_free;
1029 	}
1030 #endif	/* UMA_MD_SMALL_ALLOC */
1031 
1032 	if (arg->flags & UMA_ZONE_MTXCLASS)
1033 		privlc = 1;
1034 	else
1035 		privlc = 0;
1036 
1037 	/*
1038 	 * If we're putting the slab header in the actual page we need to
1039 	 * figure out where in each page it goes.  This calculates a right
1040 	 * justified offset into the memory on an ALIGN_PTR boundary.
1041 	 */
1042 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1043 		int totsize;
1044 		int waste;
1045 
1046 		/* Size of the slab struct and free list */
1047 		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1048 		if (totsize & UMA_ALIGN_PTR)
1049 			totsize = (totsize & ~UMA_ALIGN_PTR) +
1050 			    (UMA_ALIGN_PTR + 1);
1051 		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1052 
1053 		waste = zone->uz_pgoff;
1054 		waste -= (zone->uz_ipers * zone->uz_rsize);
1055 
1056 		/*
1057 		 * This calculates how much space we have for cache line size
1058 		 * optimizations.  It works by offsetting each slab slightly.
1059 		 * Currently it breaks on x86, and so it is disabled.
1060 		 */
1061 
1062 		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1063 			zone->uz_cachemax = waste - UMA_CACHE_INC;
1064 			zone->uz_cacheoff = 0;
1065 		}
1066 
1067 		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1068 		    + zone->uz_ipers;
1069 		/* I don't think it's possible, but I'll make sure anyway */
1070 		if (totsize > UMA_SLAB_SIZE) {
1071 			printf("zone %s ipers %d rsize %d size %d\n",
1072 			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1073 			    zone->uz_size);
1074 			panic("UMA slab won't fit.\n");
1075 		}
1076 	}
1077 
1078 	if (zone->uz_flags & UMA_ZFLAG_HASH)
1079 		hash_alloc(&zone->uz_hash);
1080 
1081 #ifdef UMA_DEBUG
1082 	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1083 	    zone->uz_name, zone,
1084 	    zone->uz_size, zone->uz_ipers,
1085 	    zone->uz_ppera, zone->uz_pgoff);
1086 #endif
1087 	ZONE_LOCK_INIT(zone, privlc);
1088 
1089 	mtx_lock(&uma_mtx);
1090 	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1091 	mtx_unlock(&uma_mtx);
1092 
1093 	/*
1094 	 * Some internal zones don't have room allocated for the per cpu
1095 	 * caches.  If we're internal, bail out here.
1096 	 */
1097 
1098 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1099 		return;
1100 
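	/*
	 * Size the per cpu buckets to hold at most one slab's worth of
	 * items, capped at UMA_BUCKET_SIZE.
	 */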
1101 	if (zone->uz_ipers < UMA_BUCKET_SIZE)
1102 		zone->uz_count = zone->uz_ipers - 1;
1103 	else
1104 		zone->uz_count = UMA_BUCKET_SIZE - 1;
1105 }
1106 
1107 /*
1108  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
1109  * and removes the zone from the global list.
1110  *
1111  * Arguments/Returns follow uma_dtor specifications
1112  *	udata  unused
1113  */
1114 
1115 static void
1116 zone_dtor(void *arg, int size, void *udata)
1117 {
1118 	uma_zone_t zone;
1119 
1120 	zone = (uma_zone_t)arg;
1121 	ZONE_LOCK(zone);
1122 	zone->uz_wssize = 0;
1123 	ZONE_UNLOCK(zone);
1124 
1125 	mtx_lock(&uma_mtx);
1126 	LIST_REMOVE(zone, uz_link);
1127 	zone_drain(zone);
1128 	mtx_unlock(&uma_mtx);
1129 
1130 	ZONE_LOCK(zone);
1131 	if (zone->uz_free != 0)
1132 		printf("Zone %s was not empty (%d items).  Lost %d pages of memory.\n",
1133 		    zone->uz_name, zone->uz_free, zone->uz_pages);
1134 
1135 	ZONE_UNLOCK(zone);
1136 	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1137 		hash_free(&zone->uz_hash);
1138 
1139 	ZONE_LOCK_FINI(zone);
1140 }
1141 /*
1142  * Traverses every zone in the system and calls a callback
1143  *
1144  * Arguments:
1145  *	zfunc  A pointer to a function which accepts a zone
1146  *		as an argument.
1147  *
1148  * Returns:
1149  *	Nothing
1150  */
1151 static void
1152 zone_foreach(void (*zfunc)(uma_zone_t))
1153 {
1154 	uma_zone_t zone;
1155 
1156 	mtx_lock(&uma_mtx);
1157 	LIST_FOREACH(zone, &uma_zones, uz_link) {
1158 		zfunc(zone);
1159 	}
1160 	mtx_unlock(&uma_mtx);
1161 }
1162 
1163 /* Public functions */
1164 /* See uma.h */
1165 void
1166 uma_startup(void *bootmem)
1167 {
1168 	struct uma_zctor_args args;
1169 	uma_slab_t slab;
1170 	int slabsize;
1171 	int i;
1172 
1173 #ifdef UMA_DEBUG
1174 	printf("Creating uma zone headers zone.\n");
1175 #endif
1176 #ifdef SMP
1177 	maxcpu = mp_maxid + 1;
1178 #else
1179 	maxcpu = 1;
1180 #endif
1181 #ifdef UMA_DEBUG
1182 	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1183 	Debugger("stop");
1184 #endif
1185 	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1186 	/* "Manually" create the initial zone */
1187 	args.name = "UMA Zones";
1188 	args.size = sizeof(struct uma_zone) +
1189 	    (sizeof(struct uma_cache) * (maxcpu - 1));
1190 	args.ctor = zone_ctor;
1191 	args.dtor = zone_dtor;
1192 	args.uminit = zero_init;
1193 	args.fini = NULL;
1194 	args.align = 32 - 1;
1195 	args.flags = UMA_ZONE_INTERNAL;
1196 	/* The initial zone has no per cpu queues, so it's smaller */
1197 	zone_ctor(zones, sizeof(struct uma_zone), &args);
1198 
1199 	/* Initialize the pcpu cache lock set once and for all */
1200 	for (i = 0; i < maxcpu; i++)
1201 		CPU_LOCK_INIT(i);
1202 
1203 #ifdef UMA_DEBUG
1204 	printf("Filling boot free list.\n");
1205 #endif
1206 	for (i = 0; i < UMA_BOOT_PAGES; i++) {
1207 		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1208 		slab->us_data = (u_int8_t *)slab;
1209 		slab->us_flags = UMA_SLAB_BOOT;
1210 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1211 		uma_boot_free++;
1212 	}
1213 
1214 #ifdef UMA_DEBUG
1215 	printf("Creating slab zone.\n");
1216 #endif
1217 
1218 	/*
1219 	 * This is the max number of free list items we'll have with
1220 	 * offpage slabs.
1221 	 */
1222 
1223 	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1224 	slabsize /= UMA_MAX_WASTE;
1225 	slabsize++;			/* In case the division rounded down */
1226 	slabsize += sizeof(struct uma_slab);
1227 
1228 	/* Now make a zone for slab headers */
1229 	slabzone = uma_zcreate("UMA Slabs",
1230 				slabsize,
1231 				NULL, NULL, NULL, NULL,
1232 				UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1233 
1234 	hashzone = uma_zcreate("UMA Hash",
1235 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1236 	    NULL, NULL, NULL, NULL,
1237 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1238 
1239 	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1240 	    NULL, NULL, NULL, NULL,
1241 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1242 
1243 #ifdef UMA_MD_SMALL_ALLOC
1244 	booted = 1;
1245 #endif
1246 
1247 #ifdef UMA_DEBUG
1248 	printf("UMA startup complete.\n");
1249 #endif
1250 }
1251 
1252 /* see uma.h */
1253 void
1254 uma_startup2(void)
1255 {
1256 	booted = 1;
1257 	bucket_enable();
1258 #ifdef UMA_DEBUG
1259 	printf("UMA startup2 complete.\n");
1260 #endif
1261 }
1262 
1263 /*
1264  * Initialize our callout handle
1265  *
1266  */
1267 
1268 static void
1269 uma_startup3(void)
1270 {
1271 #ifdef UMA_DEBUG
1272 	printf("Starting callout.\n");
1273 #endif
1274 	callout_init(&uma_callout, 0);
1275 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1276 #ifdef UMA_DEBUG
1277 	printf("UMA startup3 complete.\n");
1278 #endif
1279 }
1280 
1281 /* See uma.h */
1282 uma_zone_t
1283 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1284 		uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1285 
1286 {
1287 	struct uma_zctor_args args;
1288 
1289 	/* This stuff is essential for the zone ctor */
1290 	args.name = name;
1291 	args.size = size;
1292 	args.ctor = ctor;
1293 	args.dtor = dtor;
1294 	args.uminit = uminit;
1295 	args.fini = fini;
1296 	args.align = align;
1297 	args.flags = flags;
1298 
1299 	return (uma_zalloc_internal(zones, &args, M_WAITOK));
1300 }
1301 
1302 /* See uma.h */
1303 void
1304 uma_zdestroy(uma_zone_t zone)
1305 {
1306 	uma_zfree_internal(zones, zone, NULL, 0);
1307 }
1308 
1309 /* See uma.h */
1310 void *
1311 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1312 {
1313 	void *item;
1314 	uma_cache_t cache;
1315 	uma_bucket_t bucket;
1316 	int cpu;
1317 
1318 	/* This is the fast path allocation */
1319 #ifdef UMA_DEBUG_ALLOC_1
1320 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1321 #endif
1322 
1323 	if (!(flags & M_NOWAIT)) {
1324 		KASSERT(curthread->td_intr_nesting_level == 0,
1325 		   ("malloc(M_WAITOK) in interrupt context"));
1326 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1327 		    "malloc() of \"%s\"", zone->uz_name);
1328 	}
1329 
1330 zalloc_restart:
1331 	cpu = PCPU_GET(cpuid);
1332 	CPU_LOCK(cpu);
1333 	cache = &zone->uz_cpu[cpu];
1334 
1335 zalloc_start:
1336 	bucket = cache->uc_allocbucket;
1337 
1338 	if (bucket) {
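		/* Fast path: pop an item directly off the per cpu alloc bucket. */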
1339 		if (bucket->ub_ptr > -1) {
1340 			item = bucket->ub_bucket[bucket->ub_ptr];
1341 #ifdef INVARIANTS
1342 			bucket->ub_bucket[bucket->ub_ptr] = NULL;
1343 #endif
1344 			bucket->ub_ptr--;
1345 			KASSERT(item != NULL,
1346 			    ("uma_zalloc: Bucket pointer mangled."));
1347 			cache->uc_allocs++;
1348 #ifdef INVARIANTS
1349 			ZONE_LOCK(zone);
1350 			uma_dbg_alloc(zone, NULL, item);
1351 			ZONE_UNLOCK(zone);
1352 #endif
1353 			CPU_UNLOCK(cpu);
1354 			if (zone->uz_ctor)
1355 				zone->uz_ctor(item, zone->uz_size, udata);
1356 			if (flags & M_ZERO)
1357 				bzero(item, zone->uz_size);
1358 			return (item);
1359 		} else if (cache->uc_freebucket) {
1360 			/*
1361 			 * We have run out of items in our allocbucket.
1362 			 * See if we can switch with our free bucket.
1363 			 */
1364 			if (cache->uc_freebucket->ub_ptr > -1) {
1365 				uma_bucket_t swap;
1366 
1367 #ifdef UMA_DEBUG_ALLOC
1368 				printf("uma_zalloc: Swapping empty with alloc.\n");
1369 #endif
1370 				swap = cache->uc_freebucket;
1371 				cache->uc_freebucket = cache->uc_allocbucket;
1372 				cache->uc_allocbucket = swap;
1373 
1374 				goto zalloc_start;
1375 			}
1376 		}
1377 	}
1378 	ZONE_LOCK(zone);
1379 	/* Since we have locked the zone we may as well send back our stats */
1380 	zone->uz_allocs += cache->uc_allocs;
1381 	cache->uc_allocs = 0;
1382 
1383 	/* Our old one is now a free bucket */
1384 	if (cache->uc_allocbucket) {
1385 		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1386 		    ("uma_zalloc_arg: Freeing a non free bucket."));
1387 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1388 		    cache->uc_allocbucket, ub_link);
1389 		cache->uc_allocbucket = NULL;
1390 	}
1391 
1392 	/* Check the free list for a new alloc bucket */
1393 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1394 		KASSERT(bucket->ub_ptr != -1,
1395 		    ("uma_zalloc_arg: Returning an empty bucket."));
1396 
1397 		LIST_REMOVE(bucket, ub_link);
1398 		cache->uc_allocbucket = bucket;
1399 		ZONE_UNLOCK(zone);
1400 		goto zalloc_start;
1401 	}
1402 	/* We are no longer associated with this cpu!!! */
1403 	CPU_UNLOCK(cpu);
1404 
1405 	/* Bump up our uz_count so we get here less */
1406 	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1407 		zone->uz_count++;
1408 
1409 	/*
1410 	 * Now let's just fill a bucket and put it on the free list.  If that
1411 	 * works we'll restart the allocation from the beginning.
1412 	 */
1413 
1414 	if (uma_zalloc_bucket(zone, flags)) {
1415 		ZONE_UNLOCK(zone);
1416 		goto zalloc_restart;
1417 	}
1418 	ZONE_UNLOCK(zone);
1419 	/*
1420 	 * We may not be able to get a bucket so return an actual item.
1421 	 */
1422 #ifdef UMA_DEBUG
1423 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1424 #endif
1425 
1426 	return (uma_zalloc_internal(zone, udata, flags));
1427 }
1428 
1429 static uma_slab_t
1430 uma_zone_slab(uma_zone_t zone, int flags)
1431 {
1432 	uma_slab_t slab;
1433 
1434 	/*
1435 	 * This is to prevent us from recursively trying to allocate
1436 	 * buckets.  The problem is that if an allocation forces us to
1437 	 * grab a new bucket we will call page_alloc, which will go off
1438 	 * and cause the vm to allocate vm_map_entries.  If we need new
1439 	 * buckets there too we will recurse in kmem_alloc and bad
1440 	 * things happen.  So instead we return a NULL bucket, and make
1441 	 * the code that allocates buckets smart enough to deal with it
1442 	 */
1443 	if (zone == bucketzone && zone->uz_recurse != 0)
1444 		return (NULL);
1445 
1446 	slab = NULL;
1447 
1448 	for (;;) {
1449 		/*
1450 		 * Find a slab with some space.  Prefer slabs that are partially
1451 		 * used over those that are totally full.  This helps to reduce
1452 		 * fragmentation.
1453 		 */
1454 		if (zone->uz_free != 0) {
1455 			if (!LIST_EMPTY(&zone->uz_part_slab)) {
1456 				slab = LIST_FIRST(&zone->uz_part_slab);
1457 			} else {
1458 				slab = LIST_FIRST(&zone->uz_free_slab);
1459 				LIST_REMOVE(slab, us_link);
1460 				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
1461 				us_link);
1462 			}
1463 			return (slab);
1464 		}
1465 
1466 		/*
1467 		 * M_NOVM means don't ask at all!
1468 		 */
1469 		if (flags & M_NOVM)
1470 			break;
1471 
1472 		if (zone->uz_maxpages &&
1473 		    zone->uz_pages >= zone->uz_maxpages) {
1474 			zone->uz_flags |= UMA_ZFLAG_FULL;
1475 
1476 			if (flags & M_NOWAIT)
1477 				break;
1478 			else
1479 				msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1480 			continue;
1481 		}
1482 		zone->uz_recurse++;
1483 		slab = slab_zalloc(zone, flags);
1484 		zone->uz_recurse--;
1485 		/*
1486 		 * If we got a slab here it's safe to mark it partially used
1487 		 * and return.  We assume that the caller is going to remove
1488 		 * at least one item.
1489 		 */
1490 		if (slab) {
1491 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1492 			return (slab);
1493 		}
1494 		/*
1495 		 * We might not have been able to get a slab but another cpu
1496 		 * could have while we were unlocked.  Check again before we
1497 		 * fail.
1498 		 */
1499 		if (flags & M_NOWAIT)
1500 			flags |= M_NOVM;
1501 	}
1502 	return (slab);
1503 }
1504 
1505 static __inline void *
1506 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
1507 {
1508 	void *item;
1509 	u_int8_t freei;
1510 
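	/* Pop the first free index off the slab's embedded free list. */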
1511 	freei = slab->us_firstfree;
1512 	slab->us_firstfree = slab->us_freelist[freei];
1513 	item = slab->us_data + (zone->uz_rsize * freei);
1514 
1515 	slab->us_freecount--;
1516 	zone->uz_free--;
1517 #ifdef INVARIANTS
1518 	uma_dbg_alloc(zone, slab, item);
1519 #endif
1520 	/* Move this slab to the full list */
1521 	if (slab->us_freecount == 0) {
1522 		LIST_REMOVE(slab, us_link);
1523 		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1524 	}
1525 
1526 	return (item);
1527 }
1528 
1529 static int
1530 uma_zalloc_bucket(uma_zone_t zone, int flags)
1531 {
1532 	uma_bucket_t bucket;
1533 	uma_slab_t slab;
1534 
1535 	/*
1536 	 * Try this zone's free list first so we don't allocate extra buckets.
1537 	 */
1538 
1539 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1540 		KASSERT(bucket->ub_ptr == -1,
1541 		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
1542 		LIST_REMOVE(bucket, ub_link);
1543 	} else {
1544 		int bflags;
1545 
1546 		bflags = flags;
1547 		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1548 			bflags |= M_NOVM;
1549 
1550 		ZONE_UNLOCK(zone);
1551 		bucket = uma_zalloc_internal(bucketzone,
1552 		    NULL, bflags);
1553 		ZONE_LOCK(zone);
1554 		if (bucket != NULL) {
1555 #ifdef INVARIANTS
1556 			bzero(bucket, bucketzone->uz_size);
1557 #endif
1558 			bucket->ub_ptr = -1;
1559 		}
1560 	}
1561 
1562 	if (bucket == NULL)
1563 		return (0);
1564 
1565 #ifdef SMP
1566 	/*
1567 	 * This code is here to limit the number of simultaneous bucket fills
1568 	 * for any given zone to the number of per cpu caches in this zone. This
1569 	 * is done so that we don't allocate more memory than we really need.
1570 	 */
1571 	if (zone->uz_fills >= mp_ncpus)
1572 		goto done;
1573 
1574 #endif
1575 	zone->uz_fills++;
1576 
1577 	/* Try to keep the buckets totally full */
1578 	while ((slab = uma_zone_slab(zone, flags)) != NULL &&
1579 	    bucket->ub_ptr < zone->uz_count) {
1580 		while (slab->us_freecount &&
1581 		    bucket->ub_ptr < zone->uz_count) {
1582 			bucket->ub_bucket[++bucket->ub_ptr] =
1583 			    uma_slab_alloc(zone, slab);
1584 		}
1585 		/* Don't block on the next fill */
1586 		flags |= M_NOWAIT;
1587 	}
1588 
1589 	zone->uz_fills--;
1590 
1591 	if (bucket->ub_ptr != -1) {
1592 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1593 		    bucket, ub_link);
1594 		return (1);
1595 	}
1596 #ifdef SMP
1597 done:
1598 #endif
1599 	uma_zfree_internal(bucketzone, bucket, NULL, 0);
1600 
1601 	return (0);
1602 }
1603 /*
1604  * Allocates an item for an internal zone
1605  *
1606  * Arguments
1607  *	zone   The zone to alloc for.
1608  *	udata  The data to be passed to the constructor.
1609  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
1610  *
1611  * Returns
1612  *	NULL if there is no memory and M_NOWAIT is set
1613  *	An item if successful
1614  */
1615 
1616 static void *
1617 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
1618 {
1619 	uma_slab_t slab;
1620 	void *item;
1621 
1622 	item = NULL;
1623 
1624 	/*
1625 	 * This is to stop us from allocating per cpu buckets while we're
1626 	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
1627 	 * boot pages.
1628 	 */
1629 
1630 	if (bucketdisable && zone == bucketzone)
1631 		return (NULL);
1632 
1633 #ifdef UMA_DEBUG_ALLOC
1634 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1635 #endif
1636 	ZONE_LOCK(zone);
1637 
1638 	slab = uma_zone_slab(zone, flags);
1639 	if (slab == NULL) {
1640 		ZONE_UNLOCK(zone);
1641 		return (NULL);
1642 	}
1643 
1644 	item = uma_slab_alloc(zone, slab);
1645 
1646 	ZONE_UNLOCK(zone);
1647 
1648 	if (zone->uz_ctor != NULL)
1649 		zone->uz_ctor(item, zone->uz_size, udata);
1650 	if (flags & M_ZERO)
1651 		bzero(item, zone->uz_size);
1652 
1653 	return (item);
1654 }
1655 
1656 /* See uma.h */
1657 void
1658 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1659 {
1660 	uma_cache_t cache;
1661 	uma_bucket_t bucket;
1662 	int bflags;
1663 	int cpu;
1664 	int skip;
1665 
1666 	/* This is the fast path free */
1667 	skip = 0;
1668 #ifdef UMA_DEBUG_ALLOC_1
1669 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1670 #endif
1671 	/*
1672 	 * The race here is acceptable.  If we miss it we'll just have to wait
1673 	 * a little longer for the limits to be reset.
1674 	 */
1675 
1676 	if (zone->uz_flags & UMA_ZFLAG_FULL)
1677 		goto zfree_internal;
1678 
1679 	if (zone->uz_dtor) {
1680 		zone->uz_dtor(item, zone->uz_size, udata);
1681 		skip = 1;
1682 	}
1683 
1684 zfree_restart:
1685 	cpu = PCPU_GET(cpuid);
1686 	CPU_LOCK(cpu);
1687 	cache = &zone->uz_cpu[cpu];
1688 
1689 zfree_start:
1690 	bucket = cache->uc_freebucket;
1691 
1692 	if (bucket) {
1693 		/*
1694 		 * Do we have room in our bucket? It is OK for this uz count
1695 		 * check to be slightly out of sync.
1696 		 */
1697 
1698 		if (bucket->ub_ptr < zone->uz_count) {
1699 			bucket->ub_ptr++;
1700 			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1701 			    ("uma_zfree: Freeing to non free bucket index."));
1702 			bucket->ub_bucket[bucket->ub_ptr] = item;
1703 #ifdef INVARIANTS
1704 			ZONE_LOCK(zone);
1705 			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1706 				uma_dbg_free(zone, udata, item);
1707 			else
1708 				uma_dbg_free(zone, NULL, item);
1709 			ZONE_UNLOCK(zone);
1710 #endif
1711 			CPU_UNLOCK(cpu);
1712 			return;
1713 		} else if (cache->uc_allocbucket) {
1714 #ifdef UMA_DEBUG_ALLOC
1715 			printf("uma_zfree: Swapping buckets.\n");
1716 #endif
1717 			/*
1718 			 * We have run out of space in our freebucket.
1719 			 * See if we can switch with our alloc bucket.
1720 			 */
1721 			if (cache->uc_allocbucket->ub_ptr <
1722 			    cache->uc_freebucket->ub_ptr) {
1723 				uma_bucket_t swap;
1724 
1725 				swap = cache->uc_freebucket;
1726 				cache->uc_freebucket = cache->uc_allocbucket;
1727 				cache->uc_allocbucket = swap;
1728 
1729 				goto zfree_start;
1730 			}
1731 		}
1732 	}
1733 
1734 	/*
1735 	 * We can get here for two reasons:
1736 	 *
1737 	 * 1) The buckets are NULL
1738 	 * 2) The alloc and free buckets are both somewhat full.
1739 	 *
1740 	 */
1741 
1742 	ZONE_LOCK(zone);
1743 
1744 	bucket = cache->uc_freebucket;
1745 	cache->uc_freebucket = NULL;
1746 
1747 	/* Can we throw this on the zone full list? */
1748 	if (bucket != NULL) {
1749 #ifdef UMA_DEBUG_ALLOC
1750 		printf("uma_zfree: Putting old bucket on the free list.\n");
1751 #endif
1752 		/* ub_ptr is pointing to the last free item */
1753 		KASSERT(bucket->ub_ptr != -1,
1754 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1755 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1756 		    bucket, ub_link);
1757 	}
1758 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1759 		LIST_REMOVE(bucket, ub_link);
1760 		ZONE_UNLOCK(zone);
1761 		cache->uc_freebucket = bucket;
1762 		goto zfree_start;
1763 	}
1764 	/* We're done with this CPU now */
1765 	CPU_UNLOCK(cpu);
1766 
1767 	/* And the zone.. */
1768 	ZONE_UNLOCK(zone);
1769 
1770 #ifdef UMA_DEBUG_ALLOC
1771 	printf("uma_zfree: Allocating new free bucket.\n");
1772 #endif
1773 	bflags = M_NOWAIT;
1774 
1775 	if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1776 		bflags |= M_NOVM;
1777 #ifdef INVARIANTS
1778 	bflags |= M_ZERO;
1779 #endif
1780 	bucket = uma_zalloc_internal(bucketzone,
1781 	    NULL, bflags);
1782 	if (bucket) {
1783 		bucket->ub_ptr = -1;
1784 		ZONE_LOCK(zone);
1785 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1786 		    bucket, ub_link);
1787 		ZONE_UNLOCK(zone);
1788 		goto zfree_restart;
1789 	}
1790 
1791 	/*
1792 	 * If nothing else caught this, we'll just do an internal free.
1793 	 */
1794 
1795 zfree_internal:
1796 
1797 	uma_zfree_internal(zone, item, udata, skip);
1798 
1799 	return;
1800 
1801 }
1802 
1803 /*
1804  * Frees an item to an INTERNAL zone or allocates a free bucket
1805  *
1806  * Arguments:
1807  *	zone   The zone to free to
1808  *	item   The item we're freeing
1809  *	udata  User supplied data for the dtor
1810  *	skip   Skip the dtor, it was done in uma_zfree_arg
1811  */
1812 
1813 static void
1814 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1815 {
1816 	uma_slab_t slab;
1817 	u_int8_t *mem;
1818 	u_int8_t freei;
1819 
1820 	if (!skip && zone->uz_dtor)
1821 		zone->uz_dtor(item, zone->uz_size, udata);
1822 
1823 	ZONE_LOCK(zone);
1824 
1825 	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1826 		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1827 		if (zone->uz_flags & UMA_ZFLAG_HASH)
1828 			slab = hash_sfind(&zone->uz_hash, mem);
1829 		else {
1830 			mem += zone->uz_pgoff;
1831 			slab = (uma_slab_t)mem;
1832 		}
1833 	} else {
1834 		slab = (uma_slab_t)udata;
1835 	}
1836 
1837 	/* Do we need to remove from any lists? */
1838 	if (slab->us_freecount+1 == zone->uz_ipers) {
1839 		LIST_REMOVE(slab, us_link);
1840 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1841 	} else if (slab->us_freecount == 0) {
1842 		LIST_REMOVE(slab, us_link);
1843 		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1844 	}
1845 
1846 	/* Slab management stuff */
1847 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
1848 		/ zone->uz_rsize;
1849 
1850 #ifdef INVARIANTS
1851 	if (!skip)
1852 		uma_dbg_free(zone, slab, item);
1853 #endif
1854 
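	/* Put this item's index back at the head of the slab's free list. */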
1855 	slab->us_freelist[freei] = slab->us_firstfree;
1856 	slab->us_firstfree = freei;
1857 	slab->us_freecount++;
1858 
1859 	/* Zone statistics */
1860 	zone->uz_free++;
1861 
1862 	if (zone->uz_flags & UMA_ZFLAG_FULL) {
1863 		if (zone->uz_pages < zone->uz_maxpages)
1864 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
1865 
1866 		/* We can handle one more allocation */
1867 		wakeup_one(zone);
1868 	}
1869 
1870 	ZONE_UNLOCK(zone);
1871 }
1872 
1873 /* See uma.h */
1874 void
1875 uma_zone_set_max(uma_zone_t zone, int nitems)
1876 {
1877 	ZONE_LOCK(zone);
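	/* Convert the item limit into a page limit, rounding up. */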
1878 	if (zone->uz_ppera > 1)
1879 		zone->uz_maxpages = nitems * zone->uz_ppera;
1880 	else
1881 		zone->uz_maxpages = nitems / zone->uz_ipers;
1882 
1883 	if (zone->uz_maxpages * zone->uz_ipers < nitems)
1884 		zone->uz_maxpages++;
1885 
1886 	ZONE_UNLOCK(zone);
1887 }
1888 
1889 /* See uma.h */
1890 void
1891 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1892 {
1893 	ZONE_LOCK(zone);
1894 
1895 	zone->uz_freef = freef;
1896 
1897 	ZONE_UNLOCK(zone);
1898 }
1899 
1900 /* See uma.h */
1901 void
1902 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1903 {
1904 	ZONE_LOCK(zone);
1905 
1906 	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1907 	zone->uz_allocf = allocf;
1908 
1909 	ZONE_UNLOCK(zone);
1910 }
1911 
1912 /* See uma.h */
1913 int
1914 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1915 {
1916 	int pages;
1917 	vm_offset_t kva;
1918 
1919 	mtx_lock(&Giant);
1920 
1921 	pages = count / zone->uz_ipers;
1922 
1923 	if (pages * zone->uz_ipers < count)
1924 		pages++;
1925 
1926 	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1927 
1928 	if (kva == 0) {
1929 		mtx_unlock(&Giant);
1930 		return (0);
1931 	}
1932 
1933 
1934 	if (obj == NULL)
1935 		obj = vm_object_allocate(OBJT_DEFAULT,
1936 		    pages);
1937 	else {
1938 		VM_OBJECT_LOCK_INIT(obj);
1939 		_vm_object_allocate(OBJT_DEFAULT,
1940 		    pages, obj);
1941 	}
1942 	ZONE_LOCK(zone);
1943 	zone->uz_kva = kva;
1944 	zone->uz_obj = obj;
1945 	zone->uz_maxpages = pages;
1946 
1947 	zone->uz_allocf = obj_alloc;
1948 	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1949 
1950 	ZONE_UNLOCK(zone);
1951 	mtx_unlock(&Giant);
1952 
1953 	return (1);
1954 }
1955 
1956 /* See uma.h */
1957 void
1958 uma_prealloc(uma_zone_t zone, int items)
1959 {
1960 	int slabs;
1961 	uma_slab_t slab;
1962 
1963 	ZONE_LOCK(zone);
1964 	slabs = items / zone->uz_ipers;
1965 	if (slabs * zone->uz_ipers < items)
1966 		slabs++;
1967 
1968 	while (slabs > 0) {
1969 		slab = slab_zalloc(zone, M_WAITOK);
1970 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1971 		slabs--;
1972 	}
1973 	ZONE_UNLOCK(zone);
1974 }
1975 
1976 /* See uma.h */
1977 void
1978 uma_reclaim(void)
1979 {
1980 	/*
1981 	 * You might think that the delay below would improve performance since
1982 	 * the allocator will give away memory that it may ask for again immediately.
1983 	 * Really, it makes things worse, since cpu cycles are so much cheaper
1984 	 * than disk activity.
1985 	 */
1986 #if 0
1987 	static struct timeval tv = {0};
1988 	struct timeval now;
1989 	getmicrouptime(&now);
1990 	if (now.tv_sec > tv.tv_sec + 30)
1991 		tv = now;
1992 	else
1993 		return;
1994 #endif
1995 #ifdef UMA_DEBUG
1996 	printf("UMA: vm asked us to release pages!\n");
1997 #endif
1998 	bucket_enable();
1999 	zone_foreach(zone_drain);
2000 
2001 	/*
2002 	 * Some slabs may have been freed to this zone after it was visited early
2003 	 * in the pass, so we visit it again to free pages that became empty once
2004 	 * the other zones were drained.  We have to do the same for buckets.
2005 	 */
2006 	zone_drain(slabzone);
2007 	zone_drain(bucketzone);
2008 }
2009 
2010 void *
2011 uma_large_malloc(int size, int wait)
2012 {
2013 	void *mem;
2014 	uma_slab_t slab;
2015 	u_int8_t flags;
2016 
2017 	slab = uma_zalloc_internal(slabzone, NULL, wait);
2018 	if (slab == NULL)
2019 		return (NULL);
2020 
2021 	/* XXX: kmem_malloc panics if Giant isn't held and sleep allowed */
2022 	if ((wait & M_NOWAIT) == 0 && !mtx_owned(&Giant)) {
2023 		mtx_lock(&Giant);
2024 		mem = page_alloc(NULL, size, &flags, wait);
2025 		mtx_unlock(&Giant);
2026 	} else
2027 		mem = page_alloc(NULL, size, &flags, wait);
2028 	if (mem) {
2029 		vsetslab((vm_offset_t)mem, slab);
2030 		slab->us_data = mem;
2031 		slab->us_flags = flags | UMA_SLAB_MALLOC;
2032 		slab->us_size = size;
2033 	} else {
2034 		uma_zfree_internal(slabzone, slab, NULL, 0);
2035 	}
2036 
2037 
2038 	return (mem);
2039 }
2040 
2041 void
2042 uma_large_free(uma_slab_t slab)
2043 {
2044 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
2045 	/*
2046 	 * XXX: We get a lock order reversal if we don't have Giant:
2047 	 * vm_map_remove (locks system map) -> vm_map_delete ->
2048 	 *    vm_map_entry_unwire -> vm_fault_unwire -> mtx_lock(&Giant)
2049 	 */
2050 	if (!mtx_owned(&Giant)) {
2051 		mtx_lock(&Giant);
2052 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2053 		mtx_unlock(&Giant);
2054 	} else
2055 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2056 	uma_zfree_internal(slabzone, slab, NULL, 0);
2057 }
2058 
2059 void
2060 uma_print_stats(void)
2061 {
2062 	zone_foreach(uma_print_zone);
2063 }
2064 
2065 void
2066 uma_print_zone(uma_zone_t zone)
2067 {
2068 	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2069 	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2070 	    zone->uz_ipers, zone->uz_ppera,
2071 	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2072 }
2073 
2074 /*
2075  * Sysctl handler for vm.zone
2076  *
2077  * stolen from vm_zone.c
2078  */
2079 static int
2080 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2081 {
2082 	int error, len, cnt;
2083 	const int linesize = 128;	/* conservative */
2084 	int totalfree;
2085 	char *tmpbuf, *offset;
2086 	uma_zone_t z;
2087 	char *p;
2088 
2089 	cnt = 0;
2090 	mtx_lock(&uma_mtx);
2091 	LIST_FOREACH(z, &uma_zones, uz_link)
2092 		cnt++;
2093 	mtx_unlock(&uma_mtx);
2094 	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2095 			M_TEMP, M_WAITOK);
2096 	len = snprintf(tmpbuf, linesize,
2097 	    "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2098 	if (cnt == 0)
2099 		tmpbuf[len - 1] = '\0';
2100 	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2101 	if (error || cnt == 0)
2102 		goto out;
2103 	offset = tmpbuf;
2104 	mtx_lock(&uma_mtx);
2105 	LIST_FOREACH(z, &uma_zones, uz_link) {
2106 		if (cnt == 0)	/* list may have changed size */
2107 			break;
2108 		ZONE_LOCK(z);
2109 		totalfree = z->uz_free + z->uz_cachefree;
2110 		len = snprintf(offset, linesize,
2111 		    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2112 		    z->uz_name, z->uz_size,
2113 		    z->uz_maxpages * z->uz_ipers,
2114 		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2115 		    totalfree,
2116 		    (unsigned long long)z->uz_allocs);
2117 		ZONE_UNLOCK(z);
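		/*
		 * Walk back over the padding in the 12 character name field
		 * and place a ':' immediately after the zone name.
		 */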
2118 		for (p = offset + 12; p > offset && *p == ' '; --p)
2119 			/* nothing */ ;
2120 		p[1] = ':';
2121 		cnt--;
2122 		offset += len;
2123 	}
2124 	mtx_unlock(&uma_mtx);
2125 	*offset++ = '\0';
2126 	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2127 out:
2128 	FREE(tmpbuf, M_TEMP);
2129 	return (error);
2130 }
2131