xref: /freebsd/sys/vm/uma_core.c (revision e0c27215058b5786c78fcfb3963eebe61a989511)
1 /*
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * uma_core.c  Implementation of the Universal Memory allocator
29  *
30  * This allocator is intended to replace the multitude of similar object caches
31  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
32  * efficient.  A primary design goal is to return unused memory to the rest of
33  * the system.  This will make the system as a whole more flexible due to the
34  * ability to move memory to subsystems which most need it instead of leaving
35  * pools of reserved memory unused.
36  *
37  * The basic ideas stem from similar slab/zone based allocators whose algorithms
38  * are well known.
39  *
40  */
41 
42 /*
43  * TODO:
44  *	- Improve memory usage for large allocations
45  *	- Investigate cache size adjustments
46  */
47 
48 #include <sys/cdefs.h>
49 __FBSDID("$FreeBSD$");
50 
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57 
58 #include "opt_param.h"
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/kernel.h>
62 #include <sys/types.h>
63 #include <sys/queue.h>
64 #include <sys/malloc.h>
65 #include <sys/lock.h>
66 #include <sys/sysctl.h>
67 #include <sys/mutex.h>
68 #include <sys/proc.h>
69 #include <sys/smp.h>
70 #include <sys/vmmeter.h>
71 #include <sys/mbuf.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_extern.h>
80 #include <vm/uma.h>
81 #include <vm/uma_int.h>
82 #include <vm/uma_dbg.h>
83 
84 #include <machine/vmparam.h>
85 
86 /*
87  * This is the zone from which all zones are spawned.  The idea is that even
88  * the zone heads are allocated from the allocator, so we use the bss section
89  * to bootstrap us.
90  */
91 static struct uma_zone masterzone;
92 static uma_zone_t zones = &masterzone;
93 
94 /* This is the zone from which all of uma_slab_t's are allocated. */
95 static uma_zone_t slabzone;
96 
97 /*
98  * The initial hash tables come out of this zone so they can be allocated
99  * prior to malloc coming up.
100  */
101 static uma_zone_t hashzone;
102 
103 /*
104  * Zone that buckets come from.
105  */
106 static uma_zone_t bucketzone;
107 
108 /*
109  * Are we allowed to allocate buckets?
110  */
111 static int bucketdisable = 1;
112 
113 /* Linked list of all zones in the system */
114 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
115 
116 /* This mutex protects the zone list */
117 static struct mtx uma_mtx;
118 
119 /* These are the pcpu cache locks */
120 static struct mtx uma_pcpu_mtx[MAXCPU];
121 
122 /* Linked list of boot time pages */
123 static LIST_HEAD(,uma_slab) uma_boot_pages =
124     LIST_HEAD_INITIALIZER(&uma_boot_pages);
125 
126 /* Count of free boottime pages */
127 static int uma_boot_free = 0;
128 
129 /* Is the VM done starting up? */
130 static int booted = 0;
131 
132 /* This is the handle used to schedule our working set calculator */
133 static struct callout uma_callout;
134 
135 /* This is mp_maxid + 1, for use while looping over each cpu */
136 static int maxcpu;
137 
138 /*
139  * This structure is passed as the zone ctor arg so that I don't have to create
140  * a special allocation function just for zones.
141  */
142 struct uma_zctor_args {
143 	char *name;
144 	size_t size;
145 	uma_ctor ctor;
146 	uma_dtor dtor;
147 	uma_init uminit;
148 	uma_fini fini;
149 	int align;
150 	u_int16_t flags;
151 };
152 
153 /* Prototypes.. */
154 
155 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
156 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
157 static void page_free(void *, int, u_int8_t);
158 static uma_slab_t slab_zalloc(uma_zone_t, int);
159 static void cache_drain(uma_zone_t);
160 static void bucket_drain(uma_zone_t, uma_bucket_t);
161 static void zone_drain(uma_zone_t);
162 static void zone_ctor(void *, int, void *);
163 static void zone_dtor(void *, int, void *);
164 static void zero_init(void *, int);
165 static void zone_small_init(uma_zone_t zone);
166 static void zone_large_init(uma_zone_t zone);
167 static void zone_foreach(void (*zfunc)(uma_zone_t));
168 static void zone_timeout(uma_zone_t zone);
169 static int hash_alloc(struct uma_hash *);
170 static int hash_expand(struct uma_hash *, struct uma_hash *);
171 static void hash_free(struct uma_hash *hash);
172 static void uma_timeout(void *);
173 static void uma_startup3(void);
174 static void *uma_zalloc_internal(uma_zone_t, void *, int);
175 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
176 static void bucket_enable(void);
177 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
178 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
179 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
180 
181 void uma_print_zone(uma_zone_t);
182 void uma_print_stats(void);
183 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
184 
185 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
186     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
187 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
188 
189 /*
190  * This routine checks to see whether or not it's safe to enable buckets.
191  */
192 
193 static void
194 bucket_enable(void)
195 {
196 	if (cnt.v_free_count < cnt.v_free_min)
197 		bucketdisable = 1;
198 	else
199 		bucketdisable = 0;
200 }
201 
202 
203 /*
204  * Routine called by timeout which is used to fire off some time interval
205  * based calculations.  (working set, stats, etc.)
206  *
207  * Arguments:
208  *	arg   Unused
209  *
210  * Returns:
211  *	Nothing
212  */
213 static void
214 uma_timeout(void *unused)
215 {
216 	bucket_enable();
217 	zone_foreach(zone_timeout);
218 
219 	/* Reschedule this event */
220 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
221 }
222 
223 /*
224  * Routine to perform timeout driven calculations.  This does the working set
225  * calculation as well as hash expansion and per cpu statistics aggregation.
226  *
227  *  Arguments:
228  *	zone  The zone to operate on
229  *
230  *  Returns:
231  *	Nothing
232  */
233 static void
234 zone_timeout(uma_zone_t zone)
235 {
236 	uma_cache_t cache;
237 	u_int64_t alloc;
238 	int free;
239 	int cpu;
240 
241 	alloc = 0;
242 	free = 0;
243 
244 	/*
245 	 * Aggregate per cpu cache statistics back to the zone.
246 	 *
247 	 * I may rewrite this to set a flag in the per cpu cache instead of
248 	 * locking.  If the flag is not cleared on the next round I will have
249 	 * to lock and do it here instead so that the statistics don't get too
250 	 * far out of sync.
251 	 */
252 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
253 		for (cpu = 0; cpu < maxcpu; cpu++) {
254 			if (CPU_ABSENT(cpu))
255 				continue;
256 			CPU_LOCK(cpu);
257 			cache = &zone->uz_cpu[cpu];
258 			/* Add them up, and reset */
259 			alloc += cache->uc_allocs;
260 			cache->uc_allocs = 0;
261 			if (cache->uc_allocbucket)
262 				free += cache->uc_allocbucket->ub_ptr + 1;
263 			if (cache->uc_freebucket)
264 				free += cache->uc_freebucket->ub_ptr + 1;
265 			CPU_UNLOCK(cpu);
266 		}
267 	}
268 
269 	/* Now push these stats back into the zone.. */
270 	ZONE_LOCK(zone);
271 	zone->uz_allocs += alloc;
272 
273 	/*
274 	 * cachefree is an instantaneous snapshot of what is in the per cpu
275 	 * caches, not an accurate counter
276 	 */
277 	zone->uz_cachefree = free;
278 
279 	/*
280 	 * Expand the zone hash table.
281 	 *
282 	 * This is done if the number of slabs is larger than the hash size.
283 	 * What I'm trying to do here is eliminate collisions entirely.  This
284 	 * may be a little aggressive.  Should I allow for two collisions max?
285 	 */
286 
287 	if (zone->uz_flags & UMA_ZFLAG_HASH &&
288 	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
289 		struct uma_hash newhash;
290 		struct uma_hash oldhash;
291 		int ret;
292 
293 		/*
294 		 * This is so involved because allocating and freeing
295 		 * while the zone lock is held will lead to deadlock.
296 		 * I have to do everything in stages and check for
297 		 * races.
298 		 */
299 		newhash = zone->uz_hash;
300 		ZONE_UNLOCK(zone);
301 		ret = hash_alloc(&newhash);
302 		ZONE_LOCK(zone);
303 		if (ret) {
304 			if (hash_expand(&zone->uz_hash, &newhash)) {
305 				oldhash = zone->uz_hash;
306 				zone->uz_hash = newhash;
307 			} else
308 				oldhash = newhash;
309 
310 			ZONE_UNLOCK(zone);
311 			hash_free(&oldhash);
312 			ZONE_LOCK(zone);
313 		}
314 	}
315 
316 	/*
317 	 * Here we compute the working set size as the total number of items
318 	 * left outstanding since the last time interval.  This is slightly
319 	 * suboptimal. What we really want is the highest number of outstanding
320 	 * items during the last time quantum.  This should be close enough.
321 	 *
322 	 * The working set size is used to throttle the zone_drain function.
323 	 * We don't want to return memory that we may need again immediately.
324 	 */
325 	alloc = zone->uz_allocs - zone->uz_oallocs;
326 	zone->uz_oallocs = zone->uz_allocs;
327 	zone->uz_wssize = alloc;
328 
329 	ZONE_UNLOCK(zone);
330 }
331 
332 /*
333  * Allocate and zero fill the next sized hash table from the appropriate
334  * backing store.
335  *
336  * Arguments:
337  *	hash  A new hash structure with the old hash size in uh_hashsize
338  *
339  * Returns:
340  *	1 on success and 0 on failure.
341  */
342 static int
343 hash_alloc(struct uma_hash *hash)
344 {
345 	int oldsize;
346 	int alloc;
347 
348 	oldsize = hash->uh_hashsize;
349 
350 	/* We're just going to go to a power of two greater */
351 	if (oldsize)  {
352 		hash->uh_hashsize = oldsize * 2;
353 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
354 		/* XXX Shouldn't be abusing DEVBUF here */
355 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
356 		    M_DEVBUF, M_NOWAIT);
357 	} else {
358 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
359 		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
360 		    M_WAITOK);
361 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
362 	}
363 	if (hash->uh_slab_hash) {
364 		bzero(hash->uh_slab_hash, alloc);
365 		hash->uh_hashmask = hash->uh_hashsize - 1;
366 		return (1);
367 	}
368 
369 	return (0);
370 }

/*
 * For illustration: the first table comes out of hashzone with
 * UMA_HASH_SIZE_INIT entries, and every later call simply doubles
 * uh_hashsize and mallocs sizeof(hash->uh_slab_hash[0]) * uh_hashsize
 * bytes, so the sizes grow as UMA_HASH_SIZE_INIT, 2x, 4x, and so on.
 */
371 
372 /*
373  * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
374  * to reduce collisions.  This must not be done in the regular allocation path,
375  * otherwise, we can recurse on the vm while allocating pages.
376  *
377  * Arguments:
378  *	oldhash  The hash you want to expand
379  *	newhash  The hash structure for the new table
380  *
381  * Returns:
382  * 	1 on success and 0 if the new hash is unusable (it was not
383  *	allocated or is not larger than the old one).
385  */
386 static int
387 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
388 {
389 	uma_slab_t slab;
390 	int hval;
391 	int i;
392 
393 	if (!newhash->uh_slab_hash)
394 		return (0);
395 
396 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
397 		return (0);
398 
399 	/*
400 	 * I need to investigate hash algorithms for resizing without a
401 	 * full rehash.
402 	 */
403 
404 	for (i = 0; i < oldhash->uh_hashsize; i++)
405 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
406 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
407 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
408 			hval = UMA_HASH(newhash, slab->us_data);
409 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
410 			    slab, us_hlink);
411 		}
412 
413 	return (1);
414 }
415 
416 /*
417  * Free the hash bucket to the appropriate backing store.
418  *
419  * Arguments:
420  *	slab_hash  The hash bucket we're freeing
421  *	hash  The hash whose uh_slab_hash table we're freeing
423  * Returns:
424  *	Nothing
425  */
426 static void
427 hash_free(struct uma_hash *hash)
428 {
429 	if (hash->uh_slab_hash == NULL)
430 		return;
431 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
432 		uma_zfree_internal(hashzone,
433 		    hash->uh_slab_hash, NULL, 0);
434 	else
435 		free(hash->uh_slab_hash, M_DEVBUF);
436 }
437 
438 /*
439  * Frees all outstanding items in a bucket
440  *
441  * Arguments:
442  *	zone   The zone to free to, must be unlocked.
443  *	bucket The free/alloc bucket with items, cpu queue must be locked.
444  *
445  * Returns:
446  *	Nothing
447  */
448 
449 static void
450 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
451 {
452 	uma_slab_t slab;
453 	int mzone;
454 	void *item;
455 
456 	if (bucket == NULL)
457 		return;
458 
459 	slab = NULL;
460 	mzone = 0;
461 
462 	/* We have to lookup the slab again for malloc.. */
463 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
464 		mzone = 1;
465 
466 	while (bucket->ub_ptr > -1)  {
467 		item = bucket->ub_bucket[bucket->ub_ptr];
468 #ifdef INVARIANTS
469 		bucket->ub_bucket[bucket->ub_ptr] = NULL;
470 		KASSERT(item != NULL,
471 		    ("bucket_drain: botched ptr, item is NULL"));
472 #endif
473 		bucket->ub_ptr--;
474 		/*
475 		 * This is extremely inefficient.  The slab pointer was passed
476 		 * to uma_zfree_arg, but we lost it because the buckets don't
477 		 * hold them.  This will go away when free() gets a size passed
478 		 * to it.
479 		 */
480 		if (mzone)
481 			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
482 		uma_zfree_internal(zone, item, slab, 1);
483 	}
484 }
485 
486 /*
487  * Drains the per cpu caches for a zone.
488  *
489  * Arguments:
490  *	zone  The zone to drain, must be locked.
491  *
492  * Returns:
493  *	Nothing
494  *
495  * This function returns with the zone locked so that the per cpu queues can
496  * not be filled until zone_drain is finished.
497  *
498  */
499 static void
500 cache_drain(uma_zone_t zone)
501 {
502 	uma_bucket_t bucket;
503 	uma_cache_t cache;
504 	int cpu;
505 
506 	/*
507 	 * Flush out the per cpu queues.
508 	 *
509 	 * XXX This causes unnecessary thrashing due to immediately having
510 	 * empty per cpu queues.  I need to improve this.
511 	 */
512 
513 	/*
514 	 * We have to lock each cpu cache before locking the zone
515 	 */
516 	ZONE_UNLOCK(zone);
517 
518 	for (cpu = 0; cpu < maxcpu; cpu++) {
519 		if (CPU_ABSENT(cpu))
520 			continue;
521 		CPU_LOCK(cpu);
522 		cache = &zone->uz_cpu[cpu];
523 		bucket_drain(zone, cache->uc_allocbucket);
524 		bucket_drain(zone, cache->uc_freebucket);
525 	}
526 
527 	/*
528 	 * Drain the bucket queues and free the buckets; we just keep two per
529 	 * cpu (alloc/free).
530 	 */
531 	ZONE_LOCK(zone);
532 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
533 		LIST_REMOVE(bucket, ub_link);
534 		ZONE_UNLOCK(zone);
535 		bucket_drain(zone, bucket);
536 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
537 		ZONE_LOCK(zone);
538 	}
539 
540 	/* Now we do the free queue.. */
541 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
542 		LIST_REMOVE(bucket, ub_link);
543 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
544 	}
545 
546 	/* We unlock here, but they will all block until the zone is unlocked */
547 	for (cpu = 0; cpu < maxcpu; cpu++) {
548 		if (CPU_ABSENT(cpu))
549 			continue;
550 		CPU_UNLOCK(cpu);
551 	}
552 
553 	zone->uz_cachefree = 0;
554 }
555 
556 /*
557  * Frees pages from a zone back to the system.  This is done on demand from
558  * the pageout daemon.
559  *
560  * Arguments:
561  *	zone  The zone to free pages from
563  *
564  * Returns:
565  *	Nothing.
566  */
567 static void
568 zone_drain(uma_zone_t zone)
569 {
570 	struct slabhead freeslabs = {};
571 	uma_slab_t slab;
572 	uma_slab_t n;
573 	u_int64_t extra;
574 	u_int8_t flags;
575 	u_int8_t *mem;
576 	int i;
577 
578 	/*
579 	 * We don't want to take pages from statically allocated zones at this
580 	 * time
581 	 */
582 	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
583 		return;
584 
585 	ZONE_LOCK(zone);
586 
587 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
588 		cache_drain(zone);
589 
590 	if (zone->uz_free < zone->uz_wssize)
591 		goto finished;
592 #ifdef UMA_DEBUG
593 	printf("%s working set size: %llu free items: %u\n",
594 	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
595 #endif
596 	extra = zone->uz_free - zone->uz_wssize;
597 	extra /= zone->uz_ipers;
598 
599 	/* extra is now the number of extra slabs that we can free */
600 
601 	if (extra == 0)
602 		goto finished;
603 
604 	slab = LIST_FIRST(&zone->uz_free_slab);
605 	while (slab && extra) {
606 		n = LIST_NEXT(slab, us_link);
607 
608 		/* We have nowhere to free these to */
609 		if (slab->us_flags & UMA_SLAB_BOOT) {
610 			slab = n;
611 			continue;
612 		}
613 
614 		LIST_REMOVE(slab, us_link);
615 		zone->uz_pages -= zone->uz_ppera;
616 		zone->uz_free -= zone->uz_ipers;
617 
618 		if (zone->uz_flags & UMA_ZFLAG_HASH)
619 			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
620 
621 		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
622 
623 		slab = n;
624 		extra--;
625 	}
626 finished:
627 	ZONE_UNLOCK(zone);
628 
629 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
630 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
631 		if (zone->uz_fini)
632 			for (i = 0; i < zone->uz_ipers; i++)
633 				zone->uz_fini(
634 				    slab->us_data + (zone->uz_rsize * i),
635 				    zone->uz_size);
636 		flags = slab->us_flags;
637 		mem = slab->us_data;
638 
639 		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
640 			uma_zfree_internal(slabzone, slab, NULL, 0);
641 		if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
642 			vm_object_t obj;
643 
644 			if (flags & UMA_SLAB_KMEM)
645 				obj = kmem_object;
646 			else
647 				obj = NULL;
648 			for (i = 0; i < zone->uz_ppera; i++)
649 				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
650 				    obj);
651 		}
652 #ifdef UMA_DEBUG
653 		printf("%s: Returning %d bytes.\n",
654 		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
655 #endif
656 		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
657 	}
658 
659 }
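
/*
 * A worked example with made up numbers: if the last interval recorded a
 * working set of 2000 items, the zone now holds 3900 free items, and a
 * hypothetical 38 items fit per slab, then extra = (3900 - 2000) / 38 = 50,
 * so up to 50 completely free slabs are returned to the system, with
 * boot time (UMA_SLAB_BOOT) slabs skipped.
 */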
660 
661 /*
662  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
663  *
664  * Arguments:
665  *	zone  The zone to allocate slabs for
666  *	wait  Shall we wait?
667  *
668  * Returns:
669  *	The slab that was allocated or NULL if there is no memory and the
670  *	caller specified M_NOWAIT.
671  *
672  */
673 static uma_slab_t
674 slab_zalloc(uma_zone_t zone, int wait)
675 {
676 	uma_slab_t slab;	/* Starting slab */
677 	u_int8_t *mem;
678 	u_int8_t flags;
679 	int i;
680 
681 	slab = NULL;
682 
683 #ifdef UMA_DEBUG
684 	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
685 #endif
686 	ZONE_UNLOCK(zone);
687 
688 	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
689 		slab = uma_zalloc_internal(slabzone, NULL, wait);
690 		if (slab == NULL) {
691 			ZONE_LOCK(zone);
692 			return NULL;
693 		}
694 	}
695 
696 	/*
697 	 * This reproduces the old vm_zone behavior of zero filling pages the
698 	 * first time they are added to a zone.
699 	 *
700 	 * Malloced items are zeroed in uma_zalloc.
701 	 */
702 
703 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
704 		wait |= M_ZERO;
705 	else
706 		wait &= ~M_ZERO;
707 
708 	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
709 		if ((wait & M_NOWAIT) == 0) {
710 			mtx_lock(&Giant);
711 			mem = zone->uz_allocf(zone,
712 			    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
713 			mtx_unlock(&Giant);
714 		} else {
715 			mem = zone->uz_allocf(zone,
716 			    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
717 		}
718 		if (mem == NULL) {
719 			ZONE_LOCK(zone);
720 			return (NULL);
721 		}
722 	} else {
723 		uma_slab_t tmps;
724 
725 		if (zone->uz_ppera > 1)
726 			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
727 		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
728 			panic("Mallocing before uma_startup2 has been called.\n");
729 		if (uma_boot_free == 0)
730 			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
731 		tmps = LIST_FIRST(&uma_boot_pages);
732 		LIST_REMOVE(tmps, us_link);
733 		uma_boot_free--;
734 		mem = tmps->us_data;
735 		flags = tmps->us_flags;
736 	}
737 
738 	/* Point the slab into the allocated memory */
739 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
740 		slab = (uma_slab_t )(mem + zone->uz_pgoff);
741 
742 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
743 		for (i = 0; i < zone->uz_ppera; i++)
744 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
745 
746 	slab->us_zone = zone;
747 	slab->us_data = mem;
748 
749 	/*
750 	 * This is intended to spread data out across cache lines.
751 	 *
752 	 * This code doesn't seem to work properly on x86, and on alpha
753 	 * it makes absolutely no performance difference. I'm sure it could
754 	 * use some tuning, but Sun makes outrageous claims about its
755 	 * performance.
756 	 */
757 #if 0
758 	if (zone->uz_cachemax) {
759 		slab->us_data += zone->uz_cacheoff;
760 		zone->uz_cacheoff += UMA_CACHE_INC;
761 		if (zone->uz_cacheoff > zone->uz_cachemax)
762 			zone->uz_cacheoff = 0;
763 	}
764 #endif
765 
766 	slab->us_freecount = zone->uz_ipers;
767 	slab->us_firstfree = 0;
768 	slab->us_flags = flags;
769 	for (i = 0; i < zone->uz_ipers; i++)
770 		slab->us_freelist[i] = i+1;
771 
772 	if (zone->uz_init)
773 		for (i = 0; i < zone->uz_ipers; i++)
774 			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
775 			    zone->uz_size);
776 	ZONE_LOCK(zone);
777 
778 	if (zone->uz_flags & UMA_ZFLAG_HASH)
779 		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
780 
781 	zone->uz_pages += zone->uz_ppera;
782 	zone->uz_free += zone->uz_ipers;
783 
784 
785 	return (slab);
786 }
787 
788 /*
789  * Allocates a number of pages from the system
790  *
791  * Arguments:
792  *	zone  Unused
793  *	bytes  The number of bytes requested
794  *	wait  Shall we wait?
795  *
796  * Returns:
797  *	A pointer to the alloced memory or possibly
798  *	NULL if M_NOWAIT is set.
799  */
800 static void *
801 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
802 {
803 	void *p;	/* Returned page */
804 
805 	*pflag = UMA_SLAB_KMEM;
806 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
807 
808 	return (p);
809 }
810 
811 /*
812  * Allocates a number of pages from within an object
813  *
814  * Arguments:
815  *	zone   The zone whose object backs the allocation
816  *	bytes  The number of bytes requested
817  *	wait   Shall we wait?
818  *
819  * Returns:
820  *	A pointer to the alloced memory or possibly
821  *	NULL if M_NOWAIT is set.
822  *
823  * TODO: If we fail during a multi-page allocation release the pages that have
824  *	 already been allocated.
825  */
826 static void *
827 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
828 {
829 	vm_offset_t zkva;
830 	vm_offset_t retkva;
831 	vm_page_t p;
832 	int pages;
833 
834 	retkva = 0;
835 	pages = zone->uz_pages;
836 
837 	/*
838 	 * This looks a little weird since we're getting one page at a time
839 	 */
840 	while (bytes > 0) {
841 		VM_OBJECT_LOCK(zone->uz_obj);
842 		p = vm_page_alloc(zone->uz_obj, pages,
843 		    VM_ALLOC_INTERRUPT);
844 		VM_OBJECT_UNLOCK(zone->uz_obj);
845 		if (p == NULL)
846 			return (NULL);
847 
848 		zkva = zone->uz_kva + pages * PAGE_SIZE;
849 		if (retkva == 0)
850 			retkva = zkva;
851 		pmap_qenter(zkva, &p, 1);
852 		bytes -= PAGE_SIZE;
853 		pages += 1;
854 	}
855 
856 	*flags = UMA_SLAB_PRIV;
857 
858 	return ((void *)retkva);
859 }
860 
861 /*
862  * Frees a number of pages to the system
863  *
864  * Arguments:
865  *	mem   A pointer to the memory to be freed
866  *	size  The size of the memory being freed
867  *	flags The original slab us_flags field
868  *
869  * Returns:
870  *	Nothing
871  *
872  */
873 static void
874 page_free(void *mem, int size, u_int8_t flags)
875 {
876 	vm_map_t map;
877 
878 	if (flags & UMA_SLAB_KMEM)
879 		map = kmem_map;
880 	else
881 		panic("UMA: page_free used with invalid flags %d\n", flags);
882 
883 	kmem_free(map, (vm_offset_t)mem, size);
884 }
885 
886 /*
887  * Zero fill initializer
888  *
889  * Arguments/Returns follow uma_init specifications
890  *
891  */
892 static void
893 zero_init(void *mem, int size)
894 {
895 	bzero(mem, size);
896 }
897 
898 /*
899  * Finish creating a small uma zone.  This calculates ipers, rsize and ppera.
900  *
901  * Arguments
902  *	zone  The zone we should initialize
903  *
904  * Returns
905  *	Nothing
906  */
907 static void
908 zone_small_init(uma_zone_t zone)
909 {
910 	int rsize;
911 	int memused;
912 	int ipers;
913 
914 	rsize = zone->uz_size;
915 
916 	if (rsize < UMA_SMALLEST_UNIT)
917 		rsize = UMA_SMALLEST_UNIT;
918 
919 	if (rsize & zone->uz_align)
920 		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
921 
922 	zone->uz_rsize = rsize;
923 
924 	rsize += 1;	/* Account for the byte of linkage */
925 	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
926 	zone->uz_ppera = 1;
927 
928 	memused = zone->uz_ipers * zone->uz_rsize;
929 
930 	/* Can we do any better? */
931 	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
932 		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
933 			return;
934 		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
935 		if (ipers > zone->uz_ipers) {
936 			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
937 			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
938 				zone->uz_flags |= UMA_ZFLAG_HASH;
939 			zone->uz_ipers = ipers;
940 		}
941 	}
942 
943 }
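
/*
 * A sketch of the arithmetic above, using made up numbers: a 100 byte item
 * with an alignment mask of 7 rounds rsize up to 104.  Assuming a 4096 byte
 * UMA_SLAB_SIZE and a slab header of roughly 120 bytes (both hypothetical),
 * ipers = (4096 - 120) / (104 + 1) = 37, which uses 37 * 104 = 3848 bytes.
 * If the 248 left over bytes exceed UMA_MAX_WASTE the OFFPAGE layout is
 * considered: 4096 / 104 = 39 items beats 37, so the zone goes OFFPAGE
 * (and HASH, unless it is a malloc zone).
 */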
944 
945 /*
946  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
947  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
948  * more complicated.
949  *
950  * Arguments
951  *	zone  The zone we should initialize
952  *
953  * Returns
954  *	Nothing
955  */
956 static void
957 zone_large_init(uma_zone_t zone)
958 {
959 	int pages;
960 
961 	pages = zone->uz_size / UMA_SLAB_SIZE;
962 
963 	/* Account for remainder */
964 	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
965 		pages++;
966 
967 	zone->uz_ppera = pages;
968 	zone->uz_ipers = 1;
969 
970 	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
971 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
972 		zone->uz_flags |= UMA_ZFLAG_HASH;
973 
974 	zone->uz_rsize = zone->uz_size;
975 }
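
/*
 * For example (sizes hypothetical): a 9000 byte item with a 4096 byte
 * UMA_SLAB_SIZE gives pages = 9000 / 4096 = 2, the remainder bumps that to
 * 3, and each slab holds exactly one item spanning those three pages.
 */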
976 
977 /*
978  * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
979  * the zone onto the global zone list.
980  *
981  * Arguments/Returns follow uma_ctor specifications
982  *	udata  Actually uma_zcreat_args
983  *
984  */
985 
986 static void
987 zone_ctor(void *mem, int size, void *udata)
988 {
989 	struct uma_zctor_args *arg = udata;
990 	uma_zone_t zone = mem;
991 	int privlc;
992 
993 	bzero(zone, size);
994 	zone->uz_name = arg->name;
995 	zone->uz_size = arg->size;
996 	zone->uz_ctor = arg->ctor;
997 	zone->uz_dtor = arg->dtor;
998 	zone->uz_init = arg->uminit;
999 	zone->uz_fini = arg->fini;
1000 	zone->uz_align = arg->align;
1001 	zone->uz_free = 0;
1002 	zone->uz_pages = 0;
1003 	zone->uz_flags = 0;
1004 	zone->uz_allocf = page_alloc;
1005 	zone->uz_freef = page_free;
1006 
1007 	if (arg->flags & UMA_ZONE_ZINIT)
1008 		zone->uz_init = zero_init;
1009 
1010 	if (arg->flags & UMA_ZONE_INTERNAL)
1011 		zone->uz_flags |= UMA_ZFLAG_INTERNAL;
1012 
1013 	if (arg->flags & UMA_ZONE_MALLOC)
1014 		zone->uz_flags |= UMA_ZFLAG_MALLOC;
1015 
1016 	if (arg->flags & UMA_ZONE_NOFREE)
1017 		zone->uz_flags |= UMA_ZFLAG_NOFREE;
1018 
1019 	if (arg->flags & UMA_ZONE_VM)
1020 		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
1021 
1022 	if (zone->uz_size > UMA_SLAB_SIZE)
1023 		zone_large_init(zone);
1024 	else
1025 		zone_small_init(zone);
1026 #ifdef UMA_MD_SMALL_ALLOC
1027 	if (zone->uz_ppera == 1) {
1028 		zone->uz_allocf = uma_small_alloc;
1029 		zone->uz_freef = uma_small_free;
1030 	}
1031 #endif	/* UMA_MD_SMALL_ALLOC */
1032 
1033 	if (arg->flags & UMA_ZONE_MTXCLASS)
1034 		privlc = 1;
1035 	else
1036 		privlc = 0;
1037 
1038 	/*
1039 	 * If we're putting the slab header in the actual page we need to
1040 	 * figure out where in each page it goes.  This calculates a right
1041 	 * justified offset into the memory on an ALIGN_PTR boundary.
1042 	 */
1043 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1044 		int totsize;
1045 		int waste;
1046 
1047 		/* Size of the slab struct and free list */
1048 		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1049 		if (totsize & UMA_ALIGN_PTR)
1050 			totsize = (totsize & ~UMA_ALIGN_PTR) +
1051 			    (UMA_ALIGN_PTR + 1);
1052 		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1053 
1054 		waste = zone->uz_pgoff;
1055 		waste -= (zone->uz_ipers * zone->uz_rsize);
1056 
1057 		/*
1058 		 * This calculates how much space we have for cache line size
1059 		 * optimizations.  It works by offsetting each slab slightly.
1060 		 * Currently it breaks on x86, and so it is disabled.
1061 		 */
1062 
1063 		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1064 			zone->uz_cachemax = waste - UMA_CACHE_INC;
1065 			zone->uz_cacheoff = 0;
1066 		}
1067 
1068 		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1069 		    + zone->uz_ipers;
1070 		/* I don't think it's possible, but I'll make sure anyway */
1071 		if (totsize > UMA_SLAB_SIZE) {
1072 			printf("zone %s ipers %d rsize %d size %d\n",
1073 			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1074 			    zone->uz_size);
1075 			panic("UMA slab won't fit.\n");
1076 		}
1077 	}
1078 
1079 	if (zone->uz_flags & UMA_ZFLAG_HASH)
1080 		hash_alloc(&zone->uz_hash);
1081 
1082 #ifdef UMA_DEBUG
1083 	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1084 	    zone->uz_name, zone,
1085 	    zone->uz_size, zone->uz_ipers,
1086 	    zone->uz_ppera, zone->uz_pgoff);
1087 #endif
1088 	ZONE_LOCK_INIT(zone, privlc);
1089 
1090 	mtx_lock(&uma_mtx);
1091 	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1092 	mtx_unlock(&uma_mtx);
1093 
1094 	/*
1095 	 * Some internal zones don't have room allocated for the per cpu
1096 	 * caches.  If we're internal, bail out here.
1097 	 */
1098 
1099 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1100 		return;
1101 
1102 	if (zone->uz_ipers < UMA_BUCKET_SIZE)
1103 		zone->uz_count = zone->uz_ipers - 1;
1104 	else
1105 		zone->uz_count = UMA_BUCKET_SIZE - 1;
1106 }
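
/*
 * To illustrate the pgoff math above with made up numbers: with a roughly
 * 120 byte slab header and 37 items per slab (both hypothetical), totsize =
 * 120 + 37 = 157, which rounds up to 160 on an 8 byte pointer boundary, so
 * uz_pgoff = 4096 - 160 = 3936 and the slab header sits right justified at
 * the end of the page with the items packed in front of it.
 */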
1107 
1108 /*
1109  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
1110  * and removes the zone from the global list.
1111  *
1112  * Arguments/Returns follow uma_dtor specifications
1113  *	udata  unused
1114  */
1115 
1116 static void
1117 zone_dtor(void *arg, int size, void *udata)
1118 {
1119 	uma_zone_t zone;
1120 
1121 	zone = (uma_zone_t)arg;
1122 	ZONE_LOCK(zone);
1123 	zone->uz_wssize = 0;
1124 	ZONE_UNLOCK(zone);
1125 
1126 	mtx_lock(&uma_mtx);
1127 	LIST_REMOVE(zone, uz_link);
1128 	zone_drain(zone);
1129 	mtx_unlock(&uma_mtx);
1130 
1131 	ZONE_LOCK(zone);
1132 	if (zone->uz_free != 0)
1133 		printf("Zone %s was not empty (%d items).  Lost %d pages of memory.\n",
1134 		    zone->uz_name, zone->uz_free, zone->uz_pages);
1135 
1136 	ZONE_UNLOCK(zone);
1137 	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1138 		hash_free(&zone->uz_hash);
1139 
1140 	ZONE_LOCK_FINI(zone);
1141 }
1142 /*
1143  * Traverses every zone in the system and calls a callback
1144  *
1145  * Arguments:
1146  *	zfunc  A pointer to a function which accepts a zone
1147  *		as an argument.
1148  *
1149  * Returns:
1150  *	Nothing
1151  */
1152 static void
1153 zone_foreach(void (*zfunc)(uma_zone_t))
1154 {
1155 	uma_zone_t zone;
1156 
1157 	mtx_lock(&uma_mtx);
1158 	LIST_FOREACH(zone, &uma_zones, uz_link) {
1159 		zfunc(zone);
1160 	}
1161 	mtx_unlock(&uma_mtx);
1162 }
1163 
1164 /* Public functions */
1165 /* See uma.h */
1166 void
1167 uma_startup(void *bootmem)
1168 {
1169 	struct uma_zctor_args args;
1170 	uma_slab_t slab;
1171 	int slabsize;
1172 	int i;
1173 
1174 #ifdef UMA_DEBUG
1175 	printf("Creating uma zone headers zone.\n");
1176 #endif
1177 #ifdef SMP
1178 	maxcpu = mp_maxid + 1;
1179 #else
1180 	maxcpu = 1;
1181 #endif
1182 #ifdef UMA_DEBUG
1183 	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1184 	Debugger("stop");
1185 #endif
1186 	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1187 	/* "manually" Create the initial zone */
1188 	args.name = "UMA Zones";
1189 	args.size = sizeof(struct uma_zone) +
1190 	    (sizeof(struct uma_cache) * (maxcpu - 1));
1191 	args.ctor = zone_ctor;
1192 	args.dtor = zone_dtor;
1193 	args.uminit = zero_init;
1194 	args.fini = NULL;
1195 	args.align = 32 - 1;
1196 	args.flags = UMA_ZONE_INTERNAL;
1197 	/* The initial zone has no per cpu queues so it's smaller */
1198 	zone_ctor(zones, sizeof(struct uma_zone), &args);
1199 
1200 	/* Initialize the pcpu cache lock set once and for all */
1201 	for (i = 0; i < maxcpu; i++)
1202 		CPU_LOCK_INIT(i);
1203 
1204 #ifdef UMA_DEBUG
1205 	printf("Filling boot free list.\n");
1206 #endif
1207 	for (i = 0; i < UMA_BOOT_PAGES; i++) {
1208 		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1209 		slab->us_data = (u_int8_t *)slab;
1210 		slab->us_flags = UMA_SLAB_BOOT;
1211 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1212 		uma_boot_free++;
1213 	}
1214 
1215 #ifdef UMA_DEBUG
1216 	printf("Creating slab zone.\n");
1217 #endif
1218 
1219 	/*
1220 	 * This is the max number of free list items we'll have with
1221 	 * offpage slabs.
1222 	 */
1223 
1224 	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1225 	slabsize /= UMA_MAX_WASTE;
1226 	slabsize++;			/* In case the division rounded down */
1227 	slabsize += sizeof(struct uma_slab);
1228 
1229 	/* Now make a zone for slab headers */
1230 	slabzone = uma_zcreate("UMA Slabs",
1231 				slabsize,
1232 				NULL, NULL, NULL, NULL,
1233 				UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1234 
1235 	hashzone = uma_zcreate("UMA Hash",
1236 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1237 	    NULL, NULL, NULL, NULL,
1238 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1239 
1240 	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1241 	    NULL, NULL, NULL, NULL,
1242 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1243 
1244 #ifdef UMA_MD_SMALL_ALLOC
1245 	booted = 1;
1246 #endif
1247 
1248 #ifdef UMA_DEBUG
1249 	printf("UMA startup complete.\n");
1250 #endif
1251 }
1252 
1253 /* see uma.h */
1254 void
1255 uma_startup2(void)
1256 {
1257 	booted = 1;
1258 	bucket_enable();
1259 #ifdef UMA_DEBUG
1260 	printf("UMA startup2 complete.\n");
1261 #endif
1262 }
1263 
1264 /*
1265  * Initialize our callout handle
1266  *
1267  */
1268 
1269 static void
1270 uma_startup3(void)
1271 {
1272 #ifdef UMA_DEBUG
1273 	printf("Starting callout.\n");
1274 #endif
1275 	callout_init(&uma_callout, 0);
1276 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1277 #ifdef UMA_DEBUG
1278 	printf("UMA startup3 complete.\n");
1279 #endif
1280 }
1281 
1282 /* See uma.h */
1283 uma_zone_t
1284 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1285 		uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1286 
1287 {
1288 	struct uma_zctor_args args;
1289 
1290 	/* This stuff is essential for the zone ctor */
1291 	args.name = name;
1292 	args.size = size;
1293 	args.ctor = ctor;
1294 	args.dtor = dtor;
1295 	args.uminit = uminit;
1296 	args.fini = fini;
1297 	args.align = align;
1298 	args.flags = flags;
1299 
1300 	return (uma_zalloc_internal(zones, &args, M_WAITOK));
1301 }
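
/*
 * A minimal usage sketch (the "foo" names are hypothetical and not part of
 * this file); consumers normally go through the uma_zalloc()/uma_zfree()
 * wrappers from uma.h rather than the *_arg functions below:
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc(foo_zone, M_WAITOK);
 *	...
 *	uma_zfree(foo_zone, p);
 *	uma_zdestroy(foo_zone);
 */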
1302 
1303 /* See uma.h */
1304 void
1305 uma_zdestroy(uma_zone_t zone)
1306 {
1307 	uma_zfree_internal(zones, zone, NULL, 0);
1308 }
1309 
1310 /* See uma.h */
1311 void *
1312 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1313 {
1314 	void *item;
1315 	uma_cache_t cache;
1316 	uma_bucket_t bucket;
1317 	int cpu;
1318 
1319 	/* This is the fast path allocation */
1320 #ifdef UMA_DEBUG_ALLOC_1
1321 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1322 #endif
1323 
1324 #ifdef INVARIANTS
1325 	/*
1326 	 * Make sure that either M_WAITOK or M_NOWAIT is set, but not both,
1327 	 * and check against the API botches that are common.
1328 	 * The uma code implies M_WAITOK if M_NOWAIT is not set, so
1329 	 * we default to waiting if none of the flags is set.
1330 	 */
1331 	cpu = flags & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT);
1332 	if (cpu != M_NOWAIT && cpu != M_WAITOK) {
1333 		static	struct timeval lasterr;
1334 		static	int curerr, once;
1335 		if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
1336 			printf("Bad uma_zalloc flags: %x\n", cpu);
1337 			backtrace();
1338 			once++;
1339 		}
1340 	}
1341 #endif
1342 
1343 	if (!(flags & M_NOWAIT)) {
1344 		KASSERT(curthread->td_intr_nesting_level == 0,
1345 		   ("malloc(M_WAITOK) in interrupt context"));
1346 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1347 		    "malloc() of \"%s\"", zone->uz_name);
1348 	}
1349 
1350 zalloc_restart:
1351 	cpu = PCPU_GET(cpuid);
1352 	CPU_LOCK(cpu);
1353 	cache = &zone->uz_cpu[cpu];
1354 
1355 zalloc_start:
1356 	bucket = cache->uc_allocbucket;
1357 
1358 	if (bucket) {
1359 		if (bucket->ub_ptr > -1) {
1360 			item = bucket->ub_bucket[bucket->ub_ptr];
1361 #ifdef INVARIANTS
1362 			bucket->ub_bucket[bucket->ub_ptr] = NULL;
1363 #endif
1364 			bucket->ub_ptr--;
1365 			KASSERT(item != NULL,
1366 			    ("uma_zalloc: Bucket pointer mangled."));
1367 			cache->uc_allocs++;
1368 #ifdef INVARIANTS
1369 			ZONE_LOCK(zone);
1370 			uma_dbg_alloc(zone, NULL, item);
1371 			ZONE_UNLOCK(zone);
1372 #endif
1373 			CPU_UNLOCK(cpu);
1374 			if (zone->uz_ctor)
1375 				zone->uz_ctor(item, zone->uz_size, udata);
1376 			if (flags & M_ZERO)
1377 				bzero(item, zone->uz_size);
1378 			return (item);
1379 		} else if (cache->uc_freebucket) {
1380 			/*
1381 			 * We have run out of items in our allocbucket.
1382 			 * See if we can switch with our free bucket.
1383 			 */
1384 			if (cache->uc_freebucket->ub_ptr > -1) {
1385 				uma_bucket_t swap;
1386 
1387 #ifdef UMA_DEBUG_ALLOC
1388 				printf("uma_zalloc: Swapping empty with alloc.\n");
1389 #endif
1390 				swap = cache->uc_freebucket;
1391 				cache->uc_freebucket = cache->uc_allocbucket;
1392 				cache->uc_allocbucket = swap;
1393 
1394 				goto zalloc_start;
1395 			}
1396 		}
1397 	}
1398 	ZONE_LOCK(zone);
1399 	/* Since we have locked the zone we may as well send back our stats */
1400 	zone->uz_allocs += cache->uc_allocs;
1401 	cache->uc_allocs = 0;
1402 
1403 	/* Our old one is now a free bucket */
1404 	if (cache->uc_allocbucket) {
1405 		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1406 		    ("uma_zalloc_arg: Freeing a non free bucket."));
1407 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1408 		    cache->uc_allocbucket, ub_link);
1409 		cache->uc_allocbucket = NULL;
1410 	}
1411 
1412 	/* Check the free list for a new alloc bucket */
1413 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1414 		KASSERT(bucket->ub_ptr != -1,
1415 		    ("uma_zalloc_arg: Returning an empty bucket."));
1416 
1417 		LIST_REMOVE(bucket, ub_link);
1418 		cache->uc_allocbucket = bucket;
1419 		ZONE_UNLOCK(zone);
1420 		goto zalloc_start;
1421 	}
1422 	/* We are no longer associated with this cpu!!! */
1423 	CPU_UNLOCK(cpu);
1424 
1425 	/* Bump up our uz_count so we get here less */
1426 	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1427 		zone->uz_count++;
1428 
1429 	/*
1430 	 * Now lets just fill a bucket and put it on the free list.  If that
1431 	 * works we'll restart the allocation from the beginning.
1432 	 */
1433 
1434 	if (uma_zalloc_bucket(zone, flags)) {
1435 		ZONE_UNLOCK(zone);
1436 		goto zalloc_restart;
1437 	}
1438 	ZONE_UNLOCK(zone);
1439 	/*
1440 	 * We may not be able to get a bucket so return an actual item.
1441 	 */
1442 #ifdef UMA_DEBUG
1443 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1444 #endif
1445 
1446 	return (uma_zalloc_internal(zone, udata, flags));
1447 }
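
/*
 * To summarize the path above: an allocation tries the per cpu allocbucket,
 * then swaps in a non-empty freebucket, then pulls a full bucket off the
 * zone's uz_full_bucket list, then asks uma_zalloc_bucket() to refill, and
 * only as a last resort hands out a single item via uma_zalloc_internal().
 */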
1448 
1449 static uma_slab_t
1450 uma_zone_slab(uma_zone_t zone, int flags)
1451 {
1452 	uma_slab_t slab;
1453 
1454 	/*
1455 	 * This is to prevent us from recursively trying to allocate
1456 	 * buckets.  The problem is that if an allocation forces us to
1457 	 * grab a new bucket we will call page_alloc, which will go off
1458 	 * and cause the vm to allocate vm_map_entries.  If we need new
1459 	 * buckets there too we will recurse in kmem_alloc and bad
1460 	 * things happen.  So instead we return a NULL bucket, and make
1461 	 * the code that allocates buckets smart enough to deal with it
1462 	 */
1463 	if (zone == bucketzone && zone->uz_recurse != 0)
1464 		return (NULL);
1465 
1466 	slab = NULL;
1467 
1468 	for (;;) {
1469 		/*
1470 		 * Find a slab with some space.  Prefer slabs that are partially
1471 		 * used over those that are totally free.  This helps to reduce
1472 		 * fragmentation.
1473 		 */
1474 		if (zone->uz_free != 0) {
1475 			if (!LIST_EMPTY(&zone->uz_part_slab)) {
1476 				slab = LIST_FIRST(&zone->uz_part_slab);
1477 			} else {
1478 				slab = LIST_FIRST(&zone->uz_free_slab);
1479 				LIST_REMOVE(slab, us_link);
1480 				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
1481 				us_link);
1482 			}
1483 			return (slab);
1484 		}
1485 
1486 		/*
1487 		 * M_NOVM means don't ask at all!
1488 		 */
1489 		if (flags & M_NOVM)
1490 			break;
1491 
1492 		if (zone->uz_maxpages &&
1493 		    zone->uz_pages >= zone->uz_maxpages) {
1494 			zone->uz_flags |= UMA_ZFLAG_FULL;
1495 
1496 			if (flags & M_NOWAIT)
1497 				break;
1498 			else
1499 				msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1500 			continue;
1501 		}
1502 		zone->uz_recurse++;
1503 		slab = slab_zalloc(zone, flags);
1504 		zone->uz_recurse--;
1505 		/*
1506 		 * If we got a slab here it's safe to mark it partially used
1507 		 * and return.  We assume that the caller is going to remove
1508 		 * at least one item.
1509 		 */
1510 		if (slab) {
1511 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1512 			return (slab);
1513 		}
1514 		/*
1515 		 * We might not have been able to get a slab but another cpu
1516 		 * could have while we were unlocked.  Check again before we
1517 		 * fail.
1518 		 */
1519 		if (flags & M_NOWAIT)
1520 			flags |= M_NOVM;
1521 	}
1522 	return (slab);
1523 }
1524 
1525 static __inline void *
1526 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
1527 {
1528 	void *item;
1529 	u_int8_t freei;
1530 
1531 	freei = slab->us_firstfree;
1532 	slab->us_firstfree = slab->us_freelist[freei];
1533 	item = slab->us_data + (zone->uz_rsize * freei);
1534 
1535 	slab->us_freecount--;
1536 	zone->uz_free--;
1537 #ifdef INVARIANTS
1538 	uma_dbg_alloc(zone, slab, item);
1539 #endif
1540 	/* Move this slab to the full list */
1541 	if (slab->us_freecount == 0) {
1542 		LIST_REMOVE(slab, us_link);
1543 		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1544 	}
1545 
1546 	return (item);
1547 }
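
/*
 * The embedded free list behaves like a small stack of item indices:
 * slab_zalloc() sets us_freelist[i] = i + 1 with us_firstfree = 0, so a
 * fresh slab hands out indices 0, 1, 2, ... in order, and
 * uma_zfree_internal() pushes freed indices back onto the front so that
 * they are reused first.
 */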
1548 
1549 static int
1550 uma_zalloc_bucket(uma_zone_t zone, int flags)
1551 {
1552 	uma_bucket_t bucket;
1553 	uma_slab_t slab;
1554 
1555 	/*
1556 	 * Try this zone's free list first so we don't allocate extra buckets.
1557 	 */
1558 
1559 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1560 		KASSERT(bucket->ub_ptr == -1,
1561 		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
1562 		LIST_REMOVE(bucket, ub_link);
1563 	} else {
1564 		int bflags;
1565 
1566 		bflags = flags;
1567 		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1568 			bflags |= M_NOVM;
1569 
1570 		ZONE_UNLOCK(zone);
1571 		bucket = uma_zalloc_internal(bucketzone,
1572 		    NULL, bflags);
1573 		ZONE_LOCK(zone);
1574 		if (bucket != NULL) {
1575 #ifdef INVARIANTS
1576 			bzero(bucket, bucketzone->uz_size);
1577 #endif
1578 			bucket->ub_ptr = -1;
1579 		}
1580 	}
1581 
1582 	if (bucket == NULL)
1583 		return (0);
1584 
1585 #ifdef SMP
1586 	/*
1587 	 * This code is here to limit the number of simultaneous bucket fills
1588 	 * for any given zone to the number of per cpu caches in this zone. This
1589 	 * is done so that we don't allocate more memory than we really need.
1590 	 */
1591 	if (zone->uz_fills >= mp_ncpus)
1592 		goto done;
1593 
1594 #endif
1595 	zone->uz_fills++;
1596 
1597 	/* Try to keep the buckets totally full */
1598 	while ((slab = uma_zone_slab(zone, flags)) != NULL &&
1599 	    bucket->ub_ptr < zone->uz_count) {
1600 		while (slab->us_freecount &&
1601 		    bucket->ub_ptr < zone->uz_count) {
1602 			bucket->ub_bucket[++bucket->ub_ptr] =
1603 			    uma_slab_alloc(zone, slab);
1604 		}
1605 		/* Don't block on the next fill */
1606 		flags |= M_NOWAIT;
1607 	}
1608 
1609 	zone->uz_fills--;
1610 
1611 	if (bucket->ub_ptr != -1) {
1612 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1613 		    bucket, ub_link);
1614 		return (1);
1615 	}
1616 #ifdef SMP
1617 done:
1618 #endif
1619 	uma_zfree_internal(bucketzone, bucket, NULL, 0);
1620 
1621 	return (0);
1622 }
1623 /*
1624  * Allocates an item for an internal zone
1625  *
1626  * Arguments
1627  *	zone   The zone to alloc for.
1628  *	udata  The data to be passed to the constructor.
1629  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
1630  *
1631  * Returns
1632  *	NULL if there is no memory and M_NOWAIT is set
1633  *	An item if successful
1634  */
1635 
1636 static void *
1637 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
1638 {
1639 	uma_slab_t slab;
1640 	void *item;
1641 
1642 	item = NULL;
1643 
1644 	/*
1645 	 * This is to stop us from allocating per cpu buckets while we're
1646 	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
1647 	 * boot pages.
1648 	 */
1649 
1650 	if (bucketdisable && zone == bucketzone)
1651 		return (NULL);
1652 
1653 #ifdef UMA_DEBUG_ALLOC
1654 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1655 #endif
1656 	ZONE_LOCK(zone);
1657 
1658 	slab = uma_zone_slab(zone, flags);
1659 	if (slab == NULL) {
1660 		ZONE_UNLOCK(zone);
1661 		return (NULL);
1662 	}
1663 
1664 	item = uma_slab_alloc(zone, slab);
1665 
1666 	ZONE_UNLOCK(zone);
1667 
1668 	if (zone->uz_ctor != NULL)
1669 		zone->uz_ctor(item, zone->uz_size, udata);
1670 	if (flags & M_ZERO)
1671 		bzero(item, zone->uz_size);
1672 
1673 	return (item);
1674 }
1675 
1676 /* See uma.h */
1677 void
1678 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1679 {
1680 	uma_cache_t cache;
1681 	uma_bucket_t bucket;
1682 	int bflags;
1683 	int cpu;
1684 	int skip;
1685 
1686 	/* This is the fast path free */
1687 	skip = 0;
1688 #ifdef UMA_DEBUG_ALLOC_1
1689 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1690 #endif
1691 	/*
1692 	 * The race here is acceptable.  If we miss it we'll just have to wait
1693 	 * a little longer for the limits to be reset.
1694 	 */
1695 
1696 	if (zone->uz_flags & UMA_ZFLAG_FULL)
1697 		goto zfree_internal;
1698 
1699 	if (zone->uz_dtor) {
1700 		zone->uz_dtor(item, zone->uz_size, udata);
1701 		skip = 1;
1702 	}
1703 
1704 zfree_restart:
1705 	cpu = PCPU_GET(cpuid);
1706 	CPU_LOCK(cpu);
1707 	cache = &zone->uz_cpu[cpu];
1708 
1709 zfree_start:
1710 	bucket = cache->uc_freebucket;
1711 
1712 	if (bucket) {
1713 		/*
1714 		 * Do we have room in our bucket? It is OK for this uz count
1715 		 * check to be slightly out of sync.
1716 		 */
1717 
1718 		if (bucket->ub_ptr < zone->uz_count) {
1719 			bucket->ub_ptr++;
1720 			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1721 			    ("uma_zfree: Freeing to non free bucket index."));
1722 			bucket->ub_bucket[bucket->ub_ptr] = item;
1723 #ifdef INVARIANTS
1724 			ZONE_LOCK(zone);
1725 			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1726 				uma_dbg_free(zone, udata, item);
1727 			else
1728 				uma_dbg_free(zone, NULL, item);
1729 			ZONE_UNLOCK(zone);
1730 #endif
1731 			CPU_UNLOCK(cpu);
1732 			return;
1733 		} else if (cache->uc_allocbucket) {
1734 #ifdef UMA_DEBUG_ALLOC
1735 			printf("uma_zfree: Swapping buckets.\n");
1736 #endif
1737 			/*
1738 			 * We have run out of space in our freebucket.
1739 			 * See if we can switch with our alloc bucket.
1740 			 */
1741 			if (cache->uc_allocbucket->ub_ptr <
1742 			    cache->uc_freebucket->ub_ptr) {
1743 				uma_bucket_t swap;
1744 
1745 				swap = cache->uc_freebucket;
1746 				cache->uc_freebucket = cache->uc_allocbucket;
1747 				cache->uc_allocbucket = swap;
1748 
1749 				goto zfree_start;
1750 			}
1751 		}
1752 	}
1753 
1754 	/*
1755 	 * We can get here for two reasons:
1756 	 *
1757 	 * 1) The buckets are NULL
1758 	 * 2) The alloc and free buckets are both somewhat full.
1759 	 *
1760 	 */
1761 
1762 	ZONE_LOCK(zone);
1763 
1764 	bucket = cache->uc_freebucket;
1765 	cache->uc_freebucket = NULL;
1766 
1767 	/* Can we throw this on the zone full list? */
1768 	if (bucket != NULL) {
1769 #ifdef UMA_DEBUG_ALLOC
1770 		printf("uma_zfree: Putting old bucket on the free list.\n");
1771 #endif
1772 		/* ub_ptr is pointing to the last free item */
1773 		KASSERT(bucket->ub_ptr != -1,
1774 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1775 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1776 		    bucket, ub_link);
1777 	}
1778 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1779 		LIST_REMOVE(bucket, ub_link);
1780 		ZONE_UNLOCK(zone);
1781 		cache->uc_freebucket = bucket;
1782 		goto zfree_start;
1783 	}
1784 	/* We're done with this CPU now */
1785 	CPU_UNLOCK(cpu);
1786 
1787 	/* And the zone.. */
1788 	ZONE_UNLOCK(zone);
1789 
1790 #ifdef UMA_DEBUG_ALLOC
1791 	printf("uma_zfree: Allocating new free bucket.\n");
1792 #endif
1793 	bflags = M_NOWAIT;
1794 
1795 	if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1796 		bflags |= M_NOVM;
1797 #ifdef INVARIANTS
1798 	bflags |= M_ZERO;
1799 #endif
1800 	bucket = uma_zalloc_internal(bucketzone,
1801 	    NULL, bflags);
1802 	if (bucket) {
1803 		bucket->ub_ptr = -1;
1804 		ZONE_LOCK(zone);
1805 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1806 		    bucket, ub_link);
1807 		ZONE_UNLOCK(zone);
1808 		goto zfree_restart;
1809 	}
1810 
1811 	/*
1812 	 * If nothing else caught this, we'll just do an internal free.
1813 	 */
1814 
1815 zfree_internal:
1816 
1817 	uma_zfree_internal(zone, item, udata, skip);
1818 
1819 	return;
1820 
1821 }
1822 
1823 /*
1824  * Frees an item to an INTERNAL zone or allocates a free bucket
1825  *
1826  * Arguments:
1827  *	zone   The zone to free to
1828  *	item   The item we're freeing
1829  *	udata  User supplied data for the dtor
1830  *	skip   Skip the dtor, it was done in uma_zfree_arg
1831  */
1832 
1833 static void
1834 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1835 {
1836 	uma_slab_t slab;
1837 	u_int8_t *mem;
1838 	u_int8_t freei;
1839 
1840 	if (!skip && zone->uz_dtor)
1841 		zone->uz_dtor(item, zone->uz_size, udata);
1842 
1843 	ZONE_LOCK(zone);
1844 
1845 	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1846 		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1847 		if (zone->uz_flags & UMA_ZFLAG_HASH)
1848 			slab = hash_sfind(&zone->uz_hash, mem);
1849 		else {
1850 			mem += zone->uz_pgoff;
1851 			slab = (uma_slab_t)mem;
1852 		}
1853 	} else {
1854 		slab = (uma_slab_t)udata;
1855 	}
1856 
1857 	/* Do we need to remove from any lists? */
1858 	if (slab->us_freecount+1 == zone->uz_ipers) {
1859 		LIST_REMOVE(slab, us_link);
1860 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1861 	} else if (slab->us_freecount == 0) {
1862 		LIST_REMOVE(slab, us_link);
1863 		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1864 	}
1865 
1866 	/* Slab management stuff */
1867 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
1868 		/ zone->uz_rsize;
1869 
1870 #ifdef INVARIANTS
1871 	if (!skip)
1872 		uma_dbg_free(zone, slab, item);
1873 #endif
1874 
1875 	slab->us_freelist[freei] = slab->us_firstfree;
1876 	slab->us_firstfree = freei;
1877 	slab->us_freecount++;
1878 
1879 	/* Zone statistics */
1880 	zone->uz_free++;
1881 
1882 	if (zone->uz_flags & UMA_ZFLAG_FULL) {
1883 		if (zone->uz_pages < zone->uz_maxpages)
1884 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
1885 
1886 		/* We can handle one more allocation */
1887 		wakeup_one(zone);
1888 	}
1889 
1890 	ZONE_UNLOCK(zone);
1891 }
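
/*
 * For example (rsize hypothetical): with a 104 byte rsize, an item that
 * starts 312 bytes into us_data is index 312 / 104 = 3; pushing it onto the
 * front of the embedded free list means the next uma_slab_alloc() from this
 * slab hands that same item back out.
 */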
1892 
1893 /* See uma.h */
1894 void
1895 uma_zone_set_max(uma_zone_t zone, int nitems)
1896 {
1897 	ZONE_LOCK(zone);
1898 	if (zone->uz_ppera > 1)
1899 		zone->uz_maxpages = nitems * zone->uz_ppera;
1900 	else
1901 		zone->uz_maxpages = nitems / zone->uz_ipers;
1902 
1903 	if (zone->uz_maxpages * zone->uz_ipers < nitems)
1904 		zone->uz_maxpages++;
1905 
1906 	ZONE_UNLOCK(zone);
1907 }
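
/*
 * For a single page zone with a hypothetical 38 items per slab, a limit of
 * 1000 items becomes uz_maxpages = 1000 / 38 = 26; since 26 * 38 = 988
 * falls short of 1000 it is bumped to 27, so the effective cap is
 * 27 * 38 = 1026 items; limits round up to whole slabs.
 */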
1908 
1909 /* See uma.h */
1910 void
1911 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1912 {
1913 	ZONE_LOCK(zone);
1914 
1915 	zone->uz_freef = freef;
1916 
1917 	ZONE_UNLOCK(zone);
1918 }
1919 
1920 /* See uma.h */
1921 void
1922 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1923 {
1924 	ZONE_LOCK(zone);
1925 
1926 	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1927 	zone->uz_allocf = allocf;
1928 
1929 	ZONE_UNLOCK(zone);
1930 }
1931 
1932 /* See uma.h */
1933 int
1934 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1935 {
1936 	int pages;
1937 	vm_offset_t kva;
1938 
1939 	mtx_lock(&Giant);
1940 
1941 	pages = count / zone->uz_ipers;
1942 
1943 	if (pages * zone->uz_ipers < count)
1944 		pages++;
1945 
1946 	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1947 
1948 	if (kva == 0) {
1949 		mtx_unlock(&Giant);
1950 		return (0);
1951 	}
1952 
1953 
1954 	if (obj == NULL)
1955 		obj = vm_object_allocate(OBJT_DEFAULT,
1956 		    pages);
1957 	else {
1958 		VM_OBJECT_LOCK_INIT(obj);
1959 		_vm_object_allocate(OBJT_DEFAULT,
1960 		    pages, obj);
1961 	}
1962 	ZONE_LOCK(zone);
1963 	zone->uz_kva = kva;
1964 	zone->uz_obj = obj;
1965 	zone->uz_maxpages = pages;
1966 
1967 	zone->uz_allocf = obj_alloc;
1968 	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1969 
1970 	ZONE_UNLOCK(zone);
1971 	mtx_unlock(&Giant);
1972 
1973 	return (1);
1974 }
1975 
1976 /* See uma.h */
1977 void
1978 uma_prealloc(uma_zone_t zone, int items)
1979 {
1980 	int slabs;
1981 	uma_slab_t slab;
1982 
1983 	ZONE_LOCK(zone);
1984 	slabs = items / zone->uz_ipers;
1985 	if (slabs * zone->uz_ipers < items)
1986 		slabs++;
1987 
1988 	while (slabs > 0) {
1989 		slab = slab_zalloc(zone, M_WAITOK);
1990 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1991 		slabs--;
1992 	}
1993 	ZONE_UNLOCK(zone);
1994 }
1995 
1996 /* See uma.h */
1997 void
1998 uma_reclaim(void)
1999 {
2000 	/*
2001 	 * You might think that the delay below would improve performance since
2002 	 * the allocator will give away memory that it may ask for immediately.
2003 	 * Really, it makes things worse, since cpu cycles are so much cheaper
2004 	 * than disk activity.
2005 	 */
2006 #if 0
2007 	static struct timeval tv = {0};
2008 	struct timeval now;
2009 	getmicrouptime(&now);
2010 	if (now.tv_sec > tv.tv_sec + 30)
2011 		tv = now;
2012 	else
2013 		return;
2014 #endif
2015 #ifdef UMA_DEBUG
2016 	printf("UMA: vm asked us to release pages!\n");
2017 #endif
2018 	bucket_enable();
2019 	zone_foreach(zone_drain);
2020 
2021 	/*
2022 	 * Some slabs may have been freed, but the slab zone is visited early in
2023 	 * the loop above, so we drain it again here to free pages that become
2024 	 * empty once the other zones are drained; the same goes for buckets.
2025 	 */
2026 	zone_drain(slabzone);
2027 	zone_drain(bucketzone);
2028 }
2029 
2030 void *
2031 uma_large_malloc(int size, int wait)
2032 {
2033 	void *mem;
2034 	uma_slab_t slab;
2035 	u_int8_t flags;
2036 
2037 	slab = uma_zalloc_internal(slabzone, NULL, wait);
2038 	if (slab == NULL)
2039 		return (NULL);
2040 
2041 	/* XXX: kmem_malloc panics if Giant isn't held and sleep allowed */
2042 	if ((wait & M_NOWAIT) == 0 && !mtx_owned(&Giant)) {
2043 		mtx_lock(&Giant);
2044 		mem = page_alloc(NULL, size, &flags, wait);
2045 		mtx_unlock(&Giant);
2046 	} else
2047 		mem = page_alloc(NULL, size, &flags, wait);
2048 	if (mem) {
2049 		vsetslab((vm_offset_t)mem, slab);
2050 		slab->us_data = mem;
2051 		slab->us_flags = flags | UMA_SLAB_MALLOC;
2052 		slab->us_size = size;
2053 	} else {
2054 		uma_zfree_internal(slabzone, slab, NULL, 0);
2055 	}
2056 
2057 
2058 	return (mem);
2059 }
2060 
2061 void
2062 uma_large_free(uma_slab_t slab)
2063 {
2064 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
2065 	/*
2066 	 * XXX: We get a lock order reversal if we don't have Giant:
2067 	 * vm_map_remove (locks system map) -> vm_map_delete ->
2068 	 *    vm_map_entry_unwire -> vm_fault_unwire -> mtx_lock(&Giant)
2069 	 */
2070 	if (!mtx_owned(&Giant)) {
2071 		mtx_lock(&Giant);
2072 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2073 		mtx_unlock(&Giant);
2074 	} else
2075 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2076 	uma_zfree_internal(slabzone, slab, NULL, 0);
2077 }
2078 
2079 void
2080 uma_print_stats(void)
2081 {
2082 	zone_foreach(uma_print_zone);
2083 }
2084 
2085 void
2086 uma_print_zone(uma_zone_t zone)
2087 {
2088 	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2089 	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2090 	    zone->uz_ipers, zone->uz_ppera,
2091 	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2092 }
2093 
2094 /*
2095  * Sysctl handler for vm.zone
2096  *
2097  * stolen from vm_zone.c
2098  */
2099 static int
2100 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2101 {
2102 	int error, len, cnt;
2103 	const int linesize = 128;	/* conservative */
2104 	int totalfree;
2105 	char *tmpbuf, *offset;
2106 	uma_zone_t z;
2107 	char *p;
2108 
2109 	cnt = 0;
2110 	mtx_lock(&uma_mtx);
2111 	LIST_FOREACH(z, &uma_zones, uz_link)
2112 		cnt++;
2113 	mtx_unlock(&uma_mtx);
2114 	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2115 			M_TEMP, M_WAITOK);
2116 	len = snprintf(tmpbuf, linesize,
2117 	    "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2118 	if (cnt == 0)
2119 		tmpbuf[len - 1] = '\0';
2120 	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2121 	if (error || cnt == 0)
2122 		goto out;
2123 	offset = tmpbuf;
2124 	mtx_lock(&uma_mtx);
2125 	LIST_FOREACH(z, &uma_zones, uz_link) {
2126 		if (cnt == 0)	/* list may have changed size */
2127 			break;
2128 		ZONE_LOCK(z);
2129 		totalfree = z->uz_free + z->uz_cachefree;
2130 		len = snprintf(offset, linesize,
2131 		    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2132 		    z->uz_name, z->uz_size,
2133 		    z->uz_maxpages * z->uz_ipers,
2134 		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2135 		    totalfree,
2136 		    (unsigned long long)z->uz_allocs);
2137 		ZONE_UNLOCK(z);
2138 		for (p = offset + 12; p > offset && *p == ' '; --p)
2139 			/* nothing */ ;
2140 		p[1] = ':';
2141 		cnt--;
2142 		offset += len;
2143 	}
2144 	mtx_unlock(&uma_mtx);
2145 	*offset++ = '\0';
2146 	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2147 out:
2148 	FREE(tmpbuf, M_TEMP);
2149 	return (error);
2150 }
2151