xref: /freebsd/sys/kern/subr_unit.c (revision 608da65de9552d5678c1000776ed69da04a45983)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2004 Poul-Henning Kamp
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  *
29  * Unit number allocation functions.
30  *
31  * These functions implement a mixed run-length/bitmap management of unit
32  * number spaces in a very memory efficient manner.
33  *
34  * Allocation policy is always lowest free number first.
35  *
36  * A return value of -1 signals that no more unit numbers are available.
37  *
38  * There is no cost associated with the range of unitnumbers, so unless
39  * the resource really is finite, specify INT_MAX to new_unrhdr() and
40  * forget about checking the return value.
41  *
42  * If a mutex is not provided when the unit number space is created, a
43  * default global mutex is used.  The advantage to passing a mutex in, is
44  * that the alloc_unrl() function can be called with the mutex already
45  * held (it will not be released by alloc_unrl()).
46  *
47  * The allocation function alloc_unr{l}() never sleeps (but it may block on
48  * the mutex of course).
49  *
50  * Freeing a unit number may require allocating memory, and can therefore
51  * sleep so the free_unr() function does not come in a pre-locked variant.
52  *
53  * A userland test program is included.
54  *
55  * Memory usage is a very complex function of the exact allocation
56  * pattern, but always very compact:
57  *    * For the very typical case where a single unbroken run of unit
58  *      numbers are allocated 44 bytes are used on i386.
59  *    * For a unit number space of 1000 units and the random pattern
60  *      in the usermode test program included, the worst case usage
61  *	was 252 bytes on i386 for 500 allocated and 500 free units.
62  *    * For a unit number space of 10000 units and the random pattern
63  *      in the usermode test program included, the worst case usage
64  *	was 798 bytes on i386 for 5000 allocated and 5000 free units.
65  *    * The worst case is where every other unit number is allocated and
66  *	the rest are free.  In that case 44 + N/4 bytes are used where
67  *	N is the number of the highest unit allocated.
68  */
69 
70 #include <sys/param.h>
71 #include <sys/types.h>
72 #include <sys/_unrhdr.h>
73 
74 #ifdef _KERNEL
75 
76 #include <sys/bitstring.h>
77 #include <sys/malloc.h>
78 #include <sys/kernel.h>
79 #include <sys/systm.h>
80 #include <sys/limits.h>
81 #include <sys/lock.h>
82 #include <sys/mutex.h>
83 
84 /*
85  * In theory it would be smarter to allocate the individual blocks
86  * with the zone allocator, but at this time the expectation is that
87  * there will typically not even be enough allocations to fill a single
88  * page, so we stick with malloc for now.
89  */
90 static MALLOC_DEFINE(M_UNIT, "Unitno", "Unit number allocation");
91 
92 #define Malloc(foo) malloc(foo, M_UNIT, M_WAITOK | M_ZERO)
93 #define Free(foo) free(foo, M_UNIT)
94 
95 static struct mtx unitmtx;
96 
97 MTX_SYSINIT(unit, &unitmtx, "unit# allocation", MTX_DEF);
98 
99 #else /* ...USERLAND */
100 
101 #include <bitstring.h>
102 #include <err.h>
103 #include <errno.h>
104 #include <getopt.h>
105 #include <stdbool.h>
106 #include <stdio.h>
107 #include <stdlib.h>
108 #include <string.h>
109 
110 #define KASSERT(cond, arg) \
111 	do { \
112 		if (!(cond)) { \
113 			printf arg; \
114 			abort(); \
115 		} \
116 	} while (0)
117 
118 static int no_alloc;
119 #define Malloc(foo) _Malloc(foo, __LINE__)
120 static void *
121 _Malloc(size_t foo, int line)
122 {
123 
124 	KASSERT(no_alloc == 0, ("malloc in wrong place() line %d", line));
125 	return (calloc(foo, 1));
126 }
127 #define Free(foo) free(foo)
128 
129 struct unrhdr;
130 
131 #define	UNR_NO_MTX	((void *)(uintptr_t)-1)
132 
133 struct mtx {
134 	int	state;
135 } unitmtx;
136 
137 static void
138 mtx_lock(struct mtx *mp)
139 {
140 	KASSERT(mp->state == 0, ("mutex already locked"));
141 	mp->state = 1;
142 }
143 
144 static void
145 mtx_unlock(struct mtx *mp)
146 {
147 	KASSERT(mp->state == 1, ("mutex not locked"));
148 	mp->state = 0;
149 }
150 
151 #define MA_OWNED	9
152 
153 static void
154 mtx_assert(struct mtx *mp, int flag)
155 {
156 	if (flag == MA_OWNED) {
157 		KASSERT(mp->state == 1, ("mtx_assert(MA_OWNED) not true"));
158 	}
159 }
160 
161 #define CTASSERT(foo)
162 #define WITNESS_WARN(flags, lock, fmt, ...)	(void)0
163 
164 #endif /* USERLAND */
165 
166 /*
167  * This is our basic building block.
168  *
169  * It can be used in three different ways depending on the value of the ptr
170  * element:
171  *     If ptr is NULL, it represents a run of free items.
172  *     If ptr points to the unrhdr it represents a run of allocated items.
173  *     Otherwise it points to a bitstring of allocated items.
174  *
175  * For runs the len field is the length of the run.
176  * For bitmaps the len field represents the number of allocated items.
177  *
178  * The bitmap is the same size as struct unr to optimize memory management.
179  *
180  * Two special ranges are not covered by unrs:
181  * - at the start of the allocator space, all elements in [low, low + first)
182  *   are allocated;
183  * - at the end of the allocator space, all elements in [high - last, high]
184  *   are free.
185  */
186 struct unr {
187 	TAILQ_ENTRY(unr)	list;
188 	u_int			len;
189 	void			*ptr;
190 };
191 
192 struct unrb {
193 	bitstr_t		map[sizeof(struct unr) / sizeof(bitstr_t)];
194 };
195 
196 CTASSERT((sizeof(struct unr) % sizeof(bitstr_t)) == 0);
197 
198 /* Number of bits we can store in the bitmap */
199 #define NBITS (NBBY * sizeof(((struct unrb *)NULL)->map))
200 
201 static inline bool
202 is_bitmap(struct unrhdr *uh, struct unr *up)
203 {
204 	return (up->ptr != uh && up->ptr != NULL);
205 }
206 
207 /* Is the unrb empty in at least the first len bits? */
208 static inline bool
209 ub_empty(struct unrb *ub, int len) {
210 	int first_set;
211 
212 	bit_ffs(ub->map, len, &first_set);
213 	return (first_set == -1);
214 }
215 
216 /* Is the unrb full?  That is, is the number of set elements equal to len? */
217 static inline bool
218 ub_full(struct unrb *ub, int len)
219 {
220 	int first_clear;
221 
222 	bit_ffc(ub->map, len, &first_clear);
223 	return (first_clear == -1);
224 }
225 
226 /*
227  * start: ipos = -1, upos = NULL;
228  * end:   ipos = -1, upos = uh
229  */
230 struct unrhdr_iter {
231 	struct unrhdr *uh;
232 	int ipos;
233 	int upos_first_item;
234 	void *upos;
235 };
236 
237 void *
238 create_iter_unr(struct unrhdr *uh)
239 {
240 	struct unrhdr_iter *iter;
241 
242 	iter = Malloc(sizeof(*iter));
243 	iter->ipos = -1;
244 	iter->uh = uh;
245 	iter->upos = NULL;
246 	iter->upos_first_item = -1;
247 	return (iter);
248 }
249 
250 static void
251 next_iter_unrl(struct unrhdr *uh, struct unrhdr_iter *iter)
252 {
253 	struct unr *up;
254 	struct unrb *ub;
255 	u_int y;
256 	int c;
257 
258 	if (iter->ipos == -1) {
259 		if (iter->upos == uh)
260 			return;
261 		y = uh->low - 1;
262 		if (uh->first == 0) {
263 			up = TAILQ_FIRST(&uh->head);
264 			if (up == NULL) {
265 				iter->upos = uh;
266 				return;
267 			}
268 			iter->upos = up;
269 			if (up->ptr == NULL)
270 				iter->upos = NULL;
271 			else
272 				iter->upos_first_item = uh->low;
273 		}
274 	} else {
275 		y = iter->ipos;
276 	}
277 
278 	up = iter->upos;
279 
280 	/* Special case for the compacted [low, first) run. */
281 	if (up == NULL) {
282 		if (y + 1 < uh->low + uh->first) {
283 			iter->ipos = y + 1;
284 			return;
285 		}
286 		up = iter->upos = TAILQ_FIRST(&uh->head);
287 		iter->upos_first_item = uh->low + uh->first;
288 	}
289 
290 	for (;;) {
291 		if (y + 1 < iter->upos_first_item + up->len) {
292 			if (up->ptr == uh) {
293 				iter->ipos = y + 1;
294 				return;
295 			} else if (is_bitmap(uh, up)) {
296 				ub = up->ptr;
297 				bit_ffs_at(&ub->map[0],
298 				    y + 1 - iter->upos_first_item,
299 				    up->len, &c);
300 				if (c != -1) {
301 					iter->ipos = iter->upos_first_item + c;
302 					return;
303 				}
304 			}
305 		}
306 		iter->upos_first_item += up->len;
307 		y = iter->upos_first_item - 1;
308 		up = iter->upos = TAILQ_NEXT((struct unr *)iter->upos, list);
309 		if (iter->upos == NULL) {
310 			iter->ipos = -1;
311 			iter->upos = uh;
312 			return;
313 		}
314 	}
315 }
316 
317 /*
318  * returns -1 on end, otherwise the next element
319  */
320 int
321 next_iter_unr(void *handle)
322 {
323 	struct unrhdr *uh;
324 	struct unrhdr_iter *iter;
325 
326 	iter = handle;
327 	uh = iter->uh;
328 	if (uh->mtx != NULL)
329 		mtx_lock(uh->mtx);
330 	next_iter_unrl(uh, iter);
331 	if (uh->mtx != NULL)
332 		mtx_unlock(uh->mtx);
333 	return (iter->ipos);
334 }
335 
336 void
337 free_iter_unr(void *handle)
338 {
339 	Free(handle);
340 }
341 
342 #if defined(DIAGNOSTIC) || !defined(_KERNEL)
343 #ifndef __diagused
344 #define	__diagused
345 #endif
346 
347 /*
348  * Consistency check function.
349  *
350  * Checks the internal consistency as well as we can.
351  *
352  * Called at all boundaries of this API.
353  */
354 static void
355 check_unrhdr(struct unrhdr *uh, int line)
356 {
357 	struct unr *up;
358 	struct unrb *ub;
359 	int w;
360 	u_int y __diagused, z __diagused;
361 
362 	y = uh->first;
363 	z = 0;
364 	TAILQ_FOREACH(up, &uh->head, list) {
365 		z++;
366 		if (is_bitmap(uh, up)) {
367 			ub = up->ptr;
368 			KASSERT (up->len <= NBITS,
369 			    ("UNR inconsistency: len %u max %zd (line %d)\n",
370 			    up->len, NBITS, line));
371 			z++;
372 			w = 0;
373 			bit_count(ub->map, 0, up->len, &w);
374 			y += w;
375 		} else if (up->ptr != NULL)
376 			y += up->len;
377 	}
378 	KASSERT (y == uh->busy,
379 	    ("UNR inconsistency: items %u found %u (line %d)\n",
380 	    uh->busy, y, line));
381 	KASSERT (z == uh->alloc,
382 	    ("UNR inconsistency: chunks %u found %u (line %d)\n",
383 	    uh->alloc, z, line));
384 }
385 
386 #else
387 
388 static __inline void
389 check_unrhdr(struct unrhdr *uh __unused, int line __unused)
390 {
391 
392 }
393 
394 #endif
395 
396 /*
397  * Userland memory management.  Just use calloc and keep track of how
398  * many elements we have allocated for check_unrhdr().
399  */
400 
401 static __inline void *
402 new_unr(struct unrhdr *uh, void **p1, void **p2)
403 {
404 	void *p;
405 
406 	uh->alloc++;
407 	KASSERT(*p1 != NULL || *p2 != NULL, ("Out of cached memory"));
408 	if (*p1 != NULL) {
409 		p = *p1;
410 		*p1 = NULL;
411 		return (p);
412 	} else {
413 		p = *p2;
414 		*p2 = NULL;
415 		return (p);
416 	}
417 }
418 
419 static __inline void
420 delete_unr(struct unrhdr *uh, void *ptr)
421 {
422 	struct unr *up;
423 
424 	uh->alloc--;
425 	up = ptr;
426 	TAILQ_INSERT_TAIL(&uh->ppfree, up, list);
427 }
428 
429 void
430 clean_unrhdrl(struct unrhdr *uh)
431 {
432 	struct unr *up;
433 
434 	if (uh->mtx != NULL)
435 		mtx_assert(uh->mtx, MA_OWNED);
436 	while ((up = TAILQ_FIRST(&uh->ppfree)) != NULL) {
437 		TAILQ_REMOVE(&uh->ppfree, up, list);
438 		if (uh->mtx != NULL)
439 			mtx_unlock(uh->mtx);
440 		Free(up);
441 		if (uh->mtx != NULL)
442 			mtx_lock(uh->mtx);
443 	}
444 
445 }
446 
447 void
448 clean_unrhdr(struct unrhdr *uh)
449 {
450 
451 	if (uh->mtx != NULL)
452 		mtx_lock(uh->mtx);
453 	clean_unrhdrl(uh);
454 	if (uh->mtx != NULL)
455 		mtx_unlock(uh->mtx);
456 }
457 
458 void
459 init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex)
460 {
461 
462 	KASSERT(low >= 0 && low <= high,
463 	    ("UNR: use error: new_unrhdr(%d, %d)", low, high));
464 	if (mutex == UNR_NO_MTX)
465 		uh->mtx = NULL;
466 	else if (mutex != NULL)
467 		uh->mtx = mutex;
468 	else
469 		uh->mtx = &unitmtx;
470 	TAILQ_INIT(&uh->head);
471 	TAILQ_INIT(&uh->ppfree);
472 	uh->low = low;
473 	uh->high = high;
474 	uh->first = 0;
475 	uh->last = 1 + (high - low);
476 	uh->busy = 0;
477 	uh->alloc = 0;
478 	check_unrhdr(uh, __LINE__);
479 }
480 
481 /*
482  * Allocate a new unrheader set.
483  *
484  * Highest and lowest valid values given as parameters.
485  */
486 
487 struct unrhdr *
488 new_unrhdr(int low, int high, struct mtx *mutex)
489 {
490 	struct unrhdr *uh;
491 
492 	uh = Malloc(sizeof *uh);
493 	init_unrhdr(uh, low, high, mutex);
494 	return (uh);
495 }
496 
497 void
498 delete_unrhdr(struct unrhdr *uh)
499 {
500 
501 	check_unrhdr(uh, __LINE__);
502 	KASSERT(uh->busy == 0, ("unrhdr has %u allocations", uh->busy));
503 	KASSERT(uh->alloc == 0, ("UNR memory leak in delete_unrhdr"));
504 	KASSERT(TAILQ_FIRST(&uh->ppfree) == NULL,
505 	    ("unrhdr has postponed item for free"));
506 	Free(uh);
507 }
508 
509 void
510 clear_unrhdr(struct unrhdr *uh)
511 {
512 	struct unr *up, *uq;
513 
514 	KASSERT(TAILQ_EMPTY(&uh->ppfree),
515 	    ("unrhdr has postponed item for free"));
516 	TAILQ_FOREACH_SAFE(up, &uh->head, list, uq) {
517 		if (up->ptr != uh) {
518 			Free(up->ptr);
519 		}
520 		Free(up);
521 	}
522 	uh->busy = 0;
523 	uh->alloc = 0;
524 	init_unrhdr(uh, uh->low, uh->high, uh->mtx);
525 
526 	check_unrhdr(uh, __LINE__);
527 }
528 
529 /*
530  * Look for sequence of items which can be combined into a bitmap, if
531  * multiple are present, take the one which saves most memory.
532  *
533  * Return (1) if a sequence was found to indicate that another call
534  * might be able to do more.  Return (0) if we found no suitable sequence.
535  *
536  * NB: called from alloc_unr(), no new memory allocation allowed.
537  */
538 static int
539 optimize_unr(struct unrhdr *uh)
540 {
541 	struct unr *up, *uf, *us;
542 	struct unrb *ub, *ubf;
543 	u_int a, l, ba;
544 
545 	/*
546 	 * Look for the run of items (if any) which when collapsed into
547 	 * a bitmap would save most memory.
548 	 */
549 	us = NULL;
550 	ba = 0;
551 	TAILQ_FOREACH(uf, &uh->head, list) {
552 		if (uf->len >= NBITS)
553 			continue;
554 		a = 1;
555 		if (is_bitmap(uh, uf))
556 			a++;
557 		l = uf->len;
558 		up = uf;
559 		while (1) {
560 			up = TAILQ_NEXT(up, list);
561 			if (up == NULL)
562 				break;
563 			if ((up->len + l) > NBITS)
564 				break;
565 			a++;
566 			if (is_bitmap(uh, up))
567 				a++;
568 			l += up->len;
569 		}
570 		if (a > ba) {
571 			ba = a;
572 			us = uf;
573 		}
574 	}
575 	if (ba < 3)
576 		return (0);
577 
578 	/*
579 	 * If the first element is not a bitmap, make it one.
580 	 * Trying to do so without allocating more memory complicates things
581 	 * a bit
582 	 */
583 	if (!is_bitmap(uh, us)) {
584 		uf = TAILQ_NEXT(us, list);
585 		TAILQ_REMOVE(&uh->head, us, list);
586 		a = us->len;
587 		l = us->ptr == uh ? 1 : 0;
588 		ub = (void *)us;
589 		bit_nclear(ub->map, 0, NBITS - 1);
590 		if (l)
591 			bit_nset(ub->map, 0, a);
592 		if (!is_bitmap(uh, uf)) {
593 			if (uf->ptr == NULL)
594 				bit_nclear(ub->map, a, a + uf->len - 1);
595 			else
596 				bit_nset(ub->map, a, a + uf->len - 1);
597 			uf->ptr = ub;
598 			uf->len += a;
599 			us = uf;
600 		} else {
601 			ubf = uf->ptr;
602 			for (l = 0; l < uf->len; l++, a++) {
603 				if (bit_test(ubf->map, l))
604 					bit_set(ub->map, a);
605 				else
606 					bit_clear(ub->map, a);
607 			}
608 			uf->len = a;
609 			delete_unr(uh, uf->ptr);
610 			uf->ptr = ub;
611 			us = uf;
612 		}
613 	}
614 	ub = us->ptr;
615 	while (1) {
616 		uf = TAILQ_NEXT(us, list);
617 		if (uf == NULL)
618 			return (1);
619 		if (uf->len + us->len > NBITS)
620 			return (1);
621 		if (uf->ptr == NULL) {
622 			bit_nclear(ub->map, us->len, us->len + uf->len - 1);
623 			us->len += uf->len;
624 			TAILQ_REMOVE(&uh->head, uf, list);
625 			delete_unr(uh, uf);
626 		} else if (uf->ptr == uh) {
627 			bit_nset(ub->map, us->len, us->len + uf->len - 1);
628 			us->len += uf->len;
629 			TAILQ_REMOVE(&uh->head, uf, list);
630 			delete_unr(uh, uf);
631 		} else {
632 			ubf = uf->ptr;
633 			for (l = 0; l < uf->len; l++, us->len++) {
634 				if (bit_test(ubf->map, l))
635 					bit_set(ub->map, us->len);
636 				else
637 					bit_clear(ub->map, us->len);
638 			}
639 			TAILQ_REMOVE(&uh->head, uf, list);
640 			delete_unr(uh, ubf);
641 			delete_unr(uh, uf);
642 		}
643 	}
644 }
645 
646 /*
647  * See if a given unr should be collapsed with a neighbor.
648  *
649  * NB: called from alloc_unr(), no new memory allocation allowed.
650  */
651 static void
652 collapse_unr(struct unrhdr *uh, struct unr *up)
653 {
654 	struct unr *upp;
655 	struct unrb *ub;
656 
657 	/* If bitmap is all set or clear, change it to runlength */
658 	if (is_bitmap(uh, up)) {
659 		ub = up->ptr;
660 		if (ub_full(ub, up->len)) {
661 			delete_unr(uh, up->ptr);
662 			up->ptr = uh;
663 		} else if (ub_empty(ub, up->len)) {
664 			delete_unr(uh, up->ptr);
665 			up->ptr = NULL;
666 		}
667 	}
668 
669 	/* If nothing left in runlength, delete it */
670 	if (up->len == 0) {
671 		upp = TAILQ_PREV(up, unrhd, list);
672 		if (upp == NULL)
673 			upp = TAILQ_NEXT(up, list);
674 		TAILQ_REMOVE(&uh->head, up, list);
675 		delete_unr(uh, up);
676 		up = upp;
677 	}
678 
679 	/* If we have "hot-spot" still, merge with neighbor if possible */
680 	if (up != NULL) {
681 		upp = TAILQ_PREV(up, unrhd, list);
682 		if (upp != NULL && up->ptr == upp->ptr) {
683 			up->len += upp->len;
684 			TAILQ_REMOVE(&uh->head, upp, list);
685 			delete_unr(uh, upp);
686 			}
687 		upp = TAILQ_NEXT(up, list);
688 		if (upp != NULL && up->ptr == upp->ptr) {
689 			up->len += upp->len;
690 			TAILQ_REMOVE(&uh->head, upp, list);
691 			delete_unr(uh, upp);
692 		}
693 	}
694 
695 	/* Merge into ->first if possible */
696 	upp = TAILQ_FIRST(&uh->head);
697 	if (upp != NULL && upp->ptr == uh) {
698 		uh->first += upp->len;
699 		TAILQ_REMOVE(&uh->head, upp, list);
700 		delete_unr(uh, upp);
701 		if (up == upp)
702 			up = NULL;
703 	}
704 
705 	/* Merge into ->last if possible */
706 	upp = TAILQ_LAST(&uh->head, unrhd);
707 	if (upp != NULL && upp->ptr == NULL) {
708 		uh->last += upp->len;
709 		TAILQ_REMOVE(&uh->head, upp, list);
710 		delete_unr(uh, upp);
711 		if (up == upp)
712 			up = NULL;
713 	}
714 
715 	/* Try to make bitmaps */
716 	while (optimize_unr(uh))
717 		continue;
718 }
719 
720 /*
721  * Allocate a free unr.
722  */
723 int
724 alloc_unrl(struct unrhdr *uh)
725 {
726 	struct unr *up;
727 	struct unrb *ub;
728 	u_int x;
729 	int y;
730 
731 	if (uh->mtx != NULL)
732 		mtx_assert(uh->mtx, MA_OWNED);
733 	check_unrhdr(uh, __LINE__);
734 	x = uh->low + uh->first;
735 
736 	up = TAILQ_FIRST(&uh->head);
737 
738 	/*
739 	 * If we have an ideal split, just adjust the first+last
740 	 */
741 	if (up == NULL && uh->last > 0) {
742 		uh->first++;
743 		uh->last--;
744 		uh->busy++;
745 		return (x);
746 	}
747 
748 	/*
749 	 * We can always allocate from the first list element, so if we have
750 	 * nothing on the list, we must have run out of unit numbers.
751 	 */
752 	if (up == NULL)
753 		return (-1);
754 
755 	KASSERT(up->ptr != uh, ("UNR first element is allocated"));
756 
757 	if (up->ptr == NULL) {	/* free run */
758 		uh->first++;
759 		up->len--;
760 	} else {		/* bitmap */
761 		ub = up->ptr;
762 		bit_ffc(ub->map, up->len, &y);
763 		KASSERT(y != -1, ("UNR corruption: No clear bit in bitmap."));
764 		bit_set(ub->map, y);
765 		x += y;
766 	}
767 	uh->busy++;
768 	collapse_unr(uh, up);
769 	return (x);
770 }
771 
772 int
773 alloc_unr(struct unrhdr *uh)
774 {
775 	int i;
776 
777 	if (uh->mtx != NULL)
778 		mtx_lock(uh->mtx);
779 	i = alloc_unrl(uh);
780 	clean_unrhdrl(uh);
781 	if (uh->mtx != NULL)
782 		mtx_unlock(uh->mtx);
783 	return (i);
784 }
785 
786 static int
787 alloc_unr_specificl(struct unrhdr *uh, u_int item, void **p1, void **p2)
788 {
789 	struct unr *up, *upn;
790 	struct unrb *ub;
791 	u_int i, last, tl;
792 
793 	if (uh->mtx != NULL)
794 		mtx_assert(uh->mtx, MA_OWNED);
795 
796 	if (item < uh->low + uh->first || item > uh->high)
797 		return (-1);
798 
799 	up = TAILQ_FIRST(&uh->head);
800 	/* Ideal split. */
801 	if (up == NULL && item - uh->low == uh->first) {
802 		uh->first++;
803 		uh->last--;
804 		uh->busy++;
805 		check_unrhdr(uh, __LINE__);
806 		return (item);
807 	}
808 
809 	i = item - uh->low - uh->first;
810 
811 	if (up == NULL) {
812 		up = new_unr(uh, p1, p2);
813 		up->ptr = NULL;
814 		up->len = i;
815 		TAILQ_INSERT_TAIL(&uh->head, up, list);
816 		up = new_unr(uh, p1, p2);
817 		up->ptr = uh;
818 		up->len = 1;
819 		TAILQ_INSERT_TAIL(&uh->head, up, list);
820 		uh->last = uh->high - uh->low - i;
821 		uh->busy++;
822 		check_unrhdr(uh, __LINE__);
823 		return (item);
824 	} else {
825 		/* Find the item which contains the unit we want to allocate. */
826 		TAILQ_FOREACH(up, &uh->head, list) {
827 			if (up->len > i)
828 				break;
829 			i -= up->len;
830 		}
831 	}
832 
833 	if (up == NULL) {
834 		if (i > 0) {
835 			up = new_unr(uh, p1, p2);
836 			up->ptr = NULL;
837 			up->len = i;
838 			TAILQ_INSERT_TAIL(&uh->head, up, list);
839 		}
840 		up = new_unr(uh, p1, p2);
841 		up->ptr = uh;
842 		up->len = 1;
843 		TAILQ_INSERT_TAIL(&uh->head, up, list);
844 		goto done;
845 	}
846 
847 	if (is_bitmap(uh, up)) {
848 		ub = up->ptr;
849 		if (bit_test(ub->map, i) == 0) {
850 			bit_set(ub->map, i);
851 			goto done;
852 		} else
853 			return (-1);
854 	} else if (up->ptr == uh)
855 		return (-1);
856 
857 	KASSERT(up->ptr == NULL,
858 	    ("alloc_unr_specificl: up->ptr != NULL (up=%p)", up));
859 
860 	/* Split off the tail end, if any. */
861 	tl = up->len - (1 + i);
862 	if (tl > 0) {
863 		upn = new_unr(uh, p1, p2);
864 		upn->ptr = NULL;
865 		upn->len = tl;
866 		TAILQ_INSERT_AFTER(&uh->head, up, upn, list);
867 	}
868 
869 	/* Split off head end, if any */
870 	if (i > 0) {
871 		upn = new_unr(uh, p1, p2);
872 		upn->len = i;
873 		upn->ptr = NULL;
874 		TAILQ_INSERT_BEFORE(up, upn, list);
875 	}
876 	up->len = 1;
877 	up->ptr = uh;
878 
879 done:
880 	last = uh->high - uh->low - (item - uh->low);
881 	if (uh->last > last)
882 		uh->last = last;
883 	uh->busy++;
884 	collapse_unr(uh, up);
885 	check_unrhdr(uh, __LINE__);
886 	return (item);
887 }
888 
889 int
890 alloc_unr_specific(struct unrhdr *uh, u_int item)
891 {
892 	void *p1, *p2;
893 	int i;
894 
895 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "alloc_unr_specific");
896 
897 	p1 = Malloc(sizeof(struct unr));
898 	p2 = Malloc(sizeof(struct unr));
899 
900 	if (uh->mtx != NULL)
901 		mtx_lock(uh->mtx);
902 	i = alloc_unr_specificl(uh, item, &p1, &p2);
903 	if (uh->mtx != NULL)
904 		mtx_unlock(uh->mtx);
905 
906 	if (p1 != NULL)
907 		Free(p1);
908 	if (p2 != NULL)
909 		Free(p2);
910 
911 	return (i);
912 }
913 
914 /*
915  * Free a unr.
916  *
917  * If we can save unrs by using a bitmap, do so.
918  */
919 static void
920 free_unrl(struct unrhdr *uh, u_int item, void **p1, void **p2)
921 {
922 	struct unr *up, *upp, *upn;
923 	struct unrb *ub;
924 	u_int pl;
925 
926 	KASSERT(item >= uh->low && item <= uh->high,
927 	    ("UNR: free_unr(%u) out of range [%u...%u]",
928 	     item, uh->low, uh->high));
929 	check_unrhdr(uh, __LINE__);
930 	item -= uh->low;
931 	upp = TAILQ_FIRST(&uh->head);
932 	/*
933 	 * Freeing in the ideal split case
934 	 */
935 	if (item + 1 == uh->first && upp == NULL) {
936 		uh->last++;
937 		uh->first--;
938 		uh->busy--;
939 		check_unrhdr(uh, __LINE__);
940 		return;
941 	}
942 	/*
943  	 * Freeing in the ->first section.  Create a run starting at the
944 	 * freed item.  The code below will subdivide it.
945 	 */
946 	if (item < uh->first) {
947 		up = new_unr(uh, p1, p2);
948 		up->ptr = uh;
949 		up->len = uh->first - item;
950 		TAILQ_INSERT_HEAD(&uh->head, up, list);
951 		uh->first -= up->len;
952 	}
953 
954 	item -= uh->first;
955 
956 	/* Find the item which contains the unit we want to free */
957 	TAILQ_FOREACH(up, &uh->head, list) {
958 		if (up->len > item)
959 			break;
960 		item -= up->len;
961 	}
962 
963 	/* Handle bitmap items */
964 	if (is_bitmap(uh, up)) {
965 		ub = up->ptr;
966 
967 		KASSERT(bit_test(ub->map, item) != 0,
968 		    ("UNR: Freeing free item %d (bitmap)\n", item));
969 		bit_clear(ub->map, item);
970 		uh->busy--;
971 		collapse_unr(uh, up);
972 		return;
973 	}
974 
975 	KASSERT(up->ptr == uh, ("UNR Freeing free item %d (run))\n", item));
976 
977 	/* Just this one left, reap it */
978 	if (up->len == 1) {
979 		up->ptr = NULL;
980 		uh->busy--;
981 		collapse_unr(uh, up);
982 		return;
983 	}
984 
985 	/* Check if we can shift the item into the previous 'free' run */
986 	upp = TAILQ_PREV(up, unrhd, list);
987 	if (item == 0 && upp != NULL && upp->ptr == NULL) {
988 		upp->len++;
989 		up->len--;
990 		uh->busy--;
991 		collapse_unr(uh, up);
992 		return;
993 	}
994 
995 	/* Check if we can shift the item to the next 'free' run */
996 	upn = TAILQ_NEXT(up, list);
997 	if (item == up->len - 1 && upn != NULL && upn->ptr == NULL) {
998 		upn->len++;
999 		up->len--;
1000 		uh->busy--;
1001 		collapse_unr(uh, up);
1002 		return;
1003 	}
1004 
1005 	/* Split off the tail end, if any. */
1006 	pl = up->len - (1 + item);
1007 	if (pl > 0) {
1008 		upp = new_unr(uh, p1, p2);
1009 		upp->ptr = uh;
1010 		upp->len = pl;
1011 		TAILQ_INSERT_AFTER(&uh->head, up, upp, list);
1012 	}
1013 
1014 	/* Split off head end, if any */
1015 	if (item > 0) {
1016 		upp = new_unr(uh, p1, p2);
1017 		upp->len = item;
1018 		upp->ptr = uh;
1019 		TAILQ_INSERT_BEFORE(up, upp, list);
1020 	}
1021 	up->len = 1;
1022 	up->ptr = NULL;
1023 	uh->busy--;
1024 	collapse_unr(uh, up);
1025 }
1026 
1027 void
1028 free_unr(struct unrhdr *uh, u_int item)
1029 {
1030 	void *p1, *p2;
1031 
1032 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "free_unr");
1033 	p1 = Malloc(sizeof(struct unr));
1034 	p2 = Malloc(sizeof(struct unr));
1035 	if (uh->mtx != NULL)
1036 		mtx_lock(uh->mtx);
1037 	free_unrl(uh, item, &p1, &p2);
1038 	clean_unrhdrl(uh);
1039 	if (uh->mtx != NULL)
1040 		mtx_unlock(uh->mtx);
1041 	if (p1 != NULL)
1042 		Free(p1);
1043 	if (p2 != NULL)
1044 		Free(p2);
1045 }
1046 
1047 #ifdef _KERNEL
1048 #include "opt_ddb.h"
1049 #ifdef DDB
1050 #include <ddb/ddb.h>
1051 #endif
1052 #endif
1053 
1054 #if (defined(_KERNEL) && defined(DDB)) || !defined(_KERNEL)
1055 
1056 #if !defined(_KERNEL)
1057 #define db_printf printf
1058 #endif
1059 
1060 static void
1061 print_unr(struct unrhdr *uh, struct unr *up)
1062 {
1063 	u_int x;
1064 	struct unrb *ub;
1065 
1066 	db_printf("  %p len = %5u ", up, up->len);
1067 	if (up->ptr == NULL)
1068 		db_printf("free\n");
1069 	else if (up->ptr == uh)
1070 		db_printf("alloc\n");
1071 	else {
1072 		ub = up->ptr;
1073 		db_printf("bitmap [");
1074 		for (x = 0; x < up->len; x++) {
1075 			if (bit_test(ub->map, x))
1076 				db_printf("#");
1077 			else
1078 				db_printf(" ");
1079 		}
1080 		db_printf("]\n");
1081 	}
1082 }
1083 
1084 static void
1085 print_unrhdr(struct unrhdr *uh)
1086 {
1087 	struct unr *up;
1088 	u_int x;
1089 
1090 	db_printf(
1091 	    "%p low = %u high = %u first = %u last = %u busy %u chunks = %u\n",
1092 	    uh, uh->low, uh->high, uh->first, uh->last, uh->busy, uh->alloc);
1093 	x = uh->low + uh->first;
1094 	TAILQ_FOREACH(up, &uh->head, list) {
1095 		db_printf("  from = %5u", x);
1096 		print_unr(uh, up);
1097 		if (up->ptr == NULL || up->ptr == uh)
1098 			x += up->len;
1099 		else
1100 			x += NBITS;
1101 	}
1102 }
1103 
1104 #endif
1105 
1106 #if defined(_KERNEL) && defined(DDB)
1107 DB_SHOW_COMMAND(unrhdr, unrhdr_print_unrhdr)
1108 {
1109 	if (!have_addr) {
1110 		db_printf("show unrhdr addr\n");
1111 		return;
1112 	}
1113 
1114 	print_unrhdr((struct unrhdr *)addr);
1115 }
1116 
1117 static void
1118 print_unrhdr_iter(struct unrhdr_iter *iter)
1119 {
1120 	db_printf("iter %p unrhdr %p ipos %d upos %p ufi %d\n",
1121 	    iter, iter->uh, iter->ipos, iter->upos, iter->upos_first_item);
1122 }
1123 
1124 DB_SHOW_COMMAND(unrhdr_iter, unrhdr_print_iter)
1125 {
1126 	if (!have_addr) {
1127 		db_printf("show unrhdr_iter addr\n");
1128 		return;
1129 	}
1130 
1131 	print_unrhdr_iter((struct unrhdr_iter *)addr);
1132 }
1133 #endif
1134 
1135 #ifndef _KERNEL	/* USERLAND test driver */
1136 
1137 /*
1138  * Simple stochastic test driver for the above functions.  The code resides
1139  * here so that it can access static functions and structures.
1140  */
1141 
1142 static bool verbose;
1143 #define VPRINTF(...)	{if (verbose) printf(__VA_ARGS__);}
1144 
1145 static void
1146 test_alloc_unr(struct unrhdr *uh, u_int i, char a[])
1147 {
1148 	int j;
1149 
1150 	if (a[i]) {
1151 		VPRINTF("F %u\n", i);
1152 		free_unr(uh, i);
1153 		a[i] = 0;
1154 	} else {
1155 		no_alloc = 1;
1156 		j = alloc_unr(uh);
1157 		if (j != -1) {
1158 			a[j] = 1;
1159 			VPRINTF("A %d\n", j);
1160 		}
1161 		no_alloc = 0;
1162 	}
1163 }
1164 
1165 static void
1166 test_alloc_unr_specific(struct unrhdr *uh, u_int i, char a[])
1167 {
1168 	int j;
1169 
1170 	j = alloc_unr_specific(uh, i);
1171 	if (j == -1) {
1172 		VPRINTF("F %u\n", i);
1173 		a[i] = 0;
1174 		free_unr(uh, i);
1175 	} else {
1176 		a[i] = 1;
1177 		VPRINTF("A %d\n", j);
1178 	}
1179 }
1180 
1181 #define	TBASE	7
1182 #define	XSIZE	10
1183 #define	ISIZE	1000
1184 
1185 static int
1186 test_iter_compar(const void *a, const void *b)
1187 {
1188 	return (*(const int *)a - *(const int *)b);
1189 }
1190 
1191 static void
1192 test_iter_fill(int *vals, struct unrhdr *uh, int i, int v, int *res)
1193 {
1194 	int x;
1195 
1196 	vals[i] = v;
1197 	x = alloc_unr_specific(uh, v);
1198 	if (x != v) {
1199 		VPRINTF("alloc_unr_specific failed %d %d\n", x, v);
1200 		*res = 1;
1201 	}
1202 }
1203 
1204 static void
1205 test_iter(void)
1206 {
1207 	struct unrhdr *uh;
1208 	void *ihandle;
1209 	int vals[ISIZE];
1210 	int i, j, v, x, res;
1211 
1212 	res = 0;
1213 	uh = new_unrhdr(TBASE, INT_MAX, NULL);
1214 	for (i = 0; i < XSIZE; i++) {
1215 		vals[i] = i + TBASE;
1216 		x = alloc_unr_specific(uh, i + TBASE);
1217 		if (x != i + TBASE) {
1218 			VPRINTF("alloc_unr_specific failed %d %d\n", x,
1219 			    i + TBASE);
1220 			res = 1;
1221 		}
1222 	}
1223 	for (; i < ISIZE; i++) {
1224 		for (;;) {
1225 again:
1226 			v = arc4random_uniform(INT_MAX);
1227 			if (v < TBASE)
1228 				goto again;
1229 			for (j = 0; j < i; j++) {
1230 				if (v == vals[j] || v + 1 == vals[j])
1231 					goto again;
1232 			}
1233 			break;
1234 		}
1235 		test_iter_fill(vals, uh, i, v, &res);
1236 		i++, v++;
1237 		if (i < ISIZE)
1238 			test_iter_fill(vals, uh, i, v, &res);
1239 	}
1240 	qsort(vals, ISIZE, sizeof(vals[0]), test_iter_compar);
1241 
1242 	ihandle = create_iter_unr(uh);
1243 	i = 0;
1244 	while ((v = next_iter_unr(ihandle)) != -1) {
1245 		if (vals[i] != v) {
1246 			VPRINTF("iter %d: iter %d != val %d\n", i, v, vals[i]);
1247 			if (res == 0) {
1248 				if (verbose)
1249 					print_unrhdr(uh);
1250 				res = 1;
1251 			}
1252 		} else {
1253 			VPRINTF("iter %d: val %d\n", i, v);
1254 		}
1255 		i++;
1256 	}
1257 	free_iter_unr(ihandle);
1258 	clean_unrhdr(uh);
1259 	clear_unrhdr(uh);
1260 	delete_unrhdr(uh);
1261 	exit(res);
1262 }
1263 
1264 static void
1265 usage(char **argv)
1266 {
1267 	printf("%s [-h] [-i] [-r REPETITIONS] [-v]\n", argv[0]);
1268 }
1269 
1270 int
1271 main(int argc, char **argv)
1272 {
1273 	struct unrhdr *uh;
1274 	char *a;
1275 	long count = 10000;	/* Number of unrs to test */
1276 	long reps = 1, m;
1277 	int ch;
1278 	u_int i;
1279 	bool testing_iter;
1280 
1281 	verbose = false;
1282 	testing_iter = false;
1283 
1284 	while ((ch = getopt(argc, argv, "hir:v")) != -1) {
1285 		switch (ch) {
1286 		case 'i':
1287 			testing_iter = true;
1288 			break;
1289 		case 'r':
1290 			errno = 0;
1291 			reps = strtol(optarg, NULL, 0);
1292 			if (errno == ERANGE || errno == EINVAL) {
1293 				usage(argv);
1294 				exit(2);
1295 			}
1296 
1297 			break;
1298 		case 'v':
1299 			verbose = true;
1300 			break;
1301 		case 'h':
1302 		default:
1303 			usage(argv);
1304 			exit(2);
1305 		}
1306 	}
1307 
1308 	setbuf(stdout, NULL);
1309 
1310 	if (testing_iter)
1311 		test_iter();
1312 
1313 	uh = new_unrhdr(0, count - 1, NULL);
1314 	print_unrhdr(uh);
1315 
1316 	a = calloc(count, sizeof(char));
1317 	if (a == NULL)
1318 		err(1, "calloc failed");
1319 
1320 	printf("sizeof(struct unr) %zu\n", sizeof(struct unr));
1321 	printf("sizeof(struct unrb) %zu\n", sizeof(struct unrb));
1322 	printf("sizeof(struct unrhdr) %zu\n", sizeof(struct unrhdr));
1323 	printf("NBITS %lu\n", (unsigned long)NBITS);
1324 	for (m = 0; m < count * reps; m++) {
1325 		i = arc4random_uniform(count);
1326 #if 0
1327 		if (a[i] && (j & 1))
1328 			continue;
1329 #endif
1330 		if ((arc4random() & 1) != 0)
1331 			test_alloc_unr(uh, i, a);
1332 		else
1333 			test_alloc_unr_specific(uh, i, a);
1334 
1335 		if (verbose)
1336 			print_unrhdr(uh);
1337 		check_unrhdr(uh, __LINE__);
1338 	}
1339 	for (i = 0; i < (u_int)count; i++) {
1340 		if (a[i]) {
1341 			if (verbose) {
1342 				printf("C %u\n", i);
1343 				print_unrhdr(uh);
1344 			}
1345 			free_unr(uh, i);
1346 		}
1347 	}
1348 	print_unrhdr(uh);
1349 	delete_unrhdr(uh);
1350 	free(a);
1351 	return (0);
1352 }
1353 #endif
1354