xref: /linux/lib/idr.c (revision a8fe58cec351c25e09c393bf46117c0c47b5a17c)
1 /*
2  * 2002-10-18  written by Jim Houston jim.houston@ccur.com
3  *	Copyright (C) 2002 by Concurrent Computer Corporation
4  *	Distributed under the GNU GPL license version 2.
5  *
6  * Modified by George Anzinger to reuse immediately and to use
7  * find bit instructions.  Also removed _irq on spinlocks.
8  *
9  * Modified by Nadia Derbey to make it RCU safe.
10  *
11  * Small id to pointer translation service.
12  *
13  * It uses a radix tree like structure as a sparse array indexed
14  * by the id to obtain the pointer.  The bitmap makes allocating
15  * a new id quick.
16  *
 * You call it to allocate an id (an int) and associate with that id a
 * pointer or whatever; we treat it as a (void *).  You can pass this
 * id to a user for them to pass back at a later time.  You then pass
 * that id to this code and it returns your pointer.
21  */
22 
23 #ifndef TEST                        // to test in user space...
24 #include <linux/slab.h>
25 #include <linux/init.h>
26 #include <linux/export.h>
27 #endif
28 #include <linux/err.h>
29 #include <linux/string.h>
30 #include <linux/idr.h>
31 #include <linux/spinlock.h>
32 #include <linux/percpu.h>
33 
/*
 * Ids are non-negative ints, so the usable id width is the number of bits
 * in an int minus one (the sign bit).
 */
#define MAX_IDR_SHIFT		(sizeof(int) * 8 - 1)
/* First value past the valid id space, as an unsigned quantity. */
#define MAX_IDR_BIT		(1U << MAX_IDR_SHIFT)

/* Leave the possibility of an incomplete final layer */
#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)

/* Number of id_layer structs to leave in free list */
#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2)

/* Slab cache backing every struct idr_layer; set up by idr_init_cache(). */
static struct kmem_cache *idr_layer_cache;
/* Per-cpu list of preloaded layers, linked through ary[0]; see idr_preload(). */
static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
/* Number of layers currently linked on this cpu's idr_preload_head. */
static DEFINE_PER_CPU(int, idr_preload_cnt);
/* Lock taken around the tree updates in ida_simple_get()/ida_simple_remove(). */
static DEFINE_SPINLOCK(simple_ida_lock);
47 
48 /* the maximum ID which can be allocated given idr->layers */
49 static int idr_max(int layers)
50 {
51 	int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT);
52 
53 	return (1 << bits) - 1;
54 }
55 
/*
 * Build the prefix mask for an idr_layer sitting at @layer: every id bit
 * above the bits addressed by that layer and the layers beneath it.  For
 * layer 0 that is everything except the low IDR_BITS bits, for layer 1
 * everything except the low 2 * IDR_BITS bits, and so on.
 */
static int idr_layer_prefix_mask(int layer)
{
	int covered = idr_max(layer + 1);

	return ~covered;
}
65 
66 static struct idr_layer *get_from_free_list(struct idr *idp)
67 {
68 	struct idr_layer *p;
69 	unsigned long flags;
70 
71 	spin_lock_irqsave(&idp->lock, flags);
72 	if ((p = idp->id_free)) {
73 		idp->id_free = p->ary[0];
74 		idp->id_free_cnt--;
75 		p->ary[0] = NULL;
76 	}
77 	spin_unlock_irqrestore(&idp->lock, flags);
78 	return(p);
79 }
80 
81 /**
82  * idr_layer_alloc - allocate a new idr_layer
83  * @gfp_mask: allocation mask
84  * @layer_idr: optional idr to allocate from
85  *
86  * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch
87  * one from the per-cpu preload buffer.  If @layer_idr is not %NULL, fetch
88  * an idr_layer from @idr->id_free.
89  *
90  * @layer_idr is to maintain backward compatibility with the old alloc
91  * interface - idr_pre_get() and idr_get_new*() - and will be removed
92  * together with per-pool preload buffer.
93  */
94 static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr)
95 {
96 	struct idr_layer *new;
97 
98 	/* this is the old path, bypass to get_from_free_list() */
99 	if (layer_idr)
100 		return get_from_free_list(layer_idr);
101 
102 	/*
103 	 * Try to allocate directly from kmem_cache.  We want to try this
104 	 * before preload buffer; otherwise, non-preloading idr_alloc()
105 	 * users will end up taking advantage of preloading ones.  As the
106 	 * following is allowed to fail for preloaded cases, suppress
107 	 * warning this time.
108 	 */
109 	new = kmem_cache_zalloc(idr_layer_cache, gfp_mask | __GFP_NOWARN);
110 	if (new)
111 		return new;
112 
113 	/*
114 	 * Try to fetch one from the per-cpu preload buffer if in process
115 	 * context.  See idr_preload() for details.
116 	 */
117 	if (!in_interrupt()) {
118 		preempt_disable();
119 		new = __this_cpu_read(idr_preload_head);
120 		if (new) {
121 			__this_cpu_write(idr_preload_head, new->ary[0]);
122 			__this_cpu_dec(idr_preload_cnt);
123 			new->ary[0] = NULL;
124 		}
125 		preempt_enable();
126 		if (new)
127 			return new;
128 	}
129 
130 	/*
131 	 * Both failed.  Try kmem_cache again w/o adding __GFP_NOWARN so
132 	 * that memory allocation failure warning is printed as intended.
133 	 */
134 	return kmem_cache_zalloc(idr_layer_cache, gfp_mask);
135 }
136 
137 static void idr_layer_rcu_free(struct rcu_head *head)
138 {
139 	struct idr_layer *layer;
140 
141 	layer = container_of(head, struct idr_layer, rcu_head);
142 	kmem_cache_free(idr_layer_cache, layer);
143 }
144 
145 static inline void free_layer(struct idr *idr, struct idr_layer *p)
146 {
147 	if (idr->hint == p)
148 		RCU_INIT_POINTER(idr->hint, NULL);
149 	call_rcu(&p->rcu_head, idr_layer_rcu_free);
150 }
151 
152 /* only called when idp->lock is held */
153 static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
154 {
155 	p->ary[0] = idp->id_free;
156 	idp->id_free = p;
157 	idp->id_free_cnt++;
158 }
159 
160 static void move_to_free_list(struct idr *idp, struct idr_layer *p)
161 {
162 	unsigned long flags;
163 
164 	/*
165 	 * Depends on the return element being zeroed.
166 	 */
167 	spin_lock_irqsave(&idp->lock, flags);
168 	__move_to_free_list(idp, p);
169 	spin_unlock_irqrestore(&idp->lock, flags);
170 }
171 
172 static void idr_mark_full(struct idr_layer **pa, int id)
173 {
174 	struct idr_layer *p = pa[0];
175 	int l = 0;
176 
177 	__set_bit(id & IDR_MASK, p->bitmap);
178 	/*
179 	 * If this layer is full mark the bit in the layer above to
180 	 * show that this part of the radix tree is full.  This may
181 	 * complete the layer above and require walking up the radix
182 	 * tree.
183 	 */
184 	while (bitmap_full(p->bitmap, IDR_SIZE)) {
185 		if (!(p = pa[++l]))
186 			break;
187 		id = id >> IDR_BITS;
188 		__set_bit((id & IDR_MASK), p->bitmap);
189 	}
190 }
191 
192 static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
193 {
194 	while (idp->id_free_cnt < MAX_IDR_FREE) {
195 		struct idr_layer *new;
196 		new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
197 		if (new == NULL)
198 			return (0);
199 		move_to_free_list(idp, new);
200 	}
201 	return 1;
202 }
203 
/**
 * sub_alloc - try to allocate an id without growing the tree depth
 * @idp: idr handle
 * @starting_id: id to start search at
 * @pa: idr_layer[MAX_IDR_LEVEL + 1] used as backtrack buffer
 * @gfp_mask: allocation mask for idr_layer_alloc()
 * @layer_idr: optional idr passed to idr_layer_alloc()
 *
 * Allocate an id in range [@starting_id, INT_MAX] from @idp without
 * growing its depth.  Returns
 *
 *  the allocated id >= 0 if successful,
 *  -EAGAIN if the tree needs to grow for allocation to succeed,
 *  -ENOSPC if the id space is exhausted,
 *  -ENOMEM if more idr_layers need to be allocated.
 *
 * On success @pa holds the path to the slot: pa[0] is the leaf layer,
 * higher indices are its ancestors and pa[@idp->layers] is %NULL.  On
 * -EAGAIN, *@starting_id has been advanced to the id at which the search
 * should resume once the tree has grown.
 */
static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
		     gfp_t gfp_mask, struct idr *layer_idr)
{
	int n, m, sh;
	struct idr_layer *p, *new;
	int l, id, oid;

	id = *starting_id;
 restart:
	p = idp->top;
	l = idp->layers;
	/* NULL-terminate the path above the top layer; l now indexes the top */
	pa[l--] = NULL;
	while (1) {
		/*
		 * We run around this while until we reach the leaf node...
		 */
		/* n: slot of id in this layer; m: first non-full slot >= n */
		n = (id >> (IDR_BITS*l)) & IDR_MASK;
		m = find_next_zero_bit(p->bitmap, IDR_SIZE, n);
		if (m == IDR_SIZE) {
			/* no space available go back to previous layer. */
			l++;
			oid = id;
			/* advance id to the first id past this layer's range */
			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;

			/* if already at the top layer, we need to grow */
			if (id > idr_max(idp->layers)) {
				*starting_id = id;
				return -EAGAIN;
			}
			p = pa[l];
			BUG_ON(!p);

			/* If we need to go up one layer, continue the
			 * loop; otherwise, restart from the top.
			 */
			sh = IDR_BITS * (l + 1);
			if (oid >> sh == id >> sh)
				continue;
			else
				goto restart;
		}
		if (m != n) {
			/*
			 * Skipped ahead within this layer: replace this
			 * layer's index with m and zero all lower bits.
			 */
			sh = IDR_BITS*l;
			id = ((id >> sh) ^ n ^ m) << sh;
		}
		if ((id >= MAX_IDR_BIT) || (id < 0))
			return -ENOSPC;
		if (l == 0)
			break;
		/*
		 * Create the layer below if it is missing.
		 */
		if (!p->ary[m]) {
			new = idr_layer_alloc(gfp_mask, layer_idr);
			if (!new)
				return -ENOMEM;
			new->layer = l-1;
			new->prefix = id & idr_layer_prefix_mask(new->layer);
			rcu_assign_pointer(p->ary[m], new);
			p->count++;
		}
		/* record this layer in the path and descend into slot m */
		pa[l--] = p;
		p = p->ary[m];
	}

	/* l == 0 here: store the leaf layer as pa[0] */
	pa[l] = p;
	return id;
}
288 
/*
 * idr_get_empty_slot - find a free id >= @starting_id, growing the tree
 * @idp: idr handle
 * @starting_id: id to start the search at
 * @pa: path buffer filled in by sub_alloc()
 * @gfp_mask: allocation mask for idr_layer_alloc()
 * @layer_idr: optional idr passed to idr_layer_alloc()
 *
 * Creates the top layer if the tree is empty, stacks new layers on top
 * until the wanted id is addressable, then lets sub_alloc() search.  If
 * sub_alloc() asks for more depth (-EAGAIN) the whole sequence is repeated
 * with the id sub_alloc() advanced to.
 *
 * Returns the id found (>= 0), or -ENOMEM / -ENOSPC.  The slot is not
 * filled here; see idr_fill_slot().
 */
static int idr_get_empty_slot(struct idr *idp, int starting_id,
			      struct idr_layer **pa, gfp_t gfp_mask,
			      struct idr *layer_idr)
{
	struct idr_layer *p, *new;
	int layers, v, id;
	unsigned long flags;

	id = starting_id;
build_up:
	p = idp->top;
	layers = idp->layers;
	if (unlikely(!p)) {
		/* empty tree: start with a single leaf layer */
		if (!(p = idr_layer_alloc(gfp_mask, layer_idr)))
			return -ENOMEM;
		p->layer = 0;
		layers = 1;
	}
	/*
	 * Add a new layer to the top of the tree if the requested
	 * id is larger than the currently allocated space.
	 */
	while (id > idr_max(layers)) {
		layers++;
		if (!p->count) {
			/* special case: if the tree is currently empty,
			 * then we grow the tree by moving the top node
			 * upwards.
			 */
			p->layer++;
			WARN_ON_ONCE(p->prefix);
			continue;
		}
		if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) {
			/*
			 * The allocation failed.  If we built part of
			 * the structure tear it down.
			 */
			spin_lock_irqsave(&idp->lock, flags);
			/*
			 * Walk down through ary[0] from the newest layer
			 * until the published top is reached, resetting each
			 * layer built here and putting it on idp's free list.
			 */
			for (new = p; p && p != idp->top; new = p) {
				p = p->ary[0];
				new->ary[0] = NULL;
				new->count = 0;
				bitmap_clear(new->bitmap, 0, IDR_SIZE);
				__move_to_free_list(idp, new);
			}
			spin_unlock_irqrestore(&idp->lock, flags);
			return -ENOMEM;
		}
		/* the old top becomes slot 0 of the new, taller top */
		new->ary[0] = p;
		new->count = 1;
		new->layer = layers-1;
		new->prefix = id & idr_layer_prefix_mask(new->layer);
		if (bitmap_full(p->bitmap, IDR_SIZE))
			__set_bit(0, new->bitmap);
		p = new;
	}
	/* publish the (possibly new) top and depth before searching */
	rcu_assign_pointer(idp->top, p);
	idp->layers = layers;
	v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr);
	if (v == -EAGAIN)
		goto build_up;
	return(v);
}
353 
354 /*
355  * @id and @pa are from a successful allocation from idr_get_empty_slot().
356  * Install the user pointer @ptr and mark the slot full.
357  */
358 static void idr_fill_slot(struct idr *idr, void *ptr, int id,
359 			  struct idr_layer **pa)
360 {
361 	/* update hint used for lookup, cleared from free_layer() */
362 	rcu_assign_pointer(idr->hint, pa[0]);
363 
364 	rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr);
365 	pa[0]->count++;
366 	idr_mark_full(pa, id);
367 }
368 
369 
370 /**
371  * idr_preload - preload for idr_alloc()
372  * @gfp_mask: allocation mask to use for preloading
373  *
374  * Preload per-cpu layer buffer for idr_alloc().  Can only be used from
375  * process context and each idr_preload() invocation should be matched with
376  * idr_preload_end().  Note that preemption is disabled while preloaded.
377  *
378  * The first idr_alloc() in the preloaded section can be treated as if it
379  * were invoked with @gfp_mask used for preloading.  This allows using more
380  * permissive allocation masks for idrs protected by spinlocks.
381  *
382  * For example, if idr_alloc() below fails, the failure can be treated as
383  * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT.
384  *
385  *	idr_preload(GFP_KERNEL);
386  *	spin_lock(lock);
387  *
388  *	id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT);
389  *
390  *	spin_unlock(lock);
391  *	idr_preload_end();
392  *	if (id < 0)
393  *		error;
394  */
395 void idr_preload(gfp_t gfp_mask)
396 {
397 	/*
398 	 * Consuming preload buffer from non-process context breaks preload
399 	 * allocation guarantee.  Disallow usage from those contexts.
400 	 */
401 	WARN_ON_ONCE(in_interrupt());
402 	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
403 
404 	preempt_disable();
405 
406 	/*
407 	 * idr_alloc() is likely to succeed w/o full idr_layer buffer and
408 	 * return value from idr_alloc() needs to be checked for failure
409 	 * anyway.  Silently give up if allocation fails.  The caller can
410 	 * treat failures from idr_alloc() as if idr_alloc() were called
411 	 * with @gfp_mask which should be enough.
412 	 */
413 	while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
414 		struct idr_layer *new;
415 
416 		preempt_enable();
417 		new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
418 		preempt_disable();
419 		if (!new)
420 			break;
421 
422 		/* link the new one to per-cpu preload list */
423 		new->ary[0] = __this_cpu_read(idr_preload_head);
424 		__this_cpu_write(idr_preload_head, new);
425 		__this_cpu_inc(idr_preload_cnt);
426 	}
427 }
428 EXPORT_SYMBOL(idr_preload);
429 
430 /**
431  * idr_alloc - allocate new idr entry
432  * @idr: the (initialized) idr
433  * @ptr: pointer to be associated with the new id
434  * @start: the minimum id (inclusive)
435  * @end: the maximum id (exclusive, <= 0 for max)
436  * @gfp_mask: memory allocation flags
437  *
438  * Allocate an id in [start, end) and associate it with @ptr.  If no ID is
439  * available in the specified range, returns -ENOSPC.  On memory allocation
440  * failure, returns -ENOMEM.
441  *
442  * Note that @end is treated as max when <= 0.  This is to always allow
443  * using @start + N as @end as long as N is inside integer range.
444  *
445  * The user is responsible for exclusively synchronizing all operations
446  * which may modify @idr.  However, read-only accesses such as idr_find()
447  * or iteration can be performed under RCU read lock provided the user
448  * destroys @ptr in RCU-safe way after removal from idr.
449  */
450 int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
451 {
452 	int max = end > 0 ? end - 1 : INT_MAX;	/* inclusive upper limit */
453 	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
454 	int id;
455 
456 	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
457 
458 	/* sanity checks */
459 	if (WARN_ON_ONCE(start < 0))
460 		return -EINVAL;
461 	if (unlikely(max < start))
462 		return -ENOSPC;
463 
464 	/* allocate id */
465 	id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL);
466 	if (unlikely(id < 0))
467 		return id;
468 	if (unlikely(id > max))
469 		return -ENOSPC;
470 
471 	idr_fill_slot(idr, ptr, id, pa);
472 	return id;
473 }
474 EXPORT_SYMBOL_GPL(idr_alloc);
475 
476 /**
477  * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion
478  * @idr: the (initialized) idr
479  * @ptr: pointer to be associated with the new id
480  * @start: the minimum id (inclusive)
481  * @end: the maximum id (exclusive, <= 0 for max)
482  * @gfp_mask: memory allocation flags
483  *
484  * Essentially the same as idr_alloc, but prefers to allocate progressively
485  * higher ids if it can. If the "cur" counter wraps, then it will start again
486  * at the "start" end of the range and allocate one that has already been used.
487  */
488 int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end,
489 			gfp_t gfp_mask)
490 {
491 	int id;
492 
493 	id = idr_alloc(idr, ptr, max(start, idr->cur), end, gfp_mask);
494 	if (id == -ENOSPC)
495 		id = idr_alloc(idr, ptr, start, end, gfp_mask);
496 
497 	if (likely(id >= 0))
498 		idr->cur = id + 1;
499 	return id;
500 }
501 EXPORT_SYMBOL(idr_alloc_cyclic);
502 
/* Emit a warning that idr_remove() was asked to remove an unallocated @id. */
static void idr_remove_warning(int id)
{
	WARN(1, "idr_remove called for id=%d which is not allocated.\n", id);
}
507 
/*
 * sub_remove - clear the slot for @id and free layers that become empty
 * @idp: idr handle
 * @shift: bit shift selecting the top layer's index within @id
 * @id: id to remove
 *
 * Walks from the top layer down to the leaf, clearing the "full" bit for
 * @id's slot in every layer passed and recording the address of each slot
 * followed.  If the leaf has the id marked allocated, the slot is emptied
 * and the recorded path is walked back up, dropping each layer whose
 * count reaches zero.  Otherwise a warning is issued.
 */
static void sub_remove(struct idr *idp, int shift, int id)
{
	struct idr_layer *p = idp->top;
	/* stack of pointers to the slots on the path; pa[0] is a NULL sentinel */
	struct idr_layer **pa[MAX_IDR_LEVEL + 1];
	struct idr_layer ***paa = &pa[0];
	struct idr_layer *to_free;
	int n;

	*paa = NULL;
	*++paa = &idp->top;

	while ((shift > 0) && p) {
		n = (id >> shift) & IDR_MASK;
		__clear_bit(n, p->bitmap);
		*++paa = &p->ary[n];
		p = p->ary[n];
		shift -= IDR_BITS;
	}
	n = id & IDR_MASK;
	if (likely(p != NULL && test_bit(n, p->bitmap))) {
		__clear_bit(n, p->bitmap);
		RCU_INIT_POINTER(p->ary[n], NULL);
		to_free = NULL;
		/*
		 * **paa is the layer the recorded slot points at.  Drop one
		 * reference from it; while that empties the layer, unlink it
		 * from its slot and move up.  Freeing is deferred by one
		 * step: the previous layer is freed once the next one has
		 * been unlinked.
		 */
		while(*paa && ! --((**paa)->count)){
			if (to_free)
				free_layer(idp, to_free);
			to_free = **paa;
			**paa-- = NULL;
		}
		/* reached the sentinel: even the top layer went away */
		if (!*paa)
			idp->layers = 0;
		if (to_free)
			free_layer(idp, to_free);
	} else
		idr_remove_warning(id);
}
544 
545 /**
546  * idr_remove - remove the given id and free its slot
547  * @idp: idr handle
548  * @id: unique key
549  */
550 void idr_remove(struct idr *idp, int id)
551 {
552 	struct idr_layer *p;
553 	struct idr_layer *to_free;
554 
555 	if (id < 0)
556 		return;
557 
558 	if (id > idr_max(idp->layers)) {
559 		idr_remove_warning(id);
560 		return;
561 	}
562 
563 	sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
564 	if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
565 	    idp->top->ary[0]) {
566 		/*
567 		 * Single child at leftmost slot: we can shrink the tree.
568 		 * This level is not needed anymore since when layers are
569 		 * inserted, they are inserted at the top of the existing
570 		 * tree.
571 		 */
572 		to_free = idp->top;
573 		p = idp->top->ary[0];
574 		rcu_assign_pointer(idp->top, p);
575 		--idp->layers;
576 		to_free->count = 0;
577 		bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
578 		free_layer(idp, to_free);
579 	}
580 }
581 EXPORT_SYMBOL(idr_remove);
582 
/*
 * __idr_remove_all - detach and free every layer of @idp
 * @idp: idr handle
 *
 * Unpublishes the top layer, then walks the id space depth-first and
 * hands every layer it visited to free_layer().  The pointers stored in
 * the leaf slots are not touched.  Leaves @idp with no top layer and
 * layers == 0.
 */
static void __idr_remove_all(struct idr *idp)
{
	int n, id, max;
	int bt_mask;
	struct idr_layer *p;
	/* stack of layers on the current path; pa[0] is the old top */
	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
	struct idr_layer **paa = &pa[0];

	n = idp->layers * IDR_BITS;
	*paa = idp->top;
	RCU_INIT_POINTER(idp->top, NULL);
	max = idr_max(idp->layers);

	id = 0;
	while (id >= 0 && id <= max) {
		p = *paa;
		/* descend, pushing each layer, until n is down to IDR_BITS */
		while (n > IDR_BITS && p) {
			n -= IDR_BITS;
			p = p->ary[(id >> n) & IDR_MASK];
			*++paa = p;
		}

		/* step past the range covered by the layer on top of the stack */
		bt_mask = id;
		id += 1 << n;
		/* Get the highest bit that the above add changed from 0->1. */
		/* pop and free every layer whose range has now been left */
		while (n < fls(id ^ bt_mask)) {
			if (*paa)
				free_layer(idp, *paa);
			n += IDR_BITS;
			--paa;
		}
	}
	idp->layers = 0;
}
617 
618 /**
619  * idr_destroy - release all cached layers within an idr tree
620  * @idp: idr handle
621  *
622  * Free all id mappings and all idp_layers.  After this function, @idp is
623  * completely unused and can be freed / recycled.  The caller is
624  * responsible for ensuring that no one else accesses @idp during or after
625  * idr_destroy().
626  *
627  * A typical clean-up sequence for objects stored in an idr tree will use
628  * idr_for_each() to free all objects, if necessary, then idr_destroy() to
629  * free up the id mappings and cached idr_layers.
630  */
631 void idr_destroy(struct idr *idp)
632 {
633 	__idr_remove_all(idp);
634 
635 	while (idp->id_free_cnt) {
636 		struct idr_layer *p = get_from_free_list(idp);
637 		kmem_cache_free(idr_layer_cache, p);
638 	}
639 }
640 EXPORT_SYMBOL(idr_destroy);
641 
642 void *idr_find_slowpath(struct idr *idp, int id)
643 {
644 	int n;
645 	struct idr_layer *p;
646 
647 	if (id < 0)
648 		return NULL;
649 
650 	p = rcu_dereference_raw(idp->top);
651 	if (!p)
652 		return NULL;
653 	n = (p->layer+1) * IDR_BITS;
654 
655 	if (id > idr_max(p->layer + 1))
656 		return NULL;
657 	BUG_ON(n == 0);
658 
659 	while (n > 0 && p) {
660 		n -= IDR_BITS;
661 		BUG_ON(n != p->layer*IDR_BITS);
662 		p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
663 	}
664 	return((void *)p);
665 }
666 EXPORT_SYMBOL(idr_find_slowpath);
667 
/**
 * idr_for_each - iterate through all stored pointers
 * @idp: idr handle
 * @fn: function to be called for each pointer
 * @data: data passed back to callback function
 *
 * Iterate over the pointers registered with the given idr.  The
 * callback function will be called for each pointer currently
 * registered, passing the id, the pointer and the data pointer passed
 * to this function.  It is not safe to modify the idr tree while in
 * the callback, so functions such as idr_alloc() and idr_remove() are
 * not allowed.
 *
 * We check the return of @fn each time. If it returns anything other
 * than %0, we break out and return that value.  Otherwise %0 is returned
 * after the whole id range has been visited.
 *
 * The caller must serialize idr_for_each() vs idr_alloc() and idr_remove().
 */
int idr_for_each(struct idr *idp,
		 int (*fn)(int id, void *p, void *data), void *data)
{
	int n, id, max, error = 0;
	struct idr_layer *p;
	/* stack of nodes on the current path; pa[0] is the top layer */
	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
	struct idr_layer **paa = &pa[0];

	n = idp->layers * IDR_BITS;
	*paa = rcu_dereference_raw(idp->top);
	max = idr_max(idp->layers);

	id = 0;
	while (id >= 0 && id <= max) {
		p = *paa;
		/*
		 * Descend towards the slot for id.  The loop stops early,
		 * with n still > 0, when a subtree is missing.
		 */
		while (n > 0 && p) {
			n -= IDR_BITS;
			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
			*++paa = p;
		}

		/* p is non-NULL only when the descent reached a stored pointer */
		if (p) {
			error = fn(id, (void *)p, data);
			if (error)
				break;
		}

		/* skip the id (n == 0) or the whole missing subtree (n > 0) */
		id += 1 << n;
		/* pop back up until n is at least fls(id) */
		while (n < fls(id)) {
			n += IDR_BITS;
			--paa;
		}
	}

	return error;
}
EXPORT_SYMBOL(idr_for_each);
723 
/**
 * idr_get_next - look up the first registered object at or after an id
 * @idp: idr handle
 * @nextidp:  pointer to lookup key
 *
 * Returns the pointer registered under the smallest id that is greater
 * than or equal to *@nextidp, or %NULL if there is none.  On success
 * *@nextidp is updated to the id that was found; it is left untouched
 * when %NULL is returned.
 *
 * This function can be called under rcu_read_lock(), given that the leaf
 * pointers lifetimes are correctly managed.
 */
void *idr_get_next(struct idr *idp, int *nextidp)
{
	/* pa is the stack of nodes on the current path; pa[0] is the top */
	struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1];
	struct idr_layer **paa = &pa[0];
	int id = *nextidp;
	int n, max;

	/* find first ent */
	p = *paa = rcu_dereference_raw(idp->top);
	if (!p)
		return NULL;
	/* depth and id limit are taken from the top layer itself */
	n = (p->layer + 1) * IDR_BITS;
	max = idr_max(p->layer + 1);

	while (id >= 0 && id <= max) {
		p = *paa;
		/* descend towards id; stops early on a missing subtree */
		while (n > 0 && p) {
			n -= IDR_BITS;
			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
			*++paa = p;
		}

		if (p) {
			*nextidp = id;
			return p;
		}

		/*
		 * Proceed to the next layer at the current level.  Unlike
		 * idr_for_each(), @id isn't guaranteed to be aligned to
		 * layer boundary at this point and adding 1 << n may
		 * incorrectly skip IDs.  Make sure we jump to the
		 * beginning of the next layer using round_up().
		 */
		id = round_up(id + 1, 1 << n);
		/* pop back up until n is at least fls(id) */
		while (n < fls(id)) {
			n += IDR_BITS;
			--paa;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(idr_get_next);
779 
780 
781 /**
782  * idr_replace - replace pointer for given id
783  * @idp: idr handle
784  * @ptr: pointer you want associated with the id
785  * @id: lookup key
786  *
787  * Replace the pointer registered with an id and return the old value.
788  * A %-ENOENT return indicates that @id was not found.
789  * A %-EINVAL return indicates that @id was not within valid constraints.
790  *
791  * The caller must serialize with writers.
792  */
793 void *idr_replace(struct idr *idp, void *ptr, int id)
794 {
795 	int n;
796 	struct idr_layer *p, *old_p;
797 
798 	if (id < 0)
799 		return ERR_PTR(-EINVAL);
800 
801 	p = idp->top;
802 	if (!p)
803 		return ERR_PTR(-ENOENT);
804 
805 	if (id > idr_max(p->layer + 1))
806 		return ERR_PTR(-ENOENT);
807 
808 	n = p->layer * IDR_BITS;
809 	while ((n > 0) && p) {
810 		p = p->ary[(id >> n) & IDR_MASK];
811 		n -= IDR_BITS;
812 	}
813 
814 	n = id & IDR_MASK;
815 	if (unlikely(p == NULL || !test_bit(n, p->bitmap)))
816 		return ERR_PTR(-ENOENT);
817 
818 	old_p = p->ary[n];
819 	rcu_assign_pointer(p->ary[n], ptr);
820 
821 	return old_p;
822 }
823 EXPORT_SYMBOL(idr_replace);
824 
825 void __init idr_init_cache(void)
826 {
827 	idr_layer_cache = kmem_cache_create("idr_layer_cache",
828 				sizeof(struct idr_layer), 0, SLAB_PANIC, NULL);
829 }
830 
831 /**
832  * idr_init - initialize idr handle
833  * @idp:	idr handle
834  *
835  * This function is use to set up the handle (@idp) that you will pass
836  * to the rest of the functions.
837  */
838 void idr_init(struct idr *idp)
839 {
840 	memset(idp, 0, sizeof(struct idr));
841 	spin_lock_init(&idp->lock);
842 }
843 EXPORT_SYMBOL(idr_init);
844 
/*
 * idr_for_each() callback used by idr_is_empty(): returns non-zero for
 * whatever entry it is given, which stops the iteration at the first
 * entry found.  All arguments are ignored.
 */
static int idr_has_entry(int id, void *p, void *data)
{
	const int entry_found = 1;

	return entry_found;
}
849 
850 bool idr_is_empty(struct idr *idp)
851 {
852 	return !idr_for_each(idp, idr_has_entry, NULL);
853 }
854 EXPORT_SYMBOL(idr_is_empty);
855 
856 /**
857  * DOC: IDA description
858  * IDA - IDR based ID allocator
859  *
 * This is an id allocator without id -> pointer translation.  Memory
 * usage is much lower than a full-blown idr because each id only
 * occupies a bit.  ida uses a custom leaf node which contains
 * IDA_BITMAP_BITS slots.
864  *
865  * 2007-04-25  written by Tejun Heo <htejun@gmail.com>
866  */
867 
868 static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
869 {
870 	unsigned long flags;
871 
872 	if (!ida->free_bitmap) {
873 		spin_lock_irqsave(&ida->idr.lock, flags);
874 		if (!ida->free_bitmap) {
875 			ida->free_bitmap = bitmap;
876 			bitmap = NULL;
877 		}
878 		spin_unlock_irqrestore(&ida->idr.lock, flags);
879 	}
880 
881 	kfree(bitmap);
882 }
883 
884 /**
885  * ida_pre_get - reserve resources for ida allocation
886  * @ida:	ida handle
887  * @gfp_mask:	memory allocation flag
888  *
889  * This function should be called prior to locking and calling the
890  * following function.  It preallocates enough memory to satisfy the
891  * worst possible allocation.
892  *
893  * If the system is REALLY out of memory this function returns %0,
894  * otherwise %1.
895  */
896 int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
897 {
898 	/* allocate idr_layers */
899 	if (!__idr_pre_get(&ida->idr, gfp_mask))
900 		return 0;
901 
902 	/* allocate free_bitmap */
903 	if (!ida->free_bitmap) {
904 		struct ida_bitmap *bitmap;
905 
906 		bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask);
907 		if (!bitmap)
908 			return 0;
909 
910 		free_bitmap(ida, bitmap);
911 	}
912 
913 	return 1;
914 }
915 EXPORT_SYMBOL(ida_pre_get);
916 
/**
 * ida_get_new_above - allocate new ID above or equal to a start id
 * @ida:	ida handle
 * @starting_id: id to start search at
 * @p_id:	pointer to the allocated handle
 *
 * Allocate new ID above or equal to @starting_id.  It should be called
 * with any required locks.
 *
 * If memory is required, it will return %-EAGAIN, you should unlock
 * and go back to the ida_pre_get() call.  If the ida is full, it will
 * return %-ENOSPC.  On success %0 is returned and the id is stored in
 * *@p_id.
 *
 * @p_id returns a value in the range @starting_id ... %0x7fffffff.
 */
int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
{
	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
	struct ida_bitmap *bitmap;
	unsigned long flags;
	/* each idr slot holds one bitmap covering IDA_BITMAP_BITS ids */
	int idr_id = starting_id / IDA_BITMAP_BITS;
	int offset = starting_id % IDA_BITMAP_BITS;
	int t, id;

 restart:
	/* get vacant slot */
	/*
	 * layer_idr is the ida's own idr, so any layers needed come from
	 * its free list (filled by ida_pre_get()) rather than the allocator.
	 */
	t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr);
	if (t < 0)
		return t == -ENOMEM ? -EAGAIN : t;

	if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT)
		return -ENOSPC;

	/* landed in a later chunk than requested: search it from bit 0 */
	if (t != idr_id)
		offset = 0;
	idr_id = t;

	/* if bitmap isn't there, create a new one */
	bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK];
	if (!bitmap) {
		/* take over the spare bitmap stashed by ida_pre_get() */
		spin_lock_irqsave(&ida->idr.lock, flags);
		bitmap = ida->free_bitmap;
		ida->free_bitmap = NULL;
		spin_unlock_irqrestore(&ida->idr.lock, flags);

		if (!bitmap)
			return -EAGAIN;

		memset(bitmap, 0, sizeof(struct ida_bitmap));
		rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
				(void *)bitmap);
		pa[0]->count++;
	}

	/* lookup for empty slot */
	t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset);
	if (t == IDA_BITMAP_BITS) {
		/* no empty slot after offset, continue to the next chunk */
		idr_id++;
		offset = 0;
		goto restart;
	}

	id = idr_id * IDA_BITMAP_BITS + t;
	if (id >= MAX_IDR_BIT)
		return -ENOSPC;

	__set_bit(t, bitmap->bitmap);
	/* a completely used chunk is marked full in the idr tree */
	if (++bitmap->nr_busy == IDA_BITMAP_BITS)
		idr_mark_full(pa, idr_id);

	*p_id = id;

	/* Each leaf node can handle nearly a thousand slots and the
	 * whole idea of ida is to have small memory foot print.
	 * Throw away extra resources one by one after each successful
	 * allocation.
	 */
	if (ida->idr.id_free_cnt || ida->free_bitmap) {
		struct idr_layer *p = get_from_free_list(&ida->idr);
		if (p)
			kmem_cache_free(idr_layer_cache, p);
	}

	return 0;
}
EXPORT_SYMBOL(ida_get_new_above);
1004 
1005 /**
1006  * ida_remove - remove the given ID
1007  * @ida:	ida handle
1008  * @id:		ID to free
1009  */
1010 void ida_remove(struct ida *ida, int id)
1011 {
1012 	struct idr_layer *p = ida->idr.top;
1013 	int shift = (ida->idr.layers - 1) * IDR_BITS;
1014 	int idr_id = id / IDA_BITMAP_BITS;
1015 	int offset = id % IDA_BITMAP_BITS;
1016 	int n;
1017 	struct ida_bitmap *bitmap;
1018 
1019 	if (idr_id > idr_max(ida->idr.layers))
1020 		goto err;
1021 
1022 	/* clear full bits while looking up the leaf idr_layer */
1023 	while ((shift > 0) && p) {
1024 		n = (idr_id >> shift) & IDR_MASK;
1025 		__clear_bit(n, p->bitmap);
1026 		p = p->ary[n];
1027 		shift -= IDR_BITS;
1028 	}
1029 
1030 	if (p == NULL)
1031 		goto err;
1032 
1033 	n = idr_id & IDR_MASK;
1034 	__clear_bit(n, p->bitmap);
1035 
1036 	bitmap = (void *)p->ary[n];
1037 	if (!bitmap || !test_bit(offset, bitmap->bitmap))
1038 		goto err;
1039 
1040 	/* update bitmap and remove it if empty */
1041 	__clear_bit(offset, bitmap->bitmap);
1042 	if (--bitmap->nr_busy == 0) {
1043 		__set_bit(n, p->bitmap);	/* to please idr_remove() */
1044 		idr_remove(&ida->idr, idr_id);
1045 		free_bitmap(ida, bitmap);
1046 	}
1047 
1048 	return;
1049 
1050  err:
1051 	WARN(1, "ida_remove called for id=%d which is not allocated.\n", id);
1052 }
1053 EXPORT_SYMBOL(ida_remove);
1054 
1055 /**
1056  * ida_destroy - release all cached layers within an ida tree
1057  * @ida:		ida handle
1058  */
1059 void ida_destroy(struct ida *ida)
1060 {
1061 	idr_destroy(&ida->idr);
1062 	kfree(ida->free_bitmap);
1063 }
1064 EXPORT_SYMBOL(ida_destroy);
1065 
1066 /**
1067  * ida_simple_get - get a new id.
1068  * @ida: the (initialized) ida.
1069  * @start: the minimum id (inclusive, < 0x8000000)
1070  * @end: the maximum id (exclusive, < 0x8000000 or 0)
1071  * @gfp_mask: memory allocation flags
1072  *
1073  * Allocates an id in the range start <= id < end, or returns -ENOSPC.
1074  * On memory allocation failure, returns -ENOMEM.
1075  *
1076  * Use ida_simple_remove() to get rid of an id.
1077  */
1078 int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
1079 		   gfp_t gfp_mask)
1080 {
1081 	int ret, id;
1082 	unsigned int max;
1083 	unsigned long flags;
1084 
1085 	BUG_ON((int)start < 0);
1086 	BUG_ON((int)end < 0);
1087 
1088 	if (end == 0)
1089 		max = 0x80000000;
1090 	else {
1091 		BUG_ON(end < start);
1092 		max = end - 1;
1093 	}
1094 
1095 again:
1096 	if (!ida_pre_get(ida, gfp_mask))
1097 		return -ENOMEM;
1098 
1099 	spin_lock_irqsave(&simple_ida_lock, flags);
1100 	ret = ida_get_new_above(ida, start, &id);
1101 	if (!ret) {
1102 		if (id > max) {
1103 			ida_remove(ida, id);
1104 			ret = -ENOSPC;
1105 		} else {
1106 			ret = id;
1107 		}
1108 	}
1109 	spin_unlock_irqrestore(&simple_ida_lock, flags);
1110 
1111 	if (unlikely(ret == -EAGAIN))
1112 		goto again;
1113 
1114 	return ret;
1115 }
1116 EXPORT_SYMBOL(ida_simple_get);
1117 
1118 /**
1119  * ida_simple_remove - remove an allocated id.
1120  * @ida: the (initialized) ida.
1121  * @id: the id returned by ida_simple_get.
1122  */
1123 void ida_simple_remove(struct ida *ida, unsigned int id)
1124 {
1125 	unsigned long flags;
1126 
1127 	BUG_ON((int)id < 0);
1128 	spin_lock_irqsave(&simple_ida_lock, flags);
1129 	ida_remove(ida, id);
1130 	spin_unlock_irqrestore(&simple_ida_lock, flags);
1131 }
1132 EXPORT_SYMBOL(ida_simple_remove);
1133 
1134 /**
1135  * ida_init - initialize ida handle
1136  * @ida:	ida handle
1137  *
1138  * This function is use to set up the handle (@ida) that you will pass
1139  * to the rest of the functions.
1140  */
1141 void ida_init(struct ida *ida)
1142 {
1143 	memset(ida, 0, sizeof(struct ida));
1144 	idr_init(&ida->idr);
1145 
1146 }
1147 EXPORT_SYMBOL(ida_init);
1148