10fc479b1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 288459642SOmar Sandoval /* 388459642SOmar Sandoval * Copyright (C) 2016 Facebook 488459642SOmar Sandoval * Copyright (C) 2013-2014 Jens Axboe 588459642SOmar Sandoval */ 688459642SOmar Sandoval 7af8601adSIngo Molnar #include <linux/sched.h> 898d95416SOmar Sandoval #include <linux/random.h> 988459642SOmar Sandoval #include <linux/sbitmap.h> 1024af1ccfSOmar Sandoval #include <linux/seq_file.h> 1188459642SOmar Sandoval 12b2dbff1bSJens Axboe /* 13b2dbff1bSJens Axboe * See if we have deferred clears that we can batch move 14b2dbff1bSJens Axboe */ 15b78beea0SPavel Begunkov static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) 16b2dbff1bSJens Axboe { 17*c3250c8dSPavel Begunkov unsigned long mask; 18b2dbff1bSJens Axboe 19661d4f55SPavel Begunkov if (!READ_ONCE(map->cleared)) 20661d4f55SPavel Begunkov return false; 21b2dbff1bSJens Axboe 22b2dbff1bSJens Axboe /* 23b2dbff1bSJens Axboe * First get a stable cleared mask, setting the old mask to 0. 24b2dbff1bSJens Axboe */ 25b78beea0SPavel Begunkov mask = xchg(&map->cleared, 0); 26b2dbff1bSJens Axboe 27b2dbff1bSJens Axboe /* 28b2dbff1bSJens Axboe * Now clear the masked bits in our free word 29b2dbff1bSJens Axboe */ 30*c3250c8dSPavel Begunkov atomic_long_andnot(mask, (atomic_long_t *)&map->word); 31*c3250c8dSPavel Begunkov BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word)); 32661d4f55SPavel Begunkov return true; 33b2dbff1bSJens Axboe } 34b2dbff1bSJens Axboe 3588459642SOmar Sandoval int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 3688459642SOmar Sandoval gfp_t flags, int node) 3788459642SOmar Sandoval { 3888459642SOmar Sandoval unsigned int bits_per_word; 3988459642SOmar Sandoval unsigned int i; 4088459642SOmar Sandoval 4188459642SOmar Sandoval if (shift < 0) { 4288459642SOmar Sandoval shift = ilog2(BITS_PER_LONG); 4388459642SOmar Sandoval /* 4488459642SOmar Sandoval * If the bitmap is small, shrink the number of bits per word so 4588459642SOmar Sandoval * we spread over a few cachelines, at least. If less than 4 4688459642SOmar Sandoval * bits, just forget about it, it's not going to work optimally 4788459642SOmar Sandoval * anyway. 4888459642SOmar Sandoval */ 4988459642SOmar Sandoval if (depth >= 4) { 5088459642SOmar Sandoval while ((4U << shift) > depth) 5188459642SOmar Sandoval shift--; 5288459642SOmar Sandoval } 5388459642SOmar Sandoval } 5488459642SOmar Sandoval bits_per_word = 1U << shift; 5588459642SOmar Sandoval if (bits_per_word > BITS_PER_LONG) 5688459642SOmar Sandoval return -EINVAL; 5788459642SOmar Sandoval 5888459642SOmar Sandoval sb->shift = shift; 5988459642SOmar Sandoval sb->depth = depth; 6088459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 6188459642SOmar Sandoval 6288459642SOmar Sandoval if (depth == 0) { 6388459642SOmar Sandoval sb->map = NULL; 6488459642SOmar Sandoval return 0; 6588459642SOmar Sandoval } 6688459642SOmar Sandoval 67590b5b7dSKees Cook sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); 6888459642SOmar Sandoval if (!sb->map) 6988459642SOmar Sandoval return -ENOMEM; 7088459642SOmar Sandoval 7188459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 7288459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 7388459642SOmar Sandoval depth -= sb->map[i].depth; 7488459642SOmar Sandoval } 7588459642SOmar Sandoval return 0; 7688459642SOmar Sandoval } 7788459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_init_node); 7888459642SOmar Sandoval 7988459642SOmar Sandoval void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 8088459642SOmar Sandoval { 8188459642SOmar Sandoval unsigned int bits_per_word = 1U << sb->shift; 8288459642SOmar Sandoval unsigned int i; 8388459642SOmar Sandoval 84b2dbff1bSJens Axboe for (i = 0; i < sb->map_nr; i++) 85b78beea0SPavel Begunkov sbitmap_deferred_clear(&sb->map[i]); 86b2dbff1bSJens Axboe 8788459642SOmar Sandoval sb->depth = depth; 8888459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 8988459642SOmar Sandoval 9088459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 9188459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 9288459642SOmar Sandoval depth -= sb->map[i].depth; 9388459642SOmar Sandoval } 9488459642SOmar Sandoval } 9588459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_resize); 9688459642SOmar Sandoval 97c05e6673SOmar Sandoval static int __sbitmap_get_word(unsigned long *word, unsigned long depth, 98c05e6673SOmar Sandoval unsigned int hint, bool wrap) 9988459642SOmar Sandoval { 10088459642SOmar Sandoval unsigned int orig_hint = hint; 10188459642SOmar Sandoval int nr; 10288459642SOmar Sandoval 10388459642SOmar Sandoval while (1) { 104c05e6673SOmar Sandoval nr = find_next_zero_bit(word, depth, hint); 105c05e6673SOmar Sandoval if (unlikely(nr >= depth)) { 10688459642SOmar Sandoval /* 10788459642SOmar Sandoval * We started with an offset, and we didn't reset the 10888459642SOmar Sandoval * offset to 0 in a failure case, so start from 0 to 10988459642SOmar Sandoval * exhaust the map. 11088459642SOmar Sandoval */ 11188459642SOmar Sandoval if (orig_hint && hint && wrap) { 11288459642SOmar Sandoval hint = orig_hint = 0; 11388459642SOmar Sandoval continue; 11488459642SOmar Sandoval } 11588459642SOmar Sandoval return -1; 11688459642SOmar Sandoval } 11788459642SOmar Sandoval 1184ace53f1SOmar Sandoval if (!test_and_set_bit_lock(nr, word)) 11988459642SOmar Sandoval break; 12088459642SOmar Sandoval 12188459642SOmar Sandoval hint = nr + 1; 122c05e6673SOmar Sandoval if (hint >= depth - 1) 12388459642SOmar Sandoval hint = 0; 12488459642SOmar Sandoval } 12588459642SOmar Sandoval 12688459642SOmar Sandoval return nr; 12788459642SOmar Sandoval } 12888459642SOmar Sandoval 129ea86ea2cSJens Axboe static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, 130ea86ea2cSJens Axboe unsigned int alloc_hint, bool round_robin) 131ea86ea2cSJens Axboe { 132b78beea0SPavel Begunkov struct sbitmap_word *map = &sb->map[index]; 133ea86ea2cSJens Axboe int nr; 134ea86ea2cSJens Axboe 135ea86ea2cSJens Axboe do { 136b78beea0SPavel Begunkov nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint, 137ea86ea2cSJens Axboe !round_robin); 138ea86ea2cSJens Axboe if (nr != -1) 139ea86ea2cSJens Axboe break; 140b78beea0SPavel Begunkov if (!sbitmap_deferred_clear(map)) 141ea86ea2cSJens Axboe break; 142ea86ea2cSJens Axboe } while (1); 143ea86ea2cSJens Axboe 144ea86ea2cSJens Axboe return nr; 145ea86ea2cSJens Axboe } 146ea86ea2cSJens Axboe 14788459642SOmar Sandoval int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) 14888459642SOmar Sandoval { 14988459642SOmar Sandoval unsigned int i, index; 15088459642SOmar Sandoval int nr = -1; 15188459642SOmar Sandoval 15288459642SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 15388459642SOmar Sandoval 15427fae429SJens Axboe /* 15527fae429SJens Axboe * Unless we're doing round robin tag allocation, just use the 15627fae429SJens Axboe * alloc_hint to find the right word index. No point in looping 15727fae429SJens Axboe * twice in find_next_zero_bit() for that case. 15827fae429SJens Axboe */ 15927fae429SJens Axboe if (round_robin) 16027fae429SJens Axboe alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); 16127fae429SJens Axboe else 16227fae429SJens Axboe alloc_hint = 0; 16327fae429SJens Axboe 16488459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 165ea86ea2cSJens Axboe nr = sbitmap_find_bit_in_index(sb, index, alloc_hint, 166ea86ea2cSJens Axboe round_robin); 16788459642SOmar Sandoval if (nr != -1) { 16888459642SOmar Sandoval nr += index << sb->shift; 16988459642SOmar Sandoval break; 17088459642SOmar Sandoval } 17188459642SOmar Sandoval 17288459642SOmar Sandoval /* Jump to next index. */ 17388459642SOmar Sandoval alloc_hint = 0; 17427fae429SJens Axboe if (++index >= sb->map_nr) 17527fae429SJens Axboe index = 0; 17688459642SOmar Sandoval } 17788459642SOmar Sandoval 17888459642SOmar Sandoval return nr; 17988459642SOmar Sandoval } 18088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get); 18188459642SOmar Sandoval 182c05e6673SOmar Sandoval int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, 183c05e6673SOmar Sandoval unsigned long shallow_depth) 184c05e6673SOmar Sandoval { 185c05e6673SOmar Sandoval unsigned int i, index; 186c05e6673SOmar Sandoval int nr = -1; 187c05e6673SOmar Sandoval 188c05e6673SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 189c05e6673SOmar Sandoval 190c05e6673SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 191b2dbff1bSJens Axboe again: 192c05e6673SOmar Sandoval nr = __sbitmap_get_word(&sb->map[index].word, 193c05e6673SOmar Sandoval min(sb->map[index].depth, shallow_depth), 194c05e6673SOmar Sandoval SB_NR_TO_BIT(sb, alloc_hint), true); 195c05e6673SOmar Sandoval if (nr != -1) { 196c05e6673SOmar Sandoval nr += index << sb->shift; 197c05e6673SOmar Sandoval break; 198c05e6673SOmar Sandoval } 199c05e6673SOmar Sandoval 200b78beea0SPavel Begunkov if (sbitmap_deferred_clear(&sb->map[index])) 201b2dbff1bSJens Axboe goto again; 202b2dbff1bSJens Axboe 203c05e6673SOmar Sandoval /* Jump to next index. */ 204c05e6673SOmar Sandoval index++; 205c05e6673SOmar Sandoval alloc_hint = index << sb->shift; 206c05e6673SOmar Sandoval 207c05e6673SOmar Sandoval if (index >= sb->map_nr) { 208c05e6673SOmar Sandoval index = 0; 209c05e6673SOmar Sandoval alloc_hint = 0; 210c05e6673SOmar Sandoval } 211c05e6673SOmar Sandoval } 212c05e6673SOmar Sandoval 213c05e6673SOmar Sandoval return nr; 214c05e6673SOmar Sandoval } 215c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get_shallow); 216c05e6673SOmar Sandoval 21788459642SOmar Sandoval bool sbitmap_any_bit_set(const struct sbitmap *sb) 21888459642SOmar Sandoval { 21988459642SOmar Sandoval unsigned int i; 22088459642SOmar Sandoval 22188459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 222b2dbff1bSJens Axboe if (sb->map[i].word & ~sb->map[i].cleared) 22388459642SOmar Sandoval return true; 22488459642SOmar Sandoval } 22588459642SOmar Sandoval return false; 22688459642SOmar Sandoval } 22788459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 22888459642SOmar Sandoval 229ea86ea2cSJens Axboe static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) 23088459642SOmar Sandoval { 23160658e0dSColin Ian King unsigned int i, weight = 0; 23288459642SOmar Sandoval 23388459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 23488459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 23588459642SOmar Sandoval 236ea86ea2cSJens Axboe if (set) 23788459642SOmar Sandoval weight += bitmap_weight(&word->word, word->depth); 238ea86ea2cSJens Axboe else 239ea86ea2cSJens Axboe weight += bitmap_weight(&word->cleared, word->depth); 24088459642SOmar Sandoval } 24188459642SOmar Sandoval return weight; 24288459642SOmar Sandoval } 243ea86ea2cSJens Axboe 244ea86ea2cSJens Axboe static unsigned int sbitmap_weight(const struct sbitmap *sb) 245ea86ea2cSJens Axboe { 246ea86ea2cSJens Axboe return __sbitmap_weight(sb, true); 247ea86ea2cSJens Axboe } 248ea86ea2cSJens Axboe 249ea86ea2cSJens Axboe static unsigned int sbitmap_cleared(const struct sbitmap *sb) 250ea86ea2cSJens Axboe { 251ea86ea2cSJens Axboe return __sbitmap_weight(sb, false); 252ea86ea2cSJens Axboe } 25388459642SOmar Sandoval 25424af1ccfSOmar Sandoval void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 25524af1ccfSOmar Sandoval { 25624af1ccfSOmar Sandoval seq_printf(m, "depth=%u\n", sb->depth); 257ea86ea2cSJens Axboe seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb)); 258ea86ea2cSJens Axboe seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); 25924af1ccfSOmar Sandoval seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 26024af1ccfSOmar Sandoval seq_printf(m, "map_nr=%u\n", sb->map_nr); 26124af1ccfSOmar Sandoval } 26224af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_show); 26324af1ccfSOmar Sandoval 26424af1ccfSOmar Sandoval static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 26524af1ccfSOmar Sandoval { 26624af1ccfSOmar Sandoval if ((offset & 0xf) == 0) { 26724af1ccfSOmar Sandoval if (offset != 0) 26824af1ccfSOmar Sandoval seq_putc(m, '\n'); 26924af1ccfSOmar Sandoval seq_printf(m, "%08x:", offset); 27024af1ccfSOmar Sandoval } 27124af1ccfSOmar Sandoval if ((offset & 0x1) == 0) 27224af1ccfSOmar Sandoval seq_putc(m, ' '); 27324af1ccfSOmar Sandoval seq_printf(m, "%02x", byte); 27424af1ccfSOmar Sandoval } 27524af1ccfSOmar Sandoval 27624af1ccfSOmar Sandoval void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 27724af1ccfSOmar Sandoval { 27824af1ccfSOmar Sandoval u8 byte = 0; 27924af1ccfSOmar Sandoval unsigned int byte_bits = 0; 28024af1ccfSOmar Sandoval unsigned int offset = 0; 28124af1ccfSOmar Sandoval int i; 28224af1ccfSOmar Sandoval 28324af1ccfSOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 28424af1ccfSOmar Sandoval unsigned long word = READ_ONCE(sb->map[i].word); 2856bf0eb55SJohn Garry unsigned long cleared = READ_ONCE(sb->map[i].cleared); 28624af1ccfSOmar Sandoval unsigned int word_bits = READ_ONCE(sb->map[i].depth); 28724af1ccfSOmar Sandoval 2886bf0eb55SJohn Garry word &= ~cleared; 2896bf0eb55SJohn Garry 29024af1ccfSOmar Sandoval while (word_bits > 0) { 29124af1ccfSOmar Sandoval unsigned int bits = min(8 - byte_bits, word_bits); 29224af1ccfSOmar Sandoval 29324af1ccfSOmar Sandoval byte |= (word & (BIT(bits) - 1)) << byte_bits; 29424af1ccfSOmar Sandoval byte_bits += bits; 29524af1ccfSOmar Sandoval if (byte_bits == 8) { 29624af1ccfSOmar Sandoval emit_byte(m, offset, byte); 29724af1ccfSOmar Sandoval byte = 0; 29824af1ccfSOmar Sandoval byte_bits = 0; 29924af1ccfSOmar Sandoval offset++; 30024af1ccfSOmar Sandoval } 30124af1ccfSOmar Sandoval word >>= bits; 30224af1ccfSOmar Sandoval word_bits -= bits; 30324af1ccfSOmar Sandoval } 30424af1ccfSOmar Sandoval } 30524af1ccfSOmar Sandoval if (byte_bits) { 30624af1ccfSOmar Sandoval emit_byte(m, offset, byte); 30724af1ccfSOmar Sandoval offset++; 30824af1ccfSOmar Sandoval } 30924af1ccfSOmar Sandoval if (offset) 31024af1ccfSOmar Sandoval seq_putc(m, '\n'); 31124af1ccfSOmar Sandoval } 31224af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 31324af1ccfSOmar Sandoval 314a3275539SOmar Sandoval static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, 315a3275539SOmar Sandoval unsigned int depth) 31688459642SOmar Sandoval { 31788459642SOmar Sandoval unsigned int wake_batch; 318a3275539SOmar Sandoval unsigned int shallow_depth; 31988459642SOmar Sandoval 32088459642SOmar Sandoval /* 32188459642SOmar Sandoval * For each batch, we wake up one queue. We need to make sure that our 322a3275539SOmar Sandoval * batch size is small enough that the full depth of the bitmap, 323a3275539SOmar Sandoval * potentially limited by a shallow depth, is enough to wake up all of 324a3275539SOmar Sandoval * the queues. 325a3275539SOmar Sandoval * 326a3275539SOmar Sandoval * Each full word of the bitmap has bits_per_word bits, and there might 327a3275539SOmar Sandoval * be a partial word. There are depth / bits_per_word full words and 328a3275539SOmar Sandoval * depth % bits_per_word bits left over. In bitwise arithmetic: 329a3275539SOmar Sandoval * 330a3275539SOmar Sandoval * bits_per_word = 1 << shift 331a3275539SOmar Sandoval * depth / bits_per_word = depth >> shift 332a3275539SOmar Sandoval * depth % bits_per_word = depth & ((1 << shift) - 1) 333a3275539SOmar Sandoval * 334a3275539SOmar Sandoval * Each word can be limited to sbq->min_shallow_depth bits. 33588459642SOmar Sandoval */ 336a3275539SOmar Sandoval shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); 337a3275539SOmar Sandoval depth = ((depth >> sbq->sb.shift) * shallow_depth + 338a3275539SOmar Sandoval min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); 339a3275539SOmar Sandoval wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, 340a3275539SOmar Sandoval SBQ_WAKE_BATCH); 34188459642SOmar Sandoval 34288459642SOmar Sandoval return wake_batch; 34388459642SOmar Sandoval } 34488459642SOmar Sandoval 34588459642SOmar Sandoval int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 346f4a644dbSOmar Sandoval int shift, bool round_robin, gfp_t flags, int node) 34788459642SOmar Sandoval { 34888459642SOmar Sandoval int ret; 34988459642SOmar Sandoval int i; 35088459642SOmar Sandoval 35188459642SOmar Sandoval ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); 35288459642SOmar Sandoval if (ret) 35388459642SOmar Sandoval return ret; 35488459642SOmar Sandoval 35540aabb67SOmar Sandoval sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 35640aabb67SOmar Sandoval if (!sbq->alloc_hint) { 35740aabb67SOmar Sandoval sbitmap_free(&sbq->sb); 35840aabb67SOmar Sandoval return -ENOMEM; 35940aabb67SOmar Sandoval } 36040aabb67SOmar Sandoval 36198d95416SOmar Sandoval if (depth && !round_robin) { 36298d95416SOmar Sandoval for_each_possible_cpu(i) 36398d95416SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 36498d95416SOmar Sandoval } 36598d95416SOmar Sandoval 366a3275539SOmar Sandoval sbq->min_shallow_depth = UINT_MAX; 367a3275539SOmar Sandoval sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); 36888459642SOmar Sandoval atomic_set(&sbq->wake_index, 0); 3695d2ee712SJens Axboe atomic_set(&sbq->ws_active, 0); 37088459642SOmar Sandoval 37148e28166SOmar Sandoval sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 37288459642SOmar Sandoval if (!sbq->ws) { 37340aabb67SOmar Sandoval free_percpu(sbq->alloc_hint); 37488459642SOmar Sandoval sbitmap_free(&sbq->sb); 37588459642SOmar Sandoval return -ENOMEM; 37688459642SOmar Sandoval } 37788459642SOmar Sandoval 37888459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 37988459642SOmar Sandoval init_waitqueue_head(&sbq->ws[i].wait); 38088459642SOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 38188459642SOmar Sandoval } 382f4a644dbSOmar Sandoval 383f4a644dbSOmar Sandoval sbq->round_robin = round_robin; 38488459642SOmar Sandoval return 0; 38588459642SOmar Sandoval } 38688459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 38788459642SOmar Sandoval 388a3275539SOmar Sandoval static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, 389a3275539SOmar Sandoval unsigned int depth) 39088459642SOmar Sandoval { 391a3275539SOmar Sandoval unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); 3926c0ca7aeSOmar Sandoval int i; 3936c0ca7aeSOmar Sandoval 3946c0ca7aeSOmar Sandoval if (sbq->wake_batch != wake_batch) { 3956c0ca7aeSOmar Sandoval WRITE_ONCE(sbq->wake_batch, wake_batch); 3966c0ca7aeSOmar Sandoval /* 397e6fc4649SMing Lei * Pairs with the memory barrier in sbitmap_queue_wake_up() 398e6fc4649SMing Lei * to ensure that the batch size is updated before the wait 399e6fc4649SMing Lei * counts. 4006c0ca7aeSOmar Sandoval */ 401a0934fd2SAndrea Parri smp_mb(); 4026c0ca7aeSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) 4036c0ca7aeSOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, 1); 4046c0ca7aeSOmar Sandoval } 405a3275539SOmar Sandoval } 406a3275539SOmar Sandoval 407a3275539SOmar Sandoval void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 408a3275539SOmar Sandoval { 409a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, depth); 41088459642SOmar Sandoval sbitmap_resize(&sbq->sb, depth); 41188459642SOmar Sandoval } 41288459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 41388459642SOmar Sandoval 414f4a644dbSOmar Sandoval int __sbitmap_queue_get(struct sbitmap_queue *sbq) 41540aabb67SOmar Sandoval { 41605fd095dSOmar Sandoval unsigned int hint, depth; 41740aabb67SOmar Sandoval int nr; 41840aabb67SOmar Sandoval 41940aabb67SOmar Sandoval hint = this_cpu_read(*sbq->alloc_hint); 42005fd095dSOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 42105fd095dSOmar Sandoval if (unlikely(hint >= depth)) { 42205fd095dSOmar Sandoval hint = depth ? prandom_u32() % depth : 0; 42305fd095dSOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 42405fd095dSOmar Sandoval } 425f4a644dbSOmar Sandoval nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); 42640aabb67SOmar Sandoval 42740aabb67SOmar Sandoval if (nr == -1) { 42840aabb67SOmar Sandoval /* If the map is full, a hint won't do us much good. */ 42940aabb67SOmar Sandoval this_cpu_write(*sbq->alloc_hint, 0); 430f4a644dbSOmar Sandoval } else if (nr == hint || unlikely(sbq->round_robin)) { 43140aabb67SOmar Sandoval /* Only update the hint if we used it. */ 43240aabb67SOmar Sandoval hint = nr + 1; 43305fd095dSOmar Sandoval if (hint >= depth - 1) 43440aabb67SOmar Sandoval hint = 0; 43540aabb67SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 43640aabb67SOmar Sandoval } 43740aabb67SOmar Sandoval 43840aabb67SOmar Sandoval return nr; 43940aabb67SOmar Sandoval } 44040aabb67SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 44140aabb67SOmar Sandoval 442c05e6673SOmar Sandoval int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, 443c05e6673SOmar Sandoval unsigned int shallow_depth) 444c05e6673SOmar Sandoval { 445c05e6673SOmar Sandoval unsigned int hint, depth; 446c05e6673SOmar Sandoval int nr; 447c05e6673SOmar Sandoval 44861445b56SOmar Sandoval WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); 44961445b56SOmar Sandoval 450c05e6673SOmar Sandoval hint = this_cpu_read(*sbq->alloc_hint); 451c05e6673SOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 452c05e6673SOmar Sandoval if (unlikely(hint >= depth)) { 453c05e6673SOmar Sandoval hint = depth ? prandom_u32() % depth : 0; 454c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 455c05e6673SOmar Sandoval } 456c05e6673SOmar Sandoval nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); 457c05e6673SOmar Sandoval 458c05e6673SOmar Sandoval if (nr == -1) { 459c05e6673SOmar Sandoval /* If the map is full, a hint won't do us much good. */ 460c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, 0); 461c05e6673SOmar Sandoval } else if (nr == hint || unlikely(sbq->round_robin)) { 462c05e6673SOmar Sandoval /* Only update the hint if we used it. */ 463c05e6673SOmar Sandoval hint = nr + 1; 464c05e6673SOmar Sandoval if (hint >= depth - 1) 465c05e6673SOmar Sandoval hint = 0; 466c05e6673SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 467c05e6673SOmar Sandoval } 468c05e6673SOmar Sandoval 469c05e6673SOmar Sandoval return nr; 470c05e6673SOmar Sandoval } 471c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); 472c05e6673SOmar Sandoval 473a3275539SOmar Sandoval void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, 474a3275539SOmar Sandoval unsigned int min_shallow_depth) 475a3275539SOmar Sandoval { 476a3275539SOmar Sandoval sbq->min_shallow_depth = min_shallow_depth; 477a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); 478a3275539SOmar Sandoval } 479a3275539SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); 480a3275539SOmar Sandoval 48188459642SOmar Sandoval static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 48288459642SOmar Sandoval { 48388459642SOmar Sandoval int i, wake_index; 48488459642SOmar Sandoval 4855d2ee712SJens Axboe if (!atomic_read(&sbq->ws_active)) 4865d2ee712SJens Axboe return NULL; 4875d2ee712SJens Axboe 48888459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 48988459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 49088459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 49188459642SOmar Sandoval 49288459642SOmar Sandoval if (waitqueue_active(&ws->wait)) { 49341723288SPavel Begunkov if (wake_index != atomic_read(&sbq->wake_index)) 49441723288SPavel Begunkov atomic_set(&sbq->wake_index, wake_index); 49588459642SOmar Sandoval return ws; 49688459642SOmar Sandoval } 49788459642SOmar Sandoval 49888459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 49988459642SOmar Sandoval } 50088459642SOmar Sandoval 50188459642SOmar Sandoval return NULL; 50288459642SOmar Sandoval } 50388459642SOmar Sandoval 504c854ab57SJens Axboe static bool __sbq_wake_up(struct sbitmap_queue *sbq) 50588459642SOmar Sandoval { 50688459642SOmar Sandoval struct sbq_wait_state *ws; 5076c0ca7aeSOmar Sandoval unsigned int wake_batch; 50888459642SOmar Sandoval int wait_cnt; 50988459642SOmar Sandoval 51088459642SOmar Sandoval ws = sbq_wake_ptr(sbq); 51188459642SOmar Sandoval if (!ws) 512c854ab57SJens Axboe return false; 51388459642SOmar Sandoval 51488459642SOmar Sandoval wait_cnt = atomic_dec_return(&ws->wait_cnt); 5156c0ca7aeSOmar Sandoval if (wait_cnt <= 0) { 516c854ab57SJens Axboe int ret; 517c854ab57SJens Axboe 5186c0ca7aeSOmar Sandoval wake_batch = READ_ONCE(sbq->wake_batch); 519c854ab57SJens Axboe 5206c0ca7aeSOmar Sandoval /* 5216c0ca7aeSOmar Sandoval * Pairs with the memory barrier in sbitmap_queue_resize() to 5226c0ca7aeSOmar Sandoval * ensure that we see the batch size update before the wait 5236c0ca7aeSOmar Sandoval * count is reset. 5246c0ca7aeSOmar Sandoval */ 5256c0ca7aeSOmar Sandoval smp_mb__before_atomic(); 526c854ab57SJens Axboe 5276c0ca7aeSOmar Sandoval /* 528c854ab57SJens Axboe * For concurrent callers of this, the one that failed the 529c854ab57SJens Axboe * atomic_cmpxhcg() race should call this function again 530c854ab57SJens Axboe * to wakeup a new batch on a different 'ws'. 5316c0ca7aeSOmar Sandoval */ 532c854ab57SJens Axboe ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); 533c854ab57SJens Axboe if (ret == wait_cnt) { 53488459642SOmar Sandoval sbq_index_atomic_inc(&sbq->wake_index); 5354e5dff41SJens Axboe wake_up_nr(&ws->wait, wake_batch); 536c854ab57SJens Axboe return false; 53788459642SOmar Sandoval } 538c854ab57SJens Axboe 539c854ab57SJens Axboe return true; 540c854ab57SJens Axboe } 541c854ab57SJens Axboe 542c854ab57SJens Axboe return false; 543c854ab57SJens Axboe } 544c854ab57SJens Axboe 545e6fc4649SMing Lei void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) 546c854ab57SJens Axboe { 547c854ab57SJens Axboe while (__sbq_wake_up(sbq)) 548c854ab57SJens Axboe ; 54988459642SOmar Sandoval } 550e6fc4649SMing Lei EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); 55188459642SOmar Sandoval 55240aabb67SOmar Sandoval void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 553f4a644dbSOmar Sandoval unsigned int cpu) 55488459642SOmar Sandoval { 555e6d1fa58SMing Lei /* 556e6d1fa58SMing Lei * Once the clear bit is set, the bit may be allocated out. 557e6d1fa58SMing Lei * 558e6d1fa58SMing Lei * Orders READ/WRITE on the asssociated instance(such as request 559e6d1fa58SMing Lei * of blk_mq) by this bit for avoiding race with re-allocation, 560e6d1fa58SMing Lei * and its pair is the memory barrier implied in __sbitmap_get_word. 561e6d1fa58SMing Lei * 562e6d1fa58SMing Lei * One invariant is that the clear bit has to be zero when the bit 563e6d1fa58SMing Lei * is in use. 564e6d1fa58SMing Lei */ 565e6d1fa58SMing Lei smp_mb__before_atomic(); 566ea86ea2cSJens Axboe sbitmap_deferred_clear_bit(&sbq->sb, nr); 567ea86ea2cSJens Axboe 568e6fc4649SMing Lei /* 569e6fc4649SMing Lei * Pairs with the memory barrier in set_current_state() to ensure the 570e6fc4649SMing Lei * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker 571e6fc4649SMing Lei * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the 572e6fc4649SMing Lei * waiter. See the comment on waitqueue_active(). 573e6fc4649SMing Lei */ 574e6fc4649SMing Lei smp_mb__after_atomic(); 575e6fc4649SMing Lei sbitmap_queue_wake_up(sbq); 576e6fc4649SMing Lei 5775c64a8dfSOmar Sandoval if (likely(!sbq->round_robin && nr < sbq->sb.depth)) 57840aabb67SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 57988459642SOmar Sandoval } 58088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 58188459642SOmar Sandoval 58288459642SOmar Sandoval void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 58388459642SOmar Sandoval { 58488459642SOmar Sandoval int i, wake_index; 58588459642SOmar Sandoval 58688459642SOmar Sandoval /* 587f66227deSOmar Sandoval * Pairs with the memory barrier in set_current_state() like in 588e6fc4649SMing Lei * sbitmap_queue_wake_up(). 58988459642SOmar Sandoval */ 59088459642SOmar Sandoval smp_mb(); 59188459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 59288459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 59388459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 59488459642SOmar Sandoval 59588459642SOmar Sandoval if (waitqueue_active(&ws->wait)) 59688459642SOmar Sandoval wake_up(&ws->wait); 59788459642SOmar Sandoval 59888459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 59988459642SOmar Sandoval } 60088459642SOmar Sandoval } 60188459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 60224af1ccfSOmar Sandoval 60324af1ccfSOmar Sandoval void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 60424af1ccfSOmar Sandoval { 60524af1ccfSOmar Sandoval bool first; 60624af1ccfSOmar Sandoval int i; 60724af1ccfSOmar Sandoval 60824af1ccfSOmar Sandoval sbitmap_show(&sbq->sb, m); 60924af1ccfSOmar Sandoval 61024af1ccfSOmar Sandoval seq_puts(m, "alloc_hint={"); 61124af1ccfSOmar Sandoval first = true; 61224af1ccfSOmar Sandoval for_each_possible_cpu(i) { 61324af1ccfSOmar Sandoval if (!first) 61424af1ccfSOmar Sandoval seq_puts(m, ", "); 61524af1ccfSOmar Sandoval first = false; 61624af1ccfSOmar Sandoval seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); 61724af1ccfSOmar Sandoval } 61824af1ccfSOmar Sandoval seq_puts(m, "}\n"); 61924af1ccfSOmar Sandoval 62024af1ccfSOmar Sandoval seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 62124af1ccfSOmar Sandoval seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 6225d2ee712SJens Axboe seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active)); 62324af1ccfSOmar Sandoval 62424af1ccfSOmar Sandoval seq_puts(m, "ws={\n"); 62524af1ccfSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 62624af1ccfSOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[i]; 62724af1ccfSOmar Sandoval 62824af1ccfSOmar Sandoval seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 62924af1ccfSOmar Sandoval atomic_read(&ws->wait_cnt), 63024af1ccfSOmar Sandoval waitqueue_active(&ws->wait) ? "active" : "inactive"); 63124af1ccfSOmar Sandoval } 63224af1ccfSOmar Sandoval seq_puts(m, "}\n"); 63324af1ccfSOmar Sandoval 63424af1ccfSOmar Sandoval seq_printf(m, "round_robin=%d\n", sbq->round_robin); 635a3275539SOmar Sandoval seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); 63624af1ccfSOmar Sandoval } 63724af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_show); 6385d2ee712SJens Axboe 6399f6b7ef6SJens Axboe void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, 6409f6b7ef6SJens Axboe struct sbq_wait_state *ws, 6419f6b7ef6SJens Axboe struct sbq_wait *sbq_wait) 6429f6b7ef6SJens Axboe { 6439f6b7ef6SJens Axboe if (!sbq_wait->sbq) { 6449f6b7ef6SJens Axboe sbq_wait->sbq = sbq; 6459f6b7ef6SJens Axboe atomic_inc(&sbq->ws_active); 6469f6b7ef6SJens Axboe add_wait_queue(&ws->wait, &sbq_wait->wait); 6479f6b7ef6SJens Axboe } 648df034c93SDavid Jeffery } 6499f6b7ef6SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue); 6509f6b7ef6SJens Axboe 6519f6b7ef6SJens Axboe void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait) 6529f6b7ef6SJens Axboe { 6539f6b7ef6SJens Axboe list_del_init(&sbq_wait->wait.entry); 6549f6b7ef6SJens Axboe if (sbq_wait->sbq) { 6559f6b7ef6SJens Axboe atomic_dec(&sbq_wait->sbq->ws_active); 6569f6b7ef6SJens Axboe sbq_wait->sbq = NULL; 6579f6b7ef6SJens Axboe } 6589f6b7ef6SJens Axboe } 6599f6b7ef6SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue); 6609f6b7ef6SJens Axboe 6615d2ee712SJens Axboe void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, 6625d2ee712SJens Axboe struct sbq_wait_state *ws, 6635d2ee712SJens Axboe struct sbq_wait *sbq_wait, int state) 6645d2ee712SJens Axboe { 6659f6b7ef6SJens Axboe if (!sbq_wait->sbq) { 6665d2ee712SJens Axboe atomic_inc(&sbq->ws_active); 6679f6b7ef6SJens Axboe sbq_wait->sbq = sbq; 6685d2ee712SJens Axboe } 6695d2ee712SJens Axboe prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); 6705d2ee712SJens Axboe } 6715d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); 6725d2ee712SJens Axboe 6735d2ee712SJens Axboe void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, 6745d2ee712SJens Axboe struct sbq_wait *sbq_wait) 6755d2ee712SJens Axboe { 6765d2ee712SJens Axboe finish_wait(&ws->wait, &sbq_wait->wait); 6779f6b7ef6SJens Axboe if (sbq_wait->sbq) { 6785d2ee712SJens Axboe atomic_dec(&sbq->ws_active); 6799f6b7ef6SJens Axboe sbq_wait->sbq = NULL; 6805d2ee712SJens Axboe } 6815d2ee712SJens Axboe } 6825d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_finish_wait); 683