10fc479b1SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 288459642SOmar Sandoval /* 388459642SOmar Sandoval * Copyright (C) 2016 Facebook 488459642SOmar Sandoval * Copyright (C) 2013-2014 Jens Axboe 588459642SOmar Sandoval */ 688459642SOmar Sandoval 7af8601adSIngo Molnar #include <linux/sched.h> 898d95416SOmar Sandoval #include <linux/random.h> 988459642SOmar Sandoval #include <linux/sbitmap.h> 1024af1ccfSOmar Sandoval #include <linux/seq_file.h> 1188459642SOmar Sandoval 12*bf2c4282SMing Lei static int init_alloc_hint(struct sbitmap_queue *sbq, gfp_t flags) 13*bf2c4282SMing Lei { 14*bf2c4282SMing Lei unsigned depth = sbq->sb.depth; 15*bf2c4282SMing Lei 16*bf2c4282SMing Lei sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 17*bf2c4282SMing Lei if (!sbq->alloc_hint) 18*bf2c4282SMing Lei return -ENOMEM; 19*bf2c4282SMing Lei 20*bf2c4282SMing Lei if (depth && !sbq->sb.round_robin) { 21*bf2c4282SMing Lei int i; 22*bf2c4282SMing Lei 23*bf2c4282SMing Lei for_each_possible_cpu(i) 24*bf2c4282SMing Lei *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 25*bf2c4282SMing Lei } 26*bf2c4282SMing Lei 27*bf2c4282SMing Lei return 0; 28*bf2c4282SMing Lei } 29*bf2c4282SMing Lei 30*bf2c4282SMing Lei static inline unsigned update_alloc_hint_before_get(struct sbitmap_queue *sbq, 31*bf2c4282SMing Lei unsigned int depth) 32*bf2c4282SMing Lei { 33*bf2c4282SMing Lei unsigned hint; 34*bf2c4282SMing Lei 35*bf2c4282SMing Lei hint = this_cpu_read(*sbq->alloc_hint); 36*bf2c4282SMing Lei if (unlikely(hint >= depth)) { 37*bf2c4282SMing Lei hint = depth ? prandom_u32() % depth : 0; 38*bf2c4282SMing Lei this_cpu_write(*sbq->alloc_hint, hint); 39*bf2c4282SMing Lei } 40*bf2c4282SMing Lei 41*bf2c4282SMing Lei return hint; 42*bf2c4282SMing Lei } 43*bf2c4282SMing Lei 44*bf2c4282SMing Lei static inline void update_alloc_hint_after_get(struct sbitmap_queue *sbq, 45*bf2c4282SMing Lei unsigned int depth, 46*bf2c4282SMing Lei unsigned int hint, 47*bf2c4282SMing Lei unsigned int nr) 48*bf2c4282SMing Lei { 49*bf2c4282SMing Lei if (nr == -1) { 50*bf2c4282SMing Lei /* If the map is full, a hint won't do us much good. */ 51*bf2c4282SMing Lei this_cpu_write(*sbq->alloc_hint, 0); 52*bf2c4282SMing Lei } else if (nr == hint || unlikely(sbq->sb.round_robin)) { 53*bf2c4282SMing Lei /* Only update the hint if we used it. */ 54*bf2c4282SMing Lei hint = nr + 1; 55*bf2c4282SMing Lei if (hint >= depth - 1) 56*bf2c4282SMing Lei hint = 0; 57*bf2c4282SMing Lei this_cpu_write(*sbq->alloc_hint, hint); 58*bf2c4282SMing Lei } 59*bf2c4282SMing Lei } 60*bf2c4282SMing Lei 61b2dbff1bSJens Axboe /* 62b2dbff1bSJens Axboe * See if we have deferred clears that we can batch move 63b2dbff1bSJens Axboe */ 64b78beea0SPavel Begunkov static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) 65b2dbff1bSJens Axboe { 66c3250c8dSPavel Begunkov unsigned long mask; 67b2dbff1bSJens Axboe 68661d4f55SPavel Begunkov if (!READ_ONCE(map->cleared)) 69661d4f55SPavel Begunkov return false; 70b2dbff1bSJens Axboe 71b2dbff1bSJens Axboe /* 72b2dbff1bSJens Axboe * First get a stable cleared mask, setting the old mask to 0. 73b2dbff1bSJens Axboe */ 74b78beea0SPavel Begunkov mask = xchg(&map->cleared, 0); 75b2dbff1bSJens Axboe 76b2dbff1bSJens Axboe /* 77b2dbff1bSJens Axboe * Now clear the masked bits in our free word 78b2dbff1bSJens Axboe */ 79c3250c8dSPavel Begunkov atomic_long_andnot(mask, (atomic_long_t *)&map->word); 80c3250c8dSPavel Begunkov BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word)); 81661d4f55SPavel Begunkov return true; 82b2dbff1bSJens Axboe } 83b2dbff1bSJens Axboe 8488459642SOmar Sandoval int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 85efe1f3a1SMing Lei gfp_t flags, int node, bool round_robin) 8688459642SOmar Sandoval { 8788459642SOmar Sandoval unsigned int bits_per_word; 8888459642SOmar Sandoval unsigned int i; 8988459642SOmar Sandoval 9088459642SOmar Sandoval if (shift < 0) { 9188459642SOmar Sandoval shift = ilog2(BITS_PER_LONG); 9288459642SOmar Sandoval /* 9388459642SOmar Sandoval * If the bitmap is small, shrink the number of bits per word so 9488459642SOmar Sandoval * we spread over a few cachelines, at least. If less than 4 9588459642SOmar Sandoval * bits, just forget about it, it's not going to work optimally 9688459642SOmar Sandoval * anyway. 9788459642SOmar Sandoval */ 9888459642SOmar Sandoval if (depth >= 4) { 9988459642SOmar Sandoval while ((4U << shift) > depth) 10088459642SOmar Sandoval shift--; 10188459642SOmar Sandoval } 10288459642SOmar Sandoval } 10388459642SOmar Sandoval bits_per_word = 1U << shift; 10488459642SOmar Sandoval if (bits_per_word > BITS_PER_LONG) 10588459642SOmar Sandoval return -EINVAL; 10688459642SOmar Sandoval 10788459642SOmar Sandoval sb->shift = shift; 10888459642SOmar Sandoval sb->depth = depth; 10988459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 110efe1f3a1SMing Lei sb->round_robin = round_robin; 11188459642SOmar Sandoval 11288459642SOmar Sandoval if (depth == 0) { 11388459642SOmar Sandoval sb->map = NULL; 11488459642SOmar Sandoval return 0; 11588459642SOmar Sandoval } 11688459642SOmar Sandoval 117590b5b7dSKees Cook sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node); 11888459642SOmar Sandoval if (!sb->map) 11988459642SOmar Sandoval return -ENOMEM; 12088459642SOmar Sandoval 12188459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 12288459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 12388459642SOmar Sandoval depth -= sb->map[i].depth; 12488459642SOmar Sandoval } 12588459642SOmar Sandoval return 0; 12688459642SOmar Sandoval } 12788459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_init_node); 12888459642SOmar Sandoval 12988459642SOmar Sandoval void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 13088459642SOmar Sandoval { 13188459642SOmar Sandoval unsigned int bits_per_word = 1U << sb->shift; 13288459642SOmar Sandoval unsigned int i; 13388459642SOmar Sandoval 134b2dbff1bSJens Axboe for (i = 0; i < sb->map_nr; i++) 135b78beea0SPavel Begunkov sbitmap_deferred_clear(&sb->map[i]); 136b2dbff1bSJens Axboe 13788459642SOmar Sandoval sb->depth = depth; 13888459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 13988459642SOmar Sandoval 14088459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 14188459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 14288459642SOmar Sandoval depth -= sb->map[i].depth; 14388459642SOmar Sandoval } 14488459642SOmar Sandoval } 14588459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_resize); 14688459642SOmar Sandoval 147c05e6673SOmar Sandoval static int __sbitmap_get_word(unsigned long *word, unsigned long depth, 148c05e6673SOmar Sandoval unsigned int hint, bool wrap) 14988459642SOmar Sandoval { 15088459642SOmar Sandoval int nr; 15188459642SOmar Sandoval 1520eff1f1aSPavel Begunkov /* don't wrap if starting from 0 */ 1530eff1f1aSPavel Begunkov wrap = wrap && hint; 1540eff1f1aSPavel Begunkov 15588459642SOmar Sandoval while (1) { 156c05e6673SOmar Sandoval nr = find_next_zero_bit(word, depth, hint); 157c05e6673SOmar Sandoval if (unlikely(nr >= depth)) { 15888459642SOmar Sandoval /* 15988459642SOmar Sandoval * We started with an offset, and we didn't reset the 16088459642SOmar Sandoval * offset to 0 in a failure case, so start from 0 to 16188459642SOmar Sandoval * exhaust the map. 16288459642SOmar Sandoval */ 1630eff1f1aSPavel Begunkov if (hint && wrap) { 1640eff1f1aSPavel Begunkov hint = 0; 16588459642SOmar Sandoval continue; 16688459642SOmar Sandoval } 16788459642SOmar Sandoval return -1; 16888459642SOmar Sandoval } 16988459642SOmar Sandoval 1704ace53f1SOmar Sandoval if (!test_and_set_bit_lock(nr, word)) 17188459642SOmar Sandoval break; 17288459642SOmar Sandoval 17388459642SOmar Sandoval hint = nr + 1; 174c05e6673SOmar Sandoval if (hint >= depth - 1) 17588459642SOmar Sandoval hint = 0; 17688459642SOmar Sandoval } 17788459642SOmar Sandoval 17888459642SOmar Sandoval return nr; 17988459642SOmar Sandoval } 18088459642SOmar Sandoval 181ea86ea2cSJens Axboe static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, 182efe1f3a1SMing Lei unsigned int alloc_hint) 183ea86ea2cSJens Axboe { 184b78beea0SPavel Begunkov struct sbitmap_word *map = &sb->map[index]; 185ea86ea2cSJens Axboe int nr; 186ea86ea2cSJens Axboe 187ea86ea2cSJens Axboe do { 188b78beea0SPavel Begunkov nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint, 189efe1f3a1SMing Lei !sb->round_robin); 190ea86ea2cSJens Axboe if (nr != -1) 191ea86ea2cSJens Axboe break; 192b78beea0SPavel Begunkov if (!sbitmap_deferred_clear(map)) 193ea86ea2cSJens Axboe break; 194ea86ea2cSJens Axboe } while (1); 195ea86ea2cSJens Axboe 196ea86ea2cSJens Axboe return nr; 197ea86ea2cSJens Axboe } 198ea86ea2cSJens Axboe 199efe1f3a1SMing Lei int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) 20088459642SOmar Sandoval { 20188459642SOmar Sandoval unsigned int i, index; 20288459642SOmar Sandoval int nr = -1; 20388459642SOmar Sandoval 20488459642SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 20588459642SOmar Sandoval 20627fae429SJens Axboe /* 20727fae429SJens Axboe * Unless we're doing round robin tag allocation, just use the 20827fae429SJens Axboe * alloc_hint to find the right word index. No point in looping 20927fae429SJens Axboe * twice in find_next_zero_bit() for that case. 21027fae429SJens Axboe */ 211efe1f3a1SMing Lei if (sb->round_robin) 21227fae429SJens Axboe alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); 21327fae429SJens Axboe else 21427fae429SJens Axboe alloc_hint = 0; 21527fae429SJens Axboe 21688459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 217efe1f3a1SMing Lei nr = sbitmap_find_bit_in_index(sb, index, alloc_hint); 21888459642SOmar Sandoval if (nr != -1) { 21988459642SOmar Sandoval nr += index << sb->shift; 22088459642SOmar Sandoval break; 22188459642SOmar Sandoval } 22288459642SOmar Sandoval 22388459642SOmar Sandoval /* Jump to next index. */ 22488459642SOmar Sandoval alloc_hint = 0; 22527fae429SJens Axboe if (++index >= sb->map_nr) 22627fae429SJens Axboe index = 0; 22788459642SOmar Sandoval } 22888459642SOmar Sandoval 22988459642SOmar Sandoval return nr; 23088459642SOmar Sandoval } 23188459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get); 23288459642SOmar Sandoval 233c05e6673SOmar Sandoval int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, 234c05e6673SOmar Sandoval unsigned long shallow_depth) 235c05e6673SOmar Sandoval { 236c05e6673SOmar Sandoval unsigned int i, index; 237c05e6673SOmar Sandoval int nr = -1; 238c05e6673SOmar Sandoval 239c05e6673SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 240c05e6673SOmar Sandoval 241c05e6673SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 242b2dbff1bSJens Axboe again: 243c05e6673SOmar Sandoval nr = __sbitmap_get_word(&sb->map[index].word, 244c05e6673SOmar Sandoval min(sb->map[index].depth, shallow_depth), 245c05e6673SOmar Sandoval SB_NR_TO_BIT(sb, alloc_hint), true); 246c05e6673SOmar Sandoval if (nr != -1) { 247c05e6673SOmar Sandoval nr += index << sb->shift; 248c05e6673SOmar Sandoval break; 249c05e6673SOmar Sandoval } 250c05e6673SOmar Sandoval 251b78beea0SPavel Begunkov if (sbitmap_deferred_clear(&sb->map[index])) 252b2dbff1bSJens Axboe goto again; 253b2dbff1bSJens Axboe 254c05e6673SOmar Sandoval /* Jump to next index. */ 255c05e6673SOmar Sandoval index++; 256c05e6673SOmar Sandoval alloc_hint = index << sb->shift; 257c05e6673SOmar Sandoval 258c05e6673SOmar Sandoval if (index >= sb->map_nr) { 259c05e6673SOmar Sandoval index = 0; 260c05e6673SOmar Sandoval alloc_hint = 0; 261c05e6673SOmar Sandoval } 262c05e6673SOmar Sandoval } 263c05e6673SOmar Sandoval 264c05e6673SOmar Sandoval return nr; 265c05e6673SOmar Sandoval } 266c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get_shallow); 267c05e6673SOmar Sandoval 26888459642SOmar Sandoval bool sbitmap_any_bit_set(const struct sbitmap *sb) 26988459642SOmar Sandoval { 27088459642SOmar Sandoval unsigned int i; 27188459642SOmar Sandoval 27288459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 273b2dbff1bSJens Axboe if (sb->map[i].word & ~sb->map[i].cleared) 27488459642SOmar Sandoval return true; 27588459642SOmar Sandoval } 27688459642SOmar Sandoval return false; 27788459642SOmar Sandoval } 27888459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 27988459642SOmar Sandoval 280ea86ea2cSJens Axboe static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set) 28188459642SOmar Sandoval { 28260658e0dSColin Ian King unsigned int i, weight = 0; 28388459642SOmar Sandoval 28488459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 28588459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 28688459642SOmar Sandoval 287ea86ea2cSJens Axboe if (set) 28888459642SOmar Sandoval weight += bitmap_weight(&word->word, word->depth); 289ea86ea2cSJens Axboe else 290ea86ea2cSJens Axboe weight += bitmap_weight(&word->cleared, word->depth); 29188459642SOmar Sandoval } 29288459642SOmar Sandoval return weight; 29388459642SOmar Sandoval } 294ea86ea2cSJens Axboe 295ea86ea2cSJens Axboe static unsigned int sbitmap_weight(const struct sbitmap *sb) 296ea86ea2cSJens Axboe { 297ea86ea2cSJens Axboe return __sbitmap_weight(sb, true); 298ea86ea2cSJens Axboe } 299ea86ea2cSJens Axboe 300ea86ea2cSJens Axboe static unsigned int sbitmap_cleared(const struct sbitmap *sb) 301ea86ea2cSJens Axboe { 302ea86ea2cSJens Axboe return __sbitmap_weight(sb, false); 303ea86ea2cSJens Axboe } 30488459642SOmar Sandoval 30524af1ccfSOmar Sandoval void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 30624af1ccfSOmar Sandoval { 30724af1ccfSOmar Sandoval seq_printf(m, "depth=%u\n", sb->depth); 308ea86ea2cSJens Axboe seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb)); 309ea86ea2cSJens Axboe seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb)); 31024af1ccfSOmar Sandoval seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 31124af1ccfSOmar Sandoval seq_printf(m, "map_nr=%u\n", sb->map_nr); 31224af1ccfSOmar Sandoval } 31324af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_show); 31424af1ccfSOmar Sandoval 31524af1ccfSOmar Sandoval static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 31624af1ccfSOmar Sandoval { 31724af1ccfSOmar Sandoval if ((offset & 0xf) == 0) { 31824af1ccfSOmar Sandoval if (offset != 0) 31924af1ccfSOmar Sandoval seq_putc(m, '\n'); 32024af1ccfSOmar Sandoval seq_printf(m, "%08x:", offset); 32124af1ccfSOmar Sandoval } 32224af1ccfSOmar Sandoval if ((offset & 0x1) == 0) 32324af1ccfSOmar Sandoval seq_putc(m, ' '); 32424af1ccfSOmar Sandoval seq_printf(m, "%02x", byte); 32524af1ccfSOmar Sandoval } 32624af1ccfSOmar Sandoval 32724af1ccfSOmar Sandoval void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 32824af1ccfSOmar Sandoval { 32924af1ccfSOmar Sandoval u8 byte = 0; 33024af1ccfSOmar Sandoval unsigned int byte_bits = 0; 33124af1ccfSOmar Sandoval unsigned int offset = 0; 33224af1ccfSOmar Sandoval int i; 33324af1ccfSOmar Sandoval 33424af1ccfSOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 33524af1ccfSOmar Sandoval unsigned long word = READ_ONCE(sb->map[i].word); 3366bf0eb55SJohn Garry unsigned long cleared = READ_ONCE(sb->map[i].cleared); 33724af1ccfSOmar Sandoval unsigned int word_bits = READ_ONCE(sb->map[i].depth); 33824af1ccfSOmar Sandoval 3396bf0eb55SJohn Garry word &= ~cleared; 3406bf0eb55SJohn Garry 34124af1ccfSOmar Sandoval while (word_bits > 0) { 34224af1ccfSOmar Sandoval unsigned int bits = min(8 - byte_bits, word_bits); 34324af1ccfSOmar Sandoval 34424af1ccfSOmar Sandoval byte |= (word & (BIT(bits) - 1)) << byte_bits; 34524af1ccfSOmar Sandoval byte_bits += bits; 34624af1ccfSOmar Sandoval if (byte_bits == 8) { 34724af1ccfSOmar Sandoval emit_byte(m, offset, byte); 34824af1ccfSOmar Sandoval byte = 0; 34924af1ccfSOmar Sandoval byte_bits = 0; 35024af1ccfSOmar Sandoval offset++; 35124af1ccfSOmar Sandoval } 35224af1ccfSOmar Sandoval word >>= bits; 35324af1ccfSOmar Sandoval word_bits -= bits; 35424af1ccfSOmar Sandoval } 35524af1ccfSOmar Sandoval } 35624af1ccfSOmar Sandoval if (byte_bits) { 35724af1ccfSOmar Sandoval emit_byte(m, offset, byte); 35824af1ccfSOmar Sandoval offset++; 35924af1ccfSOmar Sandoval } 36024af1ccfSOmar Sandoval if (offset) 36124af1ccfSOmar Sandoval seq_putc(m, '\n'); 36224af1ccfSOmar Sandoval } 36324af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 36424af1ccfSOmar Sandoval 365a3275539SOmar Sandoval static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, 366a3275539SOmar Sandoval unsigned int depth) 36788459642SOmar Sandoval { 36888459642SOmar Sandoval unsigned int wake_batch; 369a3275539SOmar Sandoval unsigned int shallow_depth; 37088459642SOmar Sandoval 37188459642SOmar Sandoval /* 37288459642SOmar Sandoval * For each batch, we wake up one queue. We need to make sure that our 373a3275539SOmar Sandoval * batch size is small enough that the full depth of the bitmap, 374a3275539SOmar Sandoval * potentially limited by a shallow depth, is enough to wake up all of 375a3275539SOmar Sandoval * the queues. 376a3275539SOmar Sandoval * 377a3275539SOmar Sandoval * Each full word of the bitmap has bits_per_word bits, and there might 378a3275539SOmar Sandoval * be a partial word. There are depth / bits_per_word full words and 379a3275539SOmar Sandoval * depth % bits_per_word bits left over. In bitwise arithmetic: 380a3275539SOmar Sandoval * 381a3275539SOmar Sandoval * bits_per_word = 1 << shift 382a3275539SOmar Sandoval * depth / bits_per_word = depth >> shift 383a3275539SOmar Sandoval * depth % bits_per_word = depth & ((1 << shift) - 1) 384a3275539SOmar Sandoval * 385a3275539SOmar Sandoval * Each word can be limited to sbq->min_shallow_depth bits. 38688459642SOmar Sandoval */ 387a3275539SOmar Sandoval shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); 388a3275539SOmar Sandoval depth = ((depth >> sbq->sb.shift) * shallow_depth + 389a3275539SOmar Sandoval min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); 390a3275539SOmar Sandoval wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, 391a3275539SOmar Sandoval SBQ_WAKE_BATCH); 39288459642SOmar Sandoval 39388459642SOmar Sandoval return wake_batch; 39488459642SOmar Sandoval } 39588459642SOmar Sandoval 39688459642SOmar Sandoval int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 397f4a644dbSOmar Sandoval int shift, bool round_robin, gfp_t flags, int node) 39888459642SOmar Sandoval { 39988459642SOmar Sandoval int ret; 40088459642SOmar Sandoval int i; 40188459642SOmar Sandoval 402efe1f3a1SMing Lei ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node, 403efe1f3a1SMing Lei round_robin); 40488459642SOmar Sandoval if (ret) 40588459642SOmar Sandoval return ret; 40688459642SOmar Sandoval 407*bf2c4282SMing Lei if (init_alloc_hint(sbq, flags) != 0) { 40840aabb67SOmar Sandoval sbitmap_free(&sbq->sb); 40940aabb67SOmar Sandoval return -ENOMEM; 41040aabb67SOmar Sandoval } 41140aabb67SOmar Sandoval 412a3275539SOmar Sandoval sbq->min_shallow_depth = UINT_MAX; 413a3275539SOmar Sandoval sbq->wake_batch = sbq_calc_wake_batch(sbq, depth); 41488459642SOmar Sandoval atomic_set(&sbq->wake_index, 0); 4155d2ee712SJens Axboe atomic_set(&sbq->ws_active, 0); 41688459642SOmar Sandoval 41748e28166SOmar Sandoval sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 41888459642SOmar Sandoval if (!sbq->ws) { 41940aabb67SOmar Sandoval free_percpu(sbq->alloc_hint); 42088459642SOmar Sandoval sbitmap_free(&sbq->sb); 42188459642SOmar Sandoval return -ENOMEM; 42288459642SOmar Sandoval } 42388459642SOmar Sandoval 42488459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 42588459642SOmar Sandoval init_waitqueue_head(&sbq->ws[i].wait); 42688459642SOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 42788459642SOmar Sandoval } 428f4a644dbSOmar Sandoval 42988459642SOmar Sandoval return 0; 43088459642SOmar Sandoval } 43188459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 43288459642SOmar Sandoval 433a3275539SOmar Sandoval static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq, 434a3275539SOmar Sandoval unsigned int depth) 43588459642SOmar Sandoval { 436a3275539SOmar Sandoval unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth); 4376c0ca7aeSOmar Sandoval int i; 4386c0ca7aeSOmar Sandoval 4396c0ca7aeSOmar Sandoval if (sbq->wake_batch != wake_batch) { 4406c0ca7aeSOmar Sandoval WRITE_ONCE(sbq->wake_batch, wake_batch); 4416c0ca7aeSOmar Sandoval /* 442e6fc4649SMing Lei * Pairs with the memory barrier in sbitmap_queue_wake_up() 443e6fc4649SMing Lei * to ensure that the batch size is updated before the wait 444e6fc4649SMing Lei * counts. 4456c0ca7aeSOmar Sandoval */ 446a0934fd2SAndrea Parri smp_mb(); 4476c0ca7aeSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) 4486c0ca7aeSOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, 1); 4496c0ca7aeSOmar Sandoval } 450a3275539SOmar Sandoval } 451a3275539SOmar Sandoval 452a3275539SOmar Sandoval void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 453a3275539SOmar Sandoval { 454a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, depth); 45588459642SOmar Sandoval sbitmap_resize(&sbq->sb, depth); 45688459642SOmar Sandoval } 45788459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 45888459642SOmar Sandoval 459f4a644dbSOmar Sandoval int __sbitmap_queue_get(struct sbitmap_queue *sbq) 46040aabb67SOmar Sandoval { 46105fd095dSOmar Sandoval unsigned int hint, depth; 46240aabb67SOmar Sandoval int nr; 46340aabb67SOmar Sandoval 46405fd095dSOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 465*bf2c4282SMing Lei hint = update_alloc_hint_before_get(sbq, depth); 466efe1f3a1SMing Lei nr = sbitmap_get(&sbq->sb, hint); 467*bf2c4282SMing Lei update_alloc_hint_after_get(sbq, depth, hint, nr); 46840aabb67SOmar Sandoval 46940aabb67SOmar Sandoval return nr; 47040aabb67SOmar Sandoval } 47140aabb67SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 47240aabb67SOmar Sandoval 473c05e6673SOmar Sandoval int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, 474c05e6673SOmar Sandoval unsigned int shallow_depth) 475c05e6673SOmar Sandoval { 476c05e6673SOmar Sandoval unsigned int hint, depth; 477c05e6673SOmar Sandoval int nr; 478c05e6673SOmar Sandoval 47961445b56SOmar Sandoval WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth); 48061445b56SOmar Sandoval 481c05e6673SOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 482*bf2c4282SMing Lei hint = update_alloc_hint_before_get(sbq, depth); 483c05e6673SOmar Sandoval nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth); 484*bf2c4282SMing Lei update_alloc_hint_after_get(sbq, depth, hint, nr); 485c05e6673SOmar Sandoval 486c05e6673SOmar Sandoval return nr; 487c05e6673SOmar Sandoval } 488c05e6673SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow); 489c05e6673SOmar Sandoval 490a3275539SOmar Sandoval void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq, 491a3275539SOmar Sandoval unsigned int min_shallow_depth) 492a3275539SOmar Sandoval { 493a3275539SOmar Sandoval sbq->min_shallow_depth = min_shallow_depth; 494a3275539SOmar Sandoval sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth); 495a3275539SOmar Sandoval } 496a3275539SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth); 497a3275539SOmar Sandoval 49888459642SOmar Sandoval static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 49988459642SOmar Sandoval { 50088459642SOmar Sandoval int i, wake_index; 50188459642SOmar Sandoval 5025d2ee712SJens Axboe if (!atomic_read(&sbq->ws_active)) 5035d2ee712SJens Axboe return NULL; 5045d2ee712SJens Axboe 50588459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 50688459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 50788459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 50888459642SOmar Sandoval 50988459642SOmar Sandoval if (waitqueue_active(&ws->wait)) { 51041723288SPavel Begunkov if (wake_index != atomic_read(&sbq->wake_index)) 51141723288SPavel Begunkov atomic_set(&sbq->wake_index, wake_index); 51288459642SOmar Sandoval return ws; 51388459642SOmar Sandoval } 51488459642SOmar Sandoval 51588459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 51688459642SOmar Sandoval } 51788459642SOmar Sandoval 51888459642SOmar Sandoval return NULL; 51988459642SOmar Sandoval } 52088459642SOmar Sandoval 521c854ab57SJens Axboe static bool __sbq_wake_up(struct sbitmap_queue *sbq) 52288459642SOmar Sandoval { 52388459642SOmar Sandoval struct sbq_wait_state *ws; 5246c0ca7aeSOmar Sandoval unsigned int wake_batch; 52588459642SOmar Sandoval int wait_cnt; 52688459642SOmar Sandoval 52788459642SOmar Sandoval ws = sbq_wake_ptr(sbq); 52888459642SOmar Sandoval if (!ws) 529c854ab57SJens Axboe return false; 53088459642SOmar Sandoval 53188459642SOmar Sandoval wait_cnt = atomic_dec_return(&ws->wait_cnt); 5326c0ca7aeSOmar Sandoval if (wait_cnt <= 0) { 533c854ab57SJens Axboe int ret; 534c854ab57SJens Axboe 5356c0ca7aeSOmar Sandoval wake_batch = READ_ONCE(sbq->wake_batch); 536c854ab57SJens Axboe 5376c0ca7aeSOmar Sandoval /* 5386c0ca7aeSOmar Sandoval * Pairs with the memory barrier in sbitmap_queue_resize() to 5396c0ca7aeSOmar Sandoval * ensure that we see the batch size update before the wait 5406c0ca7aeSOmar Sandoval * count is reset. 5416c0ca7aeSOmar Sandoval */ 5426c0ca7aeSOmar Sandoval smp_mb__before_atomic(); 543c854ab57SJens Axboe 5446c0ca7aeSOmar Sandoval /* 545c854ab57SJens Axboe * For concurrent callers of this, the one that failed the 546c854ab57SJens Axboe * atomic_cmpxhcg() race should call this function again 547c854ab57SJens Axboe * to wakeup a new batch on a different 'ws'. 5486c0ca7aeSOmar Sandoval */ 549c854ab57SJens Axboe ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch); 550c854ab57SJens Axboe if (ret == wait_cnt) { 55188459642SOmar Sandoval sbq_index_atomic_inc(&sbq->wake_index); 5524e5dff41SJens Axboe wake_up_nr(&ws->wait, wake_batch); 553c854ab57SJens Axboe return false; 55488459642SOmar Sandoval } 555c854ab57SJens Axboe 556c854ab57SJens Axboe return true; 557c854ab57SJens Axboe } 558c854ab57SJens Axboe 559c854ab57SJens Axboe return false; 560c854ab57SJens Axboe } 561c854ab57SJens Axboe 562e6fc4649SMing Lei void sbitmap_queue_wake_up(struct sbitmap_queue *sbq) 563c854ab57SJens Axboe { 564c854ab57SJens Axboe while (__sbq_wake_up(sbq)) 565c854ab57SJens Axboe ; 56688459642SOmar Sandoval } 567e6fc4649SMing Lei EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); 56888459642SOmar Sandoval 56940aabb67SOmar Sandoval void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 570f4a644dbSOmar Sandoval unsigned int cpu) 57188459642SOmar Sandoval { 572e6d1fa58SMing Lei /* 573e6d1fa58SMing Lei * Once the clear bit is set, the bit may be allocated out. 574e6d1fa58SMing Lei * 575e6d1fa58SMing Lei * Orders READ/WRITE on the asssociated instance(such as request 576e6d1fa58SMing Lei * of blk_mq) by this bit for avoiding race with re-allocation, 577e6d1fa58SMing Lei * and its pair is the memory barrier implied in __sbitmap_get_word. 578e6d1fa58SMing Lei * 579e6d1fa58SMing Lei * One invariant is that the clear bit has to be zero when the bit 580e6d1fa58SMing Lei * is in use. 581e6d1fa58SMing Lei */ 582e6d1fa58SMing Lei smp_mb__before_atomic(); 583ea86ea2cSJens Axboe sbitmap_deferred_clear_bit(&sbq->sb, nr); 584ea86ea2cSJens Axboe 585e6fc4649SMing Lei /* 586e6fc4649SMing Lei * Pairs with the memory barrier in set_current_state() to ensure the 587e6fc4649SMing Lei * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker 588e6fc4649SMing Lei * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the 589e6fc4649SMing Lei * waiter. See the comment on waitqueue_active(). 590e6fc4649SMing Lei */ 591e6fc4649SMing Lei smp_mb__after_atomic(); 592e6fc4649SMing Lei sbitmap_queue_wake_up(sbq); 593e6fc4649SMing Lei 594efe1f3a1SMing Lei if (likely(!sbq->sb.round_robin && nr < sbq->sb.depth)) 59540aabb67SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 59688459642SOmar Sandoval } 59788459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 59888459642SOmar Sandoval 59988459642SOmar Sandoval void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 60088459642SOmar Sandoval { 60188459642SOmar Sandoval int i, wake_index; 60288459642SOmar Sandoval 60388459642SOmar Sandoval /* 604f66227deSOmar Sandoval * Pairs with the memory barrier in set_current_state() like in 605e6fc4649SMing Lei * sbitmap_queue_wake_up(). 60688459642SOmar Sandoval */ 60788459642SOmar Sandoval smp_mb(); 60888459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 60988459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 61088459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 61188459642SOmar Sandoval 61288459642SOmar Sandoval if (waitqueue_active(&ws->wait)) 61388459642SOmar Sandoval wake_up(&ws->wait); 61488459642SOmar Sandoval 61588459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 61688459642SOmar Sandoval } 61788459642SOmar Sandoval } 61888459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 61924af1ccfSOmar Sandoval 62024af1ccfSOmar Sandoval void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 62124af1ccfSOmar Sandoval { 62224af1ccfSOmar Sandoval bool first; 62324af1ccfSOmar Sandoval int i; 62424af1ccfSOmar Sandoval 62524af1ccfSOmar Sandoval sbitmap_show(&sbq->sb, m); 62624af1ccfSOmar Sandoval 62724af1ccfSOmar Sandoval seq_puts(m, "alloc_hint={"); 62824af1ccfSOmar Sandoval first = true; 62924af1ccfSOmar Sandoval for_each_possible_cpu(i) { 63024af1ccfSOmar Sandoval if (!first) 63124af1ccfSOmar Sandoval seq_puts(m, ", "); 63224af1ccfSOmar Sandoval first = false; 63324af1ccfSOmar Sandoval seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); 63424af1ccfSOmar Sandoval } 63524af1ccfSOmar Sandoval seq_puts(m, "}\n"); 63624af1ccfSOmar Sandoval 63724af1ccfSOmar Sandoval seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 63824af1ccfSOmar Sandoval seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 6395d2ee712SJens Axboe seq_printf(m, "ws_active=%d\n", atomic_read(&sbq->ws_active)); 64024af1ccfSOmar Sandoval 64124af1ccfSOmar Sandoval seq_puts(m, "ws={\n"); 64224af1ccfSOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 64324af1ccfSOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[i]; 64424af1ccfSOmar Sandoval 64524af1ccfSOmar Sandoval seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 64624af1ccfSOmar Sandoval atomic_read(&ws->wait_cnt), 64724af1ccfSOmar Sandoval waitqueue_active(&ws->wait) ? "active" : "inactive"); 64824af1ccfSOmar Sandoval } 64924af1ccfSOmar Sandoval seq_puts(m, "}\n"); 65024af1ccfSOmar Sandoval 651efe1f3a1SMing Lei seq_printf(m, "round_robin=%d\n", sbq->sb.round_robin); 652a3275539SOmar Sandoval seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth); 65324af1ccfSOmar Sandoval } 65424af1ccfSOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_show); 6555d2ee712SJens Axboe 6569f6b7ef6SJens Axboe void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, 6579f6b7ef6SJens Axboe struct sbq_wait_state *ws, 6589f6b7ef6SJens Axboe struct sbq_wait *sbq_wait) 6599f6b7ef6SJens Axboe { 6609f6b7ef6SJens Axboe if (!sbq_wait->sbq) { 6619f6b7ef6SJens Axboe sbq_wait->sbq = sbq; 6629f6b7ef6SJens Axboe atomic_inc(&sbq->ws_active); 6639f6b7ef6SJens Axboe add_wait_queue(&ws->wait, &sbq_wait->wait); 6649f6b7ef6SJens Axboe } 665df034c93SDavid Jeffery } 6669f6b7ef6SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue); 6679f6b7ef6SJens Axboe 6689f6b7ef6SJens Axboe void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait) 6699f6b7ef6SJens Axboe { 6709f6b7ef6SJens Axboe list_del_init(&sbq_wait->wait.entry); 6719f6b7ef6SJens Axboe if (sbq_wait->sbq) { 6729f6b7ef6SJens Axboe atomic_dec(&sbq_wait->sbq->ws_active); 6739f6b7ef6SJens Axboe sbq_wait->sbq = NULL; 6749f6b7ef6SJens Axboe } 6759f6b7ef6SJens Axboe } 6769f6b7ef6SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_del_wait_queue); 6779f6b7ef6SJens Axboe 6785d2ee712SJens Axboe void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, 6795d2ee712SJens Axboe struct sbq_wait_state *ws, 6805d2ee712SJens Axboe struct sbq_wait *sbq_wait, int state) 6815d2ee712SJens Axboe { 6829f6b7ef6SJens Axboe if (!sbq_wait->sbq) { 6835d2ee712SJens Axboe atomic_inc(&sbq->ws_active); 6849f6b7ef6SJens Axboe sbq_wait->sbq = sbq; 6855d2ee712SJens Axboe } 6865d2ee712SJens Axboe prepare_to_wait_exclusive(&ws->wait, &sbq_wait->wait, state); 6875d2ee712SJens Axboe } 6885d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait); 6895d2ee712SJens Axboe 6905d2ee712SJens Axboe void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, 6915d2ee712SJens Axboe struct sbq_wait *sbq_wait) 6925d2ee712SJens Axboe { 6935d2ee712SJens Axboe finish_wait(&ws->wait, &sbq_wait->wait); 6949f6b7ef6SJens Axboe if (sbq_wait->sbq) { 6955d2ee712SJens Axboe atomic_dec(&sbq->ws_active); 6969f6b7ef6SJens Axboe sbq_wait->sbq = NULL; 6975d2ee712SJens Axboe } 6985d2ee712SJens Axboe } 6995d2ee712SJens Axboe EXPORT_SYMBOL_GPL(sbitmap_finish_wait); 700