188459642SOmar Sandoval /* 288459642SOmar Sandoval * Copyright (C) 2016 Facebook 388459642SOmar Sandoval * Copyright (C) 2013-2014 Jens Axboe 488459642SOmar Sandoval * 588459642SOmar Sandoval * This program is free software; you can redistribute it and/or 688459642SOmar Sandoval * modify it under the terms of the GNU General Public 788459642SOmar Sandoval * License v2 as published by the Free Software Foundation. 888459642SOmar Sandoval * 988459642SOmar Sandoval * This program is distributed in the hope that it will be useful, 1088459642SOmar Sandoval * but WITHOUT ANY WARRANTY; without even the implied warranty of 1188459642SOmar Sandoval * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1288459642SOmar Sandoval * General Public License for more details. 1388459642SOmar Sandoval * 1488459642SOmar Sandoval * You should have received a copy of the GNU General Public License 1588459642SOmar Sandoval * along with this program. If not, see <https://www.gnu.org/licenses/>. 1688459642SOmar Sandoval */ 1788459642SOmar Sandoval 1898d95416SOmar Sandoval #include <linux/random.h> 1988459642SOmar Sandoval #include <linux/sbitmap.h> 2088459642SOmar Sandoval 2188459642SOmar Sandoval int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 2288459642SOmar Sandoval gfp_t flags, int node) 2388459642SOmar Sandoval { 2488459642SOmar Sandoval unsigned int bits_per_word; 2588459642SOmar Sandoval unsigned int i; 2688459642SOmar Sandoval 2788459642SOmar Sandoval if (shift < 0) { 2888459642SOmar Sandoval shift = ilog2(BITS_PER_LONG); 2988459642SOmar Sandoval /* 3088459642SOmar Sandoval * If the bitmap is small, shrink the number of bits per word so 3188459642SOmar Sandoval * we spread over a few cachelines, at least. If less than 4 3288459642SOmar Sandoval * bits, just forget about it, it's not going to work optimally 3388459642SOmar Sandoval * anyway. 3488459642SOmar Sandoval */ 3588459642SOmar Sandoval if (depth >= 4) { 3688459642SOmar Sandoval while ((4U << shift) > depth) 3788459642SOmar Sandoval shift--; 3888459642SOmar Sandoval } 3988459642SOmar Sandoval } 4088459642SOmar Sandoval bits_per_word = 1U << shift; 4188459642SOmar Sandoval if (bits_per_word > BITS_PER_LONG) 4288459642SOmar Sandoval return -EINVAL; 4388459642SOmar Sandoval 4488459642SOmar Sandoval sb->shift = shift; 4588459642SOmar Sandoval sb->depth = depth; 4688459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 4788459642SOmar Sandoval 4888459642SOmar Sandoval if (depth == 0) { 4988459642SOmar Sandoval sb->map = NULL; 5088459642SOmar Sandoval return 0; 5188459642SOmar Sandoval } 5288459642SOmar Sandoval 5388459642SOmar Sandoval sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); 5488459642SOmar Sandoval if (!sb->map) 5588459642SOmar Sandoval return -ENOMEM; 5688459642SOmar Sandoval 5788459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 5888459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 5988459642SOmar Sandoval depth -= sb->map[i].depth; 6088459642SOmar Sandoval } 6188459642SOmar Sandoval return 0; 6288459642SOmar Sandoval } 6388459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_init_node); 6488459642SOmar Sandoval 6588459642SOmar Sandoval void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 6688459642SOmar Sandoval { 6788459642SOmar Sandoval unsigned int bits_per_word = 1U << sb->shift; 6888459642SOmar Sandoval unsigned int i; 6988459642SOmar Sandoval 7088459642SOmar Sandoval sb->depth = depth; 7188459642SOmar Sandoval sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 7288459642SOmar Sandoval 7388459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 7488459642SOmar Sandoval sb->map[i].depth = min(depth, bits_per_word); 7588459642SOmar Sandoval depth -= sb->map[i].depth; 7688459642SOmar Sandoval } 7788459642SOmar Sandoval } 7888459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_resize); 7988459642SOmar Sandoval 8088459642SOmar Sandoval static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint, 8188459642SOmar Sandoval bool wrap) 8288459642SOmar Sandoval { 8388459642SOmar Sandoval unsigned int orig_hint = hint; 8488459642SOmar Sandoval int nr; 8588459642SOmar Sandoval 8688459642SOmar Sandoval while (1) { 8788459642SOmar Sandoval nr = find_next_zero_bit(&word->word, word->depth, hint); 8888459642SOmar Sandoval if (unlikely(nr >= word->depth)) { 8988459642SOmar Sandoval /* 9088459642SOmar Sandoval * We started with an offset, and we didn't reset the 9188459642SOmar Sandoval * offset to 0 in a failure case, so start from 0 to 9288459642SOmar Sandoval * exhaust the map. 9388459642SOmar Sandoval */ 9488459642SOmar Sandoval if (orig_hint && hint && wrap) { 9588459642SOmar Sandoval hint = orig_hint = 0; 9688459642SOmar Sandoval continue; 9788459642SOmar Sandoval } 9888459642SOmar Sandoval return -1; 9988459642SOmar Sandoval } 10088459642SOmar Sandoval 10188459642SOmar Sandoval if (!test_and_set_bit(nr, &word->word)) 10288459642SOmar Sandoval break; 10388459642SOmar Sandoval 10488459642SOmar Sandoval hint = nr + 1; 10588459642SOmar Sandoval if (hint >= word->depth - 1) 10688459642SOmar Sandoval hint = 0; 10788459642SOmar Sandoval } 10888459642SOmar Sandoval 10988459642SOmar Sandoval return nr; 11088459642SOmar Sandoval } 11188459642SOmar Sandoval 11288459642SOmar Sandoval int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) 11388459642SOmar Sandoval { 11488459642SOmar Sandoval unsigned int i, index; 11588459642SOmar Sandoval int nr = -1; 11688459642SOmar Sandoval 11788459642SOmar Sandoval index = SB_NR_TO_INDEX(sb, alloc_hint); 11888459642SOmar Sandoval 11988459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 12088459642SOmar Sandoval nr = __sbitmap_get_word(&sb->map[index], 12188459642SOmar Sandoval SB_NR_TO_BIT(sb, alloc_hint), 12288459642SOmar Sandoval !round_robin); 12388459642SOmar Sandoval if (nr != -1) { 12488459642SOmar Sandoval nr += index << sb->shift; 12588459642SOmar Sandoval break; 12688459642SOmar Sandoval } 12788459642SOmar Sandoval 12888459642SOmar Sandoval /* Jump to next index. */ 12988459642SOmar Sandoval index++; 13088459642SOmar Sandoval alloc_hint = index << sb->shift; 13188459642SOmar Sandoval 13288459642SOmar Sandoval if (index >= sb->map_nr) { 13388459642SOmar Sandoval index = 0; 13488459642SOmar Sandoval alloc_hint = 0; 13588459642SOmar Sandoval } 13688459642SOmar Sandoval } 13788459642SOmar Sandoval 13888459642SOmar Sandoval return nr; 13988459642SOmar Sandoval } 14088459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_get); 14188459642SOmar Sandoval 14288459642SOmar Sandoval bool sbitmap_any_bit_set(const struct sbitmap *sb) 14388459642SOmar Sandoval { 14488459642SOmar Sandoval unsigned int i; 14588459642SOmar Sandoval 14688459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 14788459642SOmar Sandoval if (sb->map[i].word) 14888459642SOmar Sandoval return true; 14988459642SOmar Sandoval } 15088459642SOmar Sandoval return false; 15188459642SOmar Sandoval } 15288459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 15388459642SOmar Sandoval 15488459642SOmar Sandoval bool sbitmap_any_bit_clear(const struct sbitmap *sb) 15588459642SOmar Sandoval { 15688459642SOmar Sandoval unsigned int i; 15788459642SOmar Sandoval 15888459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 15988459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 16088459642SOmar Sandoval unsigned long ret; 16188459642SOmar Sandoval 16288459642SOmar Sandoval ret = find_first_zero_bit(&word->word, word->depth); 16388459642SOmar Sandoval if (ret < word->depth) 16488459642SOmar Sandoval return true; 16588459642SOmar Sandoval } 16688459642SOmar Sandoval return false; 16788459642SOmar Sandoval } 16888459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); 16988459642SOmar Sandoval 17088459642SOmar Sandoval unsigned int sbitmap_weight(const struct sbitmap *sb) 17188459642SOmar Sandoval { 17260658e0dSColin Ian King unsigned int i, weight = 0; 17388459642SOmar Sandoval 17488459642SOmar Sandoval for (i = 0; i < sb->map_nr; i++) { 17588459642SOmar Sandoval const struct sbitmap_word *word = &sb->map[i]; 17688459642SOmar Sandoval 17788459642SOmar Sandoval weight += bitmap_weight(&word->word, word->depth); 17888459642SOmar Sandoval } 17988459642SOmar Sandoval return weight; 18088459642SOmar Sandoval } 18188459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_weight); 18288459642SOmar Sandoval 18388459642SOmar Sandoval static unsigned int sbq_calc_wake_batch(unsigned int depth) 18488459642SOmar Sandoval { 18588459642SOmar Sandoval unsigned int wake_batch; 18688459642SOmar Sandoval 18788459642SOmar Sandoval /* 18888459642SOmar Sandoval * For each batch, we wake up one queue. We need to make sure that our 18988459642SOmar Sandoval * batch size is small enough that the full depth of the bitmap is 19088459642SOmar Sandoval * enough to wake up all of the queues. 19188459642SOmar Sandoval */ 19288459642SOmar Sandoval wake_batch = SBQ_WAKE_BATCH; 19388459642SOmar Sandoval if (wake_batch > depth / SBQ_WAIT_QUEUES) 19488459642SOmar Sandoval wake_batch = max(1U, depth / SBQ_WAIT_QUEUES); 19588459642SOmar Sandoval 19688459642SOmar Sandoval return wake_batch; 19788459642SOmar Sandoval } 19888459642SOmar Sandoval 19988459642SOmar Sandoval int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 200f4a644dbSOmar Sandoval int shift, bool round_robin, gfp_t flags, int node) 20188459642SOmar Sandoval { 20288459642SOmar Sandoval int ret; 20388459642SOmar Sandoval int i; 20488459642SOmar Sandoval 20588459642SOmar Sandoval ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); 20688459642SOmar Sandoval if (ret) 20788459642SOmar Sandoval return ret; 20888459642SOmar Sandoval 20940aabb67SOmar Sandoval sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 21040aabb67SOmar Sandoval if (!sbq->alloc_hint) { 21140aabb67SOmar Sandoval sbitmap_free(&sbq->sb); 21240aabb67SOmar Sandoval return -ENOMEM; 21340aabb67SOmar Sandoval } 21440aabb67SOmar Sandoval 21598d95416SOmar Sandoval if (depth && !round_robin) { 21698d95416SOmar Sandoval for_each_possible_cpu(i) 21798d95416SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 21898d95416SOmar Sandoval } 21998d95416SOmar Sandoval 22088459642SOmar Sandoval sbq->wake_batch = sbq_calc_wake_batch(depth); 22188459642SOmar Sandoval atomic_set(&sbq->wake_index, 0); 22288459642SOmar Sandoval 22348e28166SOmar Sandoval sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 22488459642SOmar Sandoval if (!sbq->ws) { 22540aabb67SOmar Sandoval free_percpu(sbq->alloc_hint); 22688459642SOmar Sandoval sbitmap_free(&sbq->sb); 22788459642SOmar Sandoval return -ENOMEM; 22888459642SOmar Sandoval } 22988459642SOmar Sandoval 23088459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 23188459642SOmar Sandoval init_waitqueue_head(&sbq->ws[i].wait); 23288459642SOmar Sandoval atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 23388459642SOmar Sandoval } 234f4a644dbSOmar Sandoval 235f4a644dbSOmar Sandoval sbq->round_robin = round_robin; 23688459642SOmar Sandoval return 0; 23788459642SOmar Sandoval } 23888459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 23988459642SOmar Sandoval 24088459642SOmar Sandoval void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 24188459642SOmar Sandoval { 24288459642SOmar Sandoval sbq->wake_batch = sbq_calc_wake_batch(depth); 24388459642SOmar Sandoval sbitmap_resize(&sbq->sb, depth); 24488459642SOmar Sandoval } 24588459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 24688459642SOmar Sandoval 247f4a644dbSOmar Sandoval int __sbitmap_queue_get(struct sbitmap_queue *sbq) 24840aabb67SOmar Sandoval { 24905fd095dSOmar Sandoval unsigned int hint, depth; 25040aabb67SOmar Sandoval int nr; 25140aabb67SOmar Sandoval 25240aabb67SOmar Sandoval hint = this_cpu_read(*sbq->alloc_hint); 25305fd095dSOmar Sandoval depth = READ_ONCE(sbq->sb.depth); 25405fd095dSOmar Sandoval if (unlikely(hint >= depth)) { 25505fd095dSOmar Sandoval hint = depth ? prandom_u32() % depth : 0; 25605fd095dSOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 25705fd095dSOmar Sandoval } 258f4a644dbSOmar Sandoval nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); 25940aabb67SOmar Sandoval 26040aabb67SOmar Sandoval if (nr == -1) { 26140aabb67SOmar Sandoval /* If the map is full, a hint won't do us much good. */ 26240aabb67SOmar Sandoval this_cpu_write(*sbq->alloc_hint, 0); 263f4a644dbSOmar Sandoval } else if (nr == hint || unlikely(sbq->round_robin)) { 26440aabb67SOmar Sandoval /* Only update the hint if we used it. */ 26540aabb67SOmar Sandoval hint = nr + 1; 26605fd095dSOmar Sandoval if (hint >= depth - 1) 26740aabb67SOmar Sandoval hint = 0; 26840aabb67SOmar Sandoval this_cpu_write(*sbq->alloc_hint, hint); 26940aabb67SOmar Sandoval } 27040aabb67SOmar Sandoval 27140aabb67SOmar Sandoval return nr; 27240aabb67SOmar Sandoval } 27340aabb67SOmar Sandoval EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 27440aabb67SOmar Sandoval 27588459642SOmar Sandoval static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 27688459642SOmar Sandoval { 27788459642SOmar Sandoval int i, wake_index; 27888459642SOmar Sandoval 27988459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 28088459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 28188459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 28288459642SOmar Sandoval 28388459642SOmar Sandoval if (waitqueue_active(&ws->wait)) { 28488459642SOmar Sandoval int o = atomic_read(&sbq->wake_index); 28588459642SOmar Sandoval 28688459642SOmar Sandoval if (wake_index != o) 28788459642SOmar Sandoval atomic_cmpxchg(&sbq->wake_index, o, wake_index); 28888459642SOmar Sandoval return ws; 28988459642SOmar Sandoval } 29088459642SOmar Sandoval 29188459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 29288459642SOmar Sandoval } 29388459642SOmar Sandoval 29488459642SOmar Sandoval return NULL; 29588459642SOmar Sandoval } 29688459642SOmar Sandoval 29788459642SOmar Sandoval static void sbq_wake_up(struct sbitmap_queue *sbq) 29888459642SOmar Sandoval { 29988459642SOmar Sandoval struct sbq_wait_state *ws; 30088459642SOmar Sandoval int wait_cnt; 30188459642SOmar Sandoval 302*f66227deSOmar Sandoval /* 303*f66227deSOmar Sandoval * Pairs with the memory barrier in set_current_state() to ensure the 304*f66227deSOmar Sandoval * proper ordering of clear_bit()/waitqueue_active() in the waker and 305*f66227deSOmar Sandoval * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See 306*f66227deSOmar Sandoval * the comment on waitqueue_active(). This is __after_atomic because we 307*f66227deSOmar Sandoval * just did clear_bit() in the caller. 308*f66227deSOmar Sandoval */ 309*f66227deSOmar Sandoval smp_mb__after_atomic(); 31088459642SOmar Sandoval 31188459642SOmar Sandoval ws = sbq_wake_ptr(sbq); 31288459642SOmar Sandoval if (!ws) 31388459642SOmar Sandoval return; 31488459642SOmar Sandoval 31588459642SOmar Sandoval wait_cnt = atomic_dec_return(&ws->wait_cnt); 31688459642SOmar Sandoval if (unlikely(wait_cnt < 0)) 31788459642SOmar Sandoval wait_cnt = atomic_inc_return(&ws->wait_cnt); 31888459642SOmar Sandoval if (wait_cnt == 0) { 31988459642SOmar Sandoval atomic_add(sbq->wake_batch, &ws->wait_cnt); 32088459642SOmar Sandoval sbq_index_atomic_inc(&sbq->wake_index); 32188459642SOmar Sandoval wake_up(&ws->wait); 32288459642SOmar Sandoval } 32388459642SOmar Sandoval } 32488459642SOmar Sandoval 32540aabb67SOmar Sandoval void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 326f4a644dbSOmar Sandoval unsigned int cpu) 32788459642SOmar Sandoval { 32888459642SOmar Sandoval sbitmap_clear_bit(&sbq->sb, nr); 32988459642SOmar Sandoval sbq_wake_up(sbq); 3305c64a8dfSOmar Sandoval if (likely(!sbq->round_robin && nr < sbq->sb.depth)) 33140aabb67SOmar Sandoval *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 33288459642SOmar Sandoval } 33388459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 33488459642SOmar Sandoval 33588459642SOmar Sandoval void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 33688459642SOmar Sandoval { 33788459642SOmar Sandoval int i, wake_index; 33888459642SOmar Sandoval 33988459642SOmar Sandoval /* 340*f66227deSOmar Sandoval * Pairs with the memory barrier in set_current_state() like in 341*f66227deSOmar Sandoval * sbq_wake_up(). 34288459642SOmar Sandoval */ 34388459642SOmar Sandoval smp_mb(); 34488459642SOmar Sandoval wake_index = atomic_read(&sbq->wake_index); 34588459642SOmar Sandoval for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 34688459642SOmar Sandoval struct sbq_wait_state *ws = &sbq->ws[wake_index]; 34788459642SOmar Sandoval 34888459642SOmar Sandoval if (waitqueue_active(&ws->wait)) 34988459642SOmar Sandoval wake_up(&ws->wait); 35088459642SOmar Sandoval 35188459642SOmar Sandoval wake_index = sbq_index_inc(wake_index); 35288459642SOmar Sandoval } 35388459642SOmar Sandoval } 35488459642SOmar Sandoval EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 355