1 /* 2 * Copyright (C) 2016 Facebook 3 * Copyright (C) 2013-2014 Jens Axboe 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public 7 * License v2 as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18 #include <linux/random.h> 19 #include <linux/sbitmap.h> 20 #include <linux/seq_file.h> 21 22 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, 23 gfp_t flags, int node) 24 { 25 unsigned int bits_per_word; 26 unsigned int i; 27 28 if (shift < 0) { 29 shift = ilog2(BITS_PER_LONG); 30 /* 31 * If the bitmap is small, shrink the number of bits per word so 32 * we spread over a few cachelines, at least. If less than 4 33 * bits, just forget about it, it's not going to work optimally 34 * anyway. 35 */ 36 if (depth >= 4) { 37 while ((4U << shift) > depth) 38 shift--; 39 } 40 } 41 bits_per_word = 1U << shift; 42 if (bits_per_word > BITS_PER_LONG) 43 return -EINVAL; 44 45 sb->shift = shift; 46 sb->depth = depth; 47 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 48 49 if (depth == 0) { 50 sb->map = NULL; 51 return 0; 52 } 53 54 sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node); 55 if (!sb->map) 56 return -ENOMEM; 57 58 for (i = 0; i < sb->map_nr; i++) { 59 sb->map[i].depth = min(depth, bits_per_word); 60 depth -= sb->map[i].depth; 61 } 62 return 0; 63 } 64 EXPORT_SYMBOL_GPL(sbitmap_init_node); 65 66 void sbitmap_resize(struct sbitmap *sb, unsigned int depth) 67 { 68 unsigned int bits_per_word = 1U << sb->shift; 69 unsigned int i; 70 71 sb->depth = depth; 72 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); 73 74 for (i = 0; i < sb->map_nr; i++) { 75 sb->map[i].depth = min(depth, bits_per_word); 76 depth -= sb->map[i].depth; 77 } 78 } 79 EXPORT_SYMBOL_GPL(sbitmap_resize); 80 81 static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint, 82 bool wrap) 83 { 84 unsigned int orig_hint = hint; 85 int nr; 86 87 while (1) { 88 nr = find_next_zero_bit(&word->word, word->depth, hint); 89 if (unlikely(nr >= word->depth)) { 90 /* 91 * We started with an offset, and we didn't reset the 92 * offset to 0 in a failure case, so start from 0 to 93 * exhaust the map. 94 */ 95 if (orig_hint && hint && wrap) { 96 hint = orig_hint = 0; 97 continue; 98 } 99 return -1; 100 } 101 102 if (!test_and_set_bit(nr, &word->word)) 103 break; 104 105 hint = nr + 1; 106 if (hint >= word->depth - 1) 107 hint = 0; 108 } 109 110 return nr; 111 } 112 113 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin) 114 { 115 unsigned int i, index; 116 int nr = -1; 117 118 index = SB_NR_TO_INDEX(sb, alloc_hint); 119 120 for (i = 0; i < sb->map_nr; i++) { 121 nr = __sbitmap_get_word(&sb->map[index], 122 SB_NR_TO_BIT(sb, alloc_hint), 123 !round_robin); 124 if (nr != -1) { 125 nr += index << sb->shift; 126 break; 127 } 128 129 /* Jump to next index. */ 130 index++; 131 alloc_hint = index << sb->shift; 132 133 if (index >= sb->map_nr) { 134 index = 0; 135 alloc_hint = 0; 136 } 137 } 138 139 return nr; 140 } 141 EXPORT_SYMBOL_GPL(sbitmap_get); 142 143 bool sbitmap_any_bit_set(const struct sbitmap *sb) 144 { 145 unsigned int i; 146 147 for (i = 0; i < sb->map_nr; i++) { 148 if (sb->map[i].word) 149 return true; 150 } 151 return false; 152 } 153 EXPORT_SYMBOL_GPL(sbitmap_any_bit_set); 154 155 bool sbitmap_any_bit_clear(const struct sbitmap *sb) 156 { 157 unsigned int i; 158 159 for (i = 0; i < sb->map_nr; i++) { 160 const struct sbitmap_word *word = &sb->map[i]; 161 unsigned long ret; 162 163 ret = find_first_zero_bit(&word->word, word->depth); 164 if (ret < word->depth) 165 return true; 166 } 167 return false; 168 } 169 EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear); 170 171 unsigned int sbitmap_weight(const struct sbitmap *sb) 172 { 173 unsigned int i, weight = 0; 174 175 for (i = 0; i < sb->map_nr; i++) { 176 const struct sbitmap_word *word = &sb->map[i]; 177 178 weight += bitmap_weight(&word->word, word->depth); 179 } 180 return weight; 181 } 182 EXPORT_SYMBOL_GPL(sbitmap_weight); 183 184 void sbitmap_show(struct sbitmap *sb, struct seq_file *m) 185 { 186 seq_printf(m, "depth=%u\n", sb->depth); 187 seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); 188 seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); 189 seq_printf(m, "map_nr=%u\n", sb->map_nr); 190 } 191 EXPORT_SYMBOL_GPL(sbitmap_show); 192 193 static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) 194 { 195 if ((offset & 0xf) == 0) { 196 if (offset != 0) 197 seq_putc(m, '\n'); 198 seq_printf(m, "%08x:", offset); 199 } 200 if ((offset & 0x1) == 0) 201 seq_putc(m, ' '); 202 seq_printf(m, "%02x", byte); 203 } 204 205 void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) 206 { 207 u8 byte = 0; 208 unsigned int byte_bits = 0; 209 unsigned int offset = 0; 210 int i; 211 212 for (i = 0; i < sb->map_nr; i++) { 213 unsigned long word = READ_ONCE(sb->map[i].word); 214 unsigned int word_bits = READ_ONCE(sb->map[i].depth); 215 216 while (word_bits > 0) { 217 unsigned int bits = min(8 - byte_bits, word_bits); 218 219 byte |= (word & (BIT(bits) - 1)) << byte_bits; 220 byte_bits += bits; 221 if (byte_bits == 8) { 222 emit_byte(m, offset, byte); 223 byte = 0; 224 byte_bits = 0; 225 offset++; 226 } 227 word >>= bits; 228 word_bits -= bits; 229 } 230 } 231 if (byte_bits) { 232 emit_byte(m, offset, byte); 233 offset++; 234 } 235 if (offset) 236 seq_putc(m, '\n'); 237 } 238 EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); 239 240 static unsigned int sbq_calc_wake_batch(unsigned int depth) 241 { 242 unsigned int wake_batch; 243 244 /* 245 * For each batch, we wake up one queue. We need to make sure that our 246 * batch size is small enough that the full depth of the bitmap is 247 * enough to wake up all of the queues. 248 */ 249 wake_batch = SBQ_WAKE_BATCH; 250 if (wake_batch > depth / SBQ_WAIT_QUEUES) 251 wake_batch = max(1U, depth / SBQ_WAIT_QUEUES); 252 253 return wake_batch; 254 } 255 256 int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, 257 int shift, bool round_robin, gfp_t flags, int node) 258 { 259 int ret; 260 int i; 261 262 ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node); 263 if (ret) 264 return ret; 265 266 sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags); 267 if (!sbq->alloc_hint) { 268 sbitmap_free(&sbq->sb); 269 return -ENOMEM; 270 } 271 272 if (depth && !round_robin) { 273 for_each_possible_cpu(i) 274 *per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth; 275 } 276 277 sbq->wake_batch = sbq_calc_wake_batch(depth); 278 atomic_set(&sbq->wake_index, 0); 279 280 sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node); 281 if (!sbq->ws) { 282 free_percpu(sbq->alloc_hint); 283 sbitmap_free(&sbq->sb); 284 return -ENOMEM; 285 } 286 287 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 288 init_waitqueue_head(&sbq->ws[i].wait); 289 atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch); 290 } 291 292 sbq->round_robin = round_robin; 293 return 0; 294 } 295 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); 296 297 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) 298 { 299 unsigned int wake_batch = sbq_calc_wake_batch(depth); 300 int i; 301 302 if (sbq->wake_batch != wake_batch) { 303 WRITE_ONCE(sbq->wake_batch, wake_batch); 304 /* 305 * Pairs with the memory barrier in sbq_wake_up() to ensure that 306 * the batch size is updated before the wait counts. 307 */ 308 smp_mb__before_atomic(); 309 for (i = 0; i < SBQ_WAIT_QUEUES; i++) 310 atomic_set(&sbq->ws[i].wait_cnt, 1); 311 } 312 sbitmap_resize(&sbq->sb, depth); 313 } 314 EXPORT_SYMBOL_GPL(sbitmap_queue_resize); 315 316 int __sbitmap_queue_get(struct sbitmap_queue *sbq) 317 { 318 unsigned int hint, depth; 319 int nr; 320 321 hint = this_cpu_read(*sbq->alloc_hint); 322 depth = READ_ONCE(sbq->sb.depth); 323 if (unlikely(hint >= depth)) { 324 hint = depth ? prandom_u32() % depth : 0; 325 this_cpu_write(*sbq->alloc_hint, hint); 326 } 327 nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin); 328 329 if (nr == -1) { 330 /* If the map is full, a hint won't do us much good. */ 331 this_cpu_write(*sbq->alloc_hint, 0); 332 } else if (nr == hint || unlikely(sbq->round_robin)) { 333 /* Only update the hint if we used it. */ 334 hint = nr + 1; 335 if (hint >= depth - 1) 336 hint = 0; 337 this_cpu_write(*sbq->alloc_hint, hint); 338 } 339 340 return nr; 341 } 342 EXPORT_SYMBOL_GPL(__sbitmap_queue_get); 343 344 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) 345 { 346 int i, wake_index; 347 348 wake_index = atomic_read(&sbq->wake_index); 349 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 350 struct sbq_wait_state *ws = &sbq->ws[wake_index]; 351 352 if (waitqueue_active(&ws->wait)) { 353 int o = atomic_read(&sbq->wake_index); 354 355 if (wake_index != o) 356 atomic_cmpxchg(&sbq->wake_index, o, wake_index); 357 return ws; 358 } 359 360 wake_index = sbq_index_inc(wake_index); 361 } 362 363 return NULL; 364 } 365 366 static void sbq_wake_up(struct sbitmap_queue *sbq) 367 { 368 struct sbq_wait_state *ws; 369 unsigned int wake_batch; 370 int wait_cnt; 371 372 /* 373 * Pairs with the memory barrier in set_current_state() to ensure the 374 * proper ordering of clear_bit()/waitqueue_active() in the waker and 375 * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See 376 * the comment on waitqueue_active(). This is __after_atomic because we 377 * just did clear_bit() in the caller. 378 */ 379 smp_mb__after_atomic(); 380 381 ws = sbq_wake_ptr(sbq); 382 if (!ws) 383 return; 384 385 wait_cnt = atomic_dec_return(&ws->wait_cnt); 386 if (wait_cnt <= 0) { 387 wake_batch = READ_ONCE(sbq->wake_batch); 388 /* 389 * Pairs with the memory barrier in sbitmap_queue_resize() to 390 * ensure that we see the batch size update before the wait 391 * count is reset. 392 */ 393 smp_mb__before_atomic(); 394 /* 395 * If there are concurrent callers to sbq_wake_up(), the last 396 * one to decrement the wait count below zero will bump it back 397 * up. If there is a concurrent resize, the count reset will 398 * either cause the cmpxchg to fail or overwrite after the 399 * cmpxchg. 400 */ 401 atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); 402 sbq_index_atomic_inc(&sbq->wake_index); 403 wake_up(&ws->wait); 404 } 405 } 406 407 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 408 unsigned int cpu) 409 { 410 sbitmap_clear_bit(&sbq->sb, nr); 411 sbq_wake_up(sbq); 412 if (likely(!sbq->round_robin && nr < sbq->sb.depth)) 413 *per_cpu_ptr(sbq->alloc_hint, cpu) = nr; 414 } 415 EXPORT_SYMBOL_GPL(sbitmap_queue_clear); 416 417 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) 418 { 419 int i, wake_index; 420 421 /* 422 * Pairs with the memory barrier in set_current_state() like in 423 * sbq_wake_up(). 424 */ 425 smp_mb(); 426 wake_index = atomic_read(&sbq->wake_index); 427 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 428 struct sbq_wait_state *ws = &sbq->ws[wake_index]; 429 430 if (waitqueue_active(&ws->wait)) 431 wake_up(&ws->wait); 432 433 wake_index = sbq_index_inc(wake_index); 434 } 435 } 436 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); 437 438 void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) 439 { 440 bool first; 441 int i; 442 443 sbitmap_show(&sbq->sb, m); 444 445 seq_puts(m, "alloc_hint={"); 446 first = true; 447 for_each_possible_cpu(i) { 448 if (!first) 449 seq_puts(m, ", "); 450 first = false; 451 seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); 452 } 453 seq_puts(m, "}\n"); 454 455 seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); 456 seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); 457 458 seq_puts(m, "ws={\n"); 459 for (i = 0; i < SBQ_WAIT_QUEUES; i++) { 460 struct sbq_wait_state *ws = &sbq->ws[i]; 461 462 seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", 463 atomic_read(&ws->wait_cnt), 464 waitqueue_active(&ws->wait) ? "active" : "inactive"); 465 } 466 seq_puts(m, "}\n"); 467 468 seq_printf(m, "round_robin=%d\n", sbq->round_robin); 469 } 470 EXPORT_SYMBOL_GPL(sbitmap_queue_show); 471