1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * random utiility code, for bcache but in theory not specific to bcache 4 * 5 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> 6 * Copyright 2012 Google, Inc. 7 */ 8 9 #include <linux/bio.h> 10 #include <linux/blkdev.h> 11 #include <linux/console.h> 12 #include <linux/ctype.h> 13 #include <linux/debugfs.h> 14 #include <linux/freezer.h> 15 #include <linux/kthread.h> 16 #include <linux/log2.h> 17 #include <linux/math64.h> 18 #include <linux/percpu.h> 19 #include <linux/preempt.h> 20 #include <linux/random.h> 21 #include <linux/seq_file.h> 22 #include <linux/string.h> 23 #include <linux/types.h> 24 #include <linux/sched/clock.h> 25 26 #include "eytzinger.h" 27 #include "mean_and_variance.h" 28 #include "util.h" 29 30 static const char si_units[] = "?kMGTPEZY"; 31 32 /* string_get_size units: */ 33 static const char *const units_2[] = { 34 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" 35 }; 36 static const char *const units_10[] = { 37 "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" 38 }; 39 40 static int parse_u64(const char *cp, u64 *res) 41 { 42 const char *start = cp; 43 u64 v = 0; 44 45 if (!isdigit(*cp)) 46 return -EINVAL; 47 48 do { 49 if (v > U64_MAX / 10) 50 return -ERANGE; 51 v *= 10; 52 if (v > U64_MAX - (*cp - '0')) 53 return -ERANGE; 54 v += *cp - '0'; 55 cp++; 56 } while (isdigit(*cp)); 57 58 *res = v; 59 return cp - start; 60 } 61 62 static int bch2_pow(u64 n, u64 p, u64 *res) 63 { 64 *res = 1; 65 66 while (p--) { 67 if (*res > div_u64(U64_MAX, n)) 68 return -ERANGE; 69 *res *= n; 70 } 71 return 0; 72 } 73 74 static int parse_unit_suffix(const char *cp, u64 *res) 75 { 76 const char *start = cp; 77 u64 base = 1024; 78 unsigned u; 79 int ret; 80 81 if (*cp == ' ') 82 cp++; 83 84 for (u = 1; u < strlen(si_units); u++) 85 if (*cp == si_units[u]) { 86 cp++; 87 goto got_unit; 88 } 89 90 for (u = 0; u < ARRAY_SIZE(units_2); u++) 91 if (!strncmp(cp, units_2[u], strlen(units_2[u]))) { 92 cp += strlen(units_2[u]); 93 goto got_unit; 94 } 95 96 for (u = 0; u < ARRAY_SIZE(units_10); u++) 97 if (!strncmp(cp, units_10[u], strlen(units_10[u]))) { 98 cp += strlen(units_10[u]); 99 base = 1000; 100 goto got_unit; 101 } 102 103 *res = 1; 104 return 0; 105 got_unit: 106 ret = bch2_pow(base, u, res); 107 if (ret) 108 return ret; 109 110 return cp - start; 111 } 112 113 #define parse_or_ret(cp, _f) \ 114 do { \ 115 int _ret = _f; \ 116 if (_ret < 0) \ 117 return _ret; \ 118 cp += _ret; \ 119 } while (0) 120 121 static int __bch2_strtou64_h(const char *cp, u64 *res) 122 { 123 const char *start = cp; 124 u64 v = 0, b, f_n = 0, f_d = 1; 125 int ret; 126 127 parse_or_ret(cp, parse_u64(cp, &v)); 128 129 if (*cp == '.') { 130 cp++; 131 ret = parse_u64(cp, &f_n); 132 if (ret < 0) 133 return ret; 134 cp += ret; 135 136 ret = bch2_pow(10, ret, &f_d); 137 if (ret) 138 return ret; 139 } 140 141 parse_or_ret(cp, parse_unit_suffix(cp, &b)); 142 143 if (v > div_u64(U64_MAX, b)) 144 return -ERANGE; 145 v *= b; 146 147 if (f_n > div_u64(U64_MAX, b)) 148 return -ERANGE; 149 150 f_n = div_u64(f_n * b, f_d); 151 if (v + f_n < v) 152 return -ERANGE; 153 v += f_n; 154 155 *res = v; 156 return cp - start; 157 } 158 159 static int __bch2_strtoh(const char *cp, u64 *res, 160 u64 t_max, bool t_signed) 161 { 162 bool positive = *cp != '-'; 163 u64 v = 0; 164 165 if (*cp == '+' || *cp == '-') 166 cp++; 167 168 parse_or_ret(cp, __bch2_strtou64_h(cp, &v)); 169 170 if (*cp == '\n') 171 cp++; 172 if (*cp) 173 return -EINVAL; 174 175 if (positive) { 176 if (v > t_max) 177 return -ERANGE; 178 } else { 179 if (v && !t_signed) 180 return -ERANGE; 181 182 if (v > t_max + 1) 183 return -ERANGE; 184 v = -v; 185 } 186 187 *res = v; 188 return 0; 189 } 190 191 #define STRTO_H(name, type) \ 192 int bch2_ ## name ## _h(const char *cp, type *res) \ 193 { \ 194 u64 v = 0; \ 195 int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \ 196 ANYSINT_MAX(type) != ((type) ~0ULL)); \ 197 *res = v; \ 198 return ret; \ 199 } 200 201 STRTO_H(strtoint, int) 202 STRTO_H(strtouint, unsigned int) 203 STRTO_H(strtoll, long long) 204 STRTO_H(strtoull, unsigned long long) 205 STRTO_H(strtou64, u64) 206 207 u64 bch2_read_flag_list(char *opt, const char * const list[]) 208 { 209 u64 ret = 0; 210 char *p, *s, *d = kstrdup(opt, GFP_KERNEL); 211 212 if (!d) 213 return -ENOMEM; 214 215 s = strim(d); 216 217 while ((p = strsep(&s, ","))) { 218 int flag = match_string(list, -1, p); 219 220 if (flag < 0) { 221 ret = -1; 222 break; 223 } 224 225 ret |= 1 << flag; 226 } 227 228 kfree(d); 229 230 return ret; 231 } 232 233 bool bch2_is_zero(const void *_p, size_t n) 234 { 235 const char *p = _p; 236 size_t i; 237 238 for (i = 0; i < n; i++) 239 if (p[i]) 240 return false; 241 return true; 242 } 243 244 void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits) 245 { 246 while (nr_bits) 247 prt_char(out, '0' + ((v >> --nr_bits) & 1)); 248 } 249 250 void bch2_print_string_as_lines(const char *prefix, const char *lines) 251 { 252 const char *p; 253 254 if (!lines) { 255 printk("%s (null)\n", prefix); 256 return; 257 } 258 259 console_lock(); 260 while (1) { 261 p = strchrnul(lines, '\n'); 262 printk("%s%.*s\n", prefix, (int) (p - lines), lines); 263 if (!*p) 264 break; 265 lines = p + 1; 266 } 267 console_unlock(); 268 } 269 270 int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task) 271 { 272 #ifdef CONFIG_STACKTRACE 273 unsigned nr_entries = 0; 274 int ret = 0; 275 276 stack->nr = 0; 277 ret = darray_make_room(stack, 32); 278 if (ret) 279 return ret; 280 281 if (!down_read_trylock(&task->signal->exec_update_lock)) 282 return -1; 283 284 do { 285 nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, 0); 286 } while (nr_entries == stack->size && 287 !(ret = darray_make_room(stack, stack->size * 2))); 288 289 stack->nr = nr_entries; 290 up_read(&task->signal->exec_update_lock); 291 292 return ret; 293 #else 294 return 0; 295 #endif 296 } 297 298 void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) 299 { 300 unsigned long *i; 301 302 darray_for_each(*stack, i) { 303 prt_printf(out, "[<0>] %pB", (void *) *i); 304 prt_newline(out); 305 } 306 } 307 308 int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task) 309 { 310 bch_stacktrace stack = { 0 }; 311 int ret = bch2_save_backtrace(&stack, task); 312 313 bch2_prt_backtrace(out, &stack); 314 darray_exit(&stack); 315 return ret; 316 } 317 318 /* time stats: */ 319 320 #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT 321 static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v) 322 { 323 unsigned i = 0; 324 325 while (i < ARRAY_SIZE(q->entries)) { 326 struct bch2_quantile_entry *e = q->entries + i; 327 328 if (unlikely(!e->step)) { 329 e->m = v; 330 e->step = max_t(unsigned, v / 2, 1024); 331 } else if (e->m > v) { 332 e->m = e->m >= e->step 333 ? e->m - e->step 334 : 0; 335 } else if (e->m < v) { 336 e->m = e->m + e->step > e->m 337 ? e->m + e->step 338 : U32_MAX; 339 } 340 341 if ((e->m > v ? e->m - v : v - e->m) < e->step) 342 e->step = max_t(unsigned, e->step / 2, 1); 343 344 if (v >= e->m) 345 break; 346 347 i = eytzinger0_child(i, v > e->m); 348 } 349 } 350 351 static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, 352 u64 start, u64 end) 353 { 354 u64 duration, freq; 355 356 if (time_after64(end, start)) { 357 duration = end - start; 358 mean_and_variance_update(&stats->duration_stats, duration); 359 mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration); 360 stats->max_duration = max(stats->max_duration, duration); 361 stats->min_duration = min(stats->min_duration, duration); 362 bch2_quantiles_update(&stats->quantiles, duration); 363 } 364 365 if (time_after64(end, stats->last_event)) { 366 freq = end - stats->last_event; 367 mean_and_variance_update(&stats->freq_stats, freq); 368 mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq); 369 stats->max_freq = max(stats->max_freq, freq); 370 stats->min_freq = min(stats->min_freq, freq); 371 stats->last_event = end; 372 } 373 } 374 375 static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, 376 struct bch2_time_stat_buffer *b) 377 { 378 struct bch2_time_stat_buffer_entry *i; 379 unsigned long flags; 380 381 spin_lock_irqsave(&stats->lock, flags); 382 for (i = b->entries; 383 i < b->entries + ARRAY_SIZE(b->entries); 384 i++) 385 bch2_time_stats_update_one(stats, i->start, i->end); 386 spin_unlock_irqrestore(&stats->lock, flags); 387 388 b->nr = 0; 389 } 390 391 void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) 392 { 393 unsigned long flags; 394 395 WARN_RATELIMIT(!stats->min_duration || !stats->min_freq, 396 "time_stats: min_duration = %llu, min_freq = %llu", 397 stats->min_duration, stats->min_freq); 398 399 if (!stats->buffer) { 400 spin_lock_irqsave(&stats->lock, flags); 401 bch2_time_stats_update_one(stats, start, end); 402 403 if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 && 404 stats->duration_stats.n > 1024) 405 stats->buffer = 406 alloc_percpu_gfp(struct bch2_time_stat_buffer, 407 GFP_ATOMIC); 408 spin_unlock_irqrestore(&stats->lock, flags); 409 } else { 410 struct bch2_time_stat_buffer *b; 411 412 preempt_disable(); 413 b = this_cpu_ptr(stats->buffer); 414 415 BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); 416 b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) { 417 .start = start, 418 .end = end 419 }; 420 421 if (unlikely(b->nr == ARRAY_SIZE(b->entries))) 422 bch2_time_stats_clear_buffer(stats, b); 423 preempt_enable(); 424 } 425 } 426 #endif 427 428 static const struct time_unit { 429 const char *name; 430 u64 nsecs; 431 } time_units[] = { 432 { "ns", 1 }, 433 { "us", NSEC_PER_USEC }, 434 { "ms", NSEC_PER_MSEC }, 435 { "s", NSEC_PER_SEC }, 436 { "m", (u64) NSEC_PER_SEC * 60}, 437 { "h", (u64) NSEC_PER_SEC * 3600}, 438 { "eon", U64_MAX }, 439 }; 440 441 static const struct time_unit *pick_time_units(u64 ns) 442 { 443 const struct time_unit *u; 444 445 for (u = time_units; 446 u + 1 < time_units + ARRAY_SIZE(time_units) && 447 ns >= u[1].nsecs << 1; 448 u++) 449 ; 450 451 return u; 452 } 453 454 void bch2_pr_time_units(struct printbuf *out, u64 ns) 455 { 456 const struct time_unit *u = pick_time_units(ns); 457 458 prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); 459 } 460 461 static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) 462 { 463 const struct time_unit *u = pick_time_units(ns); 464 465 prt_printf(out, "%llu ", div64_u64(ns, u->nsecs)); 466 prt_tab_rjust(out); 467 prt_printf(out, "%s", u->name); 468 } 469 470 #define TABSTOP_SIZE 12 471 472 static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) 473 { 474 prt_str(out, name); 475 prt_tab(out); 476 bch2_pr_time_units_aligned(out, ns); 477 prt_newline(out); 478 } 479 480 void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) 481 { 482 const struct time_unit *u; 483 s64 f_mean = 0, d_mean = 0; 484 u64 q, last_q = 0, f_stddev = 0, d_stddev = 0; 485 int i; 486 /* 487 * avoid divide by zero 488 */ 489 if (stats->freq_stats.n) { 490 f_mean = mean_and_variance_get_mean(stats->freq_stats); 491 f_stddev = mean_and_variance_get_stddev(stats->freq_stats); 492 d_mean = mean_and_variance_get_mean(stats->duration_stats); 493 d_stddev = mean_and_variance_get_stddev(stats->duration_stats); 494 } 495 496 printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE); 497 prt_printf(out, "count:"); 498 prt_tab(out); 499 prt_printf(out, "%llu ", 500 stats->duration_stats.n); 501 printbuf_tabstop_pop(out); 502 prt_newline(out); 503 504 printbuf_tabstops_reset(out); 505 506 printbuf_tabstop_push(out, out->indent + 20); 507 printbuf_tabstop_push(out, TABSTOP_SIZE + 2); 508 printbuf_tabstop_push(out, 0); 509 printbuf_tabstop_push(out, TABSTOP_SIZE + 2); 510 511 prt_tab(out); 512 prt_printf(out, "since mount"); 513 prt_tab_rjust(out); 514 prt_tab(out); 515 prt_printf(out, "recent"); 516 prt_tab_rjust(out); 517 prt_newline(out); 518 519 printbuf_tabstops_reset(out); 520 printbuf_tabstop_push(out, out->indent + 20); 521 printbuf_tabstop_push(out, TABSTOP_SIZE); 522 printbuf_tabstop_push(out, 2); 523 printbuf_tabstop_push(out, TABSTOP_SIZE); 524 525 prt_printf(out, "duration of events"); 526 prt_newline(out); 527 printbuf_indent_add(out, 2); 528 529 pr_name_and_units(out, "min:", stats->min_duration); 530 pr_name_and_units(out, "max:", stats->max_duration); 531 532 prt_printf(out, "mean:"); 533 prt_tab(out); 534 bch2_pr_time_units_aligned(out, d_mean); 535 prt_tab(out); 536 bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted)); 537 prt_newline(out); 538 539 prt_printf(out, "stddev:"); 540 prt_tab(out); 541 bch2_pr_time_units_aligned(out, d_stddev); 542 prt_tab(out); 543 bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted)); 544 545 printbuf_indent_sub(out, 2); 546 prt_newline(out); 547 548 prt_printf(out, "time between events"); 549 prt_newline(out); 550 printbuf_indent_add(out, 2); 551 552 pr_name_and_units(out, "min:", stats->min_freq); 553 pr_name_and_units(out, "max:", stats->max_freq); 554 555 prt_printf(out, "mean:"); 556 prt_tab(out); 557 bch2_pr_time_units_aligned(out, f_mean); 558 prt_tab(out); 559 bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted)); 560 prt_newline(out); 561 562 prt_printf(out, "stddev:"); 563 prt_tab(out); 564 bch2_pr_time_units_aligned(out, f_stddev); 565 prt_tab(out); 566 bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted)); 567 568 printbuf_indent_sub(out, 2); 569 prt_newline(out); 570 571 printbuf_tabstops_reset(out); 572 573 i = eytzinger0_first(NR_QUANTILES); 574 u = pick_time_units(stats->quantiles.entries[i].m); 575 576 prt_printf(out, "quantiles (%s):\t", u->name); 577 eytzinger0_for_each(i, NR_QUANTILES) { 578 bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1; 579 580 q = max(stats->quantiles.entries[i].m, last_q); 581 prt_printf(out, "%llu ", 582 div_u64(q, u->nsecs)); 583 if (is_last) 584 prt_newline(out); 585 last_q = q; 586 } 587 } 588 589 void bch2_time_stats_exit(struct bch2_time_stats *stats) 590 { 591 free_percpu(stats->buffer); 592 } 593 594 void bch2_time_stats_init(struct bch2_time_stats *stats) 595 { 596 memset(stats, 0, sizeof(*stats)); 597 stats->duration_stats_weighted.weight = 8; 598 stats->freq_stats_weighted.weight = 8; 599 stats->min_duration = U64_MAX; 600 stats->min_freq = U64_MAX; 601 spin_lock_init(&stats->lock); 602 } 603 604 /* ratelimit: */ 605 606 /** 607 * bch2_ratelimit_delay() - return how long to delay until the next time to do 608 * some work 609 * @d: the struct bch_ratelimit to update 610 * Returns: the amount of time to delay by, in jiffies 611 */ 612 u64 bch2_ratelimit_delay(struct bch_ratelimit *d) 613 { 614 u64 now = local_clock(); 615 616 return time_after64(d->next, now) 617 ? nsecs_to_jiffies(d->next - now) 618 : 0; 619 } 620 621 /** 622 * bch2_ratelimit_increment() - increment @d by the amount of work done 623 * @d: the struct bch_ratelimit to update 624 * @done: the amount of work done, in arbitrary units 625 */ 626 void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done) 627 { 628 u64 now = local_clock(); 629 630 d->next += div_u64(done * NSEC_PER_SEC, d->rate); 631 632 if (time_before64(now + NSEC_PER_SEC, d->next)) 633 d->next = now + NSEC_PER_SEC; 634 635 if (time_after64(now - NSEC_PER_SEC * 2, d->next)) 636 d->next = now - NSEC_PER_SEC * 2; 637 } 638 639 /* pd controller: */ 640 641 /* 642 * Updates pd_controller. Attempts to scale inputed values to units per second. 643 * @target: desired value 644 * @actual: current value 645 * 646 * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing 647 * it makes actual go down. 648 */ 649 void bch2_pd_controller_update(struct bch_pd_controller *pd, 650 s64 target, s64 actual, int sign) 651 { 652 s64 proportional, derivative, change; 653 654 unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ; 655 656 if (seconds_since_update == 0) 657 return; 658 659 pd->last_update = jiffies; 660 661 proportional = actual - target; 662 proportional *= seconds_since_update; 663 proportional = div_s64(proportional, pd->p_term_inverse); 664 665 derivative = actual - pd->last_actual; 666 derivative = div_s64(derivative, seconds_since_update); 667 derivative = ewma_add(pd->smoothed_derivative, derivative, 668 (pd->d_term / seconds_since_update) ?: 1); 669 derivative = derivative * pd->d_term; 670 derivative = div_s64(derivative, pd->p_term_inverse); 671 672 change = proportional + derivative; 673 674 /* Don't increase rate if not keeping up */ 675 if (change > 0 && 676 pd->backpressure && 677 time_after64(local_clock(), 678 pd->rate.next + NSEC_PER_MSEC)) 679 change = 0; 680 681 change *= (sign * -1); 682 683 pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change, 684 1, UINT_MAX); 685 686 pd->last_actual = actual; 687 pd->last_derivative = derivative; 688 pd->last_proportional = proportional; 689 pd->last_change = change; 690 pd->last_target = target; 691 } 692 693 void bch2_pd_controller_init(struct bch_pd_controller *pd) 694 { 695 pd->rate.rate = 1024; 696 pd->last_update = jiffies; 697 pd->p_term_inverse = 6000; 698 pd->d_term = 30; 699 pd->d_smooth = pd->d_term; 700 pd->backpressure = 1; 701 } 702 703 void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd) 704 { 705 if (!out->nr_tabstops) 706 printbuf_tabstop_push(out, 20); 707 708 prt_printf(out, "rate:"); 709 prt_tab(out); 710 prt_human_readable_s64(out, pd->rate.rate); 711 prt_newline(out); 712 713 prt_printf(out, "target:"); 714 prt_tab(out); 715 prt_human_readable_u64(out, pd->last_target); 716 prt_newline(out); 717 718 prt_printf(out, "actual:"); 719 prt_tab(out); 720 prt_human_readable_u64(out, pd->last_actual); 721 prt_newline(out); 722 723 prt_printf(out, "proportional:"); 724 prt_tab(out); 725 prt_human_readable_s64(out, pd->last_proportional); 726 prt_newline(out); 727 728 prt_printf(out, "derivative:"); 729 prt_tab(out); 730 prt_human_readable_s64(out, pd->last_derivative); 731 prt_newline(out); 732 733 prt_printf(out, "change:"); 734 prt_tab(out); 735 prt_human_readable_s64(out, pd->last_change); 736 prt_newline(out); 737 738 prt_printf(out, "next io:"); 739 prt_tab(out); 740 prt_printf(out, "%llims", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC)); 741 prt_newline(out); 742 } 743 744 /* misc: */ 745 746 void bch2_bio_map(struct bio *bio, void *base, size_t size) 747 { 748 while (size) { 749 struct page *page = is_vmalloc_addr(base) 750 ? vmalloc_to_page(base) 751 : virt_to_page(base); 752 unsigned offset = offset_in_page(base); 753 unsigned len = min_t(size_t, PAGE_SIZE - offset, size); 754 755 BUG_ON(!bio_add_page(bio, page, len, offset)); 756 size -= len; 757 base += len; 758 } 759 } 760 761 int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) 762 { 763 while (size) { 764 struct page *page = alloc_pages(gfp_mask, 0); 765 unsigned len = min_t(size_t, PAGE_SIZE, size); 766 767 if (!page) 768 return -ENOMEM; 769 770 if (unlikely(!bio_add_page(bio, page, len, 0))) { 771 __free_page(page); 772 break; 773 } 774 775 size -= len; 776 } 777 778 return 0; 779 } 780 781 size_t bch2_rand_range(size_t max) 782 { 783 size_t rand; 784 785 if (!max) 786 return 0; 787 788 do { 789 rand = get_random_long(); 790 rand &= roundup_pow_of_two(max) - 1; 791 } while (rand >= max); 792 793 return rand; 794 } 795 796 void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) 797 { 798 struct bio_vec bv; 799 struct bvec_iter iter; 800 801 __bio_for_each_segment(bv, dst, iter, dst_iter) { 802 void *dstp = kmap_local_page(bv.bv_page); 803 804 memcpy(dstp + bv.bv_offset, src, bv.bv_len); 805 kunmap_local(dstp); 806 807 src += bv.bv_len; 808 } 809 } 810 811 void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) 812 { 813 struct bio_vec bv; 814 struct bvec_iter iter; 815 816 __bio_for_each_segment(bv, src, iter, src_iter) { 817 void *srcp = kmap_local_page(bv.bv_page); 818 819 memcpy(dst, srcp + bv.bv_offset, bv.bv_len); 820 kunmap_local(srcp); 821 822 dst += bv.bv_len; 823 } 824 } 825 826 static int alignment_ok(const void *base, size_t align) 827 { 828 return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || 829 ((unsigned long)base & (align - 1)) == 0; 830 } 831 832 static void u32_swap(void *a, void *b, size_t size) 833 { 834 u32 t = *(u32 *)a; 835 *(u32 *)a = *(u32 *)b; 836 *(u32 *)b = t; 837 } 838 839 static void u64_swap(void *a, void *b, size_t size) 840 { 841 u64 t = *(u64 *)a; 842 *(u64 *)a = *(u64 *)b; 843 *(u64 *)b = t; 844 } 845 846 static void generic_swap(void *a, void *b, size_t size) 847 { 848 char t; 849 850 do { 851 t = *(char *)a; 852 *(char *)a++ = *(char *)b; 853 *(char *)b++ = t; 854 } while (--size > 0); 855 } 856 857 static inline int do_cmp(void *base, size_t n, size_t size, 858 int (*cmp_func)(const void *, const void *, size_t), 859 size_t l, size_t r) 860 { 861 return cmp_func(base + inorder_to_eytzinger0(l, n) * size, 862 base + inorder_to_eytzinger0(r, n) * size, 863 size); 864 } 865 866 static inline void do_swap(void *base, size_t n, size_t size, 867 void (*swap_func)(void *, void *, size_t), 868 size_t l, size_t r) 869 { 870 swap_func(base + inorder_to_eytzinger0(l, n) * size, 871 base + inorder_to_eytzinger0(r, n) * size, 872 size); 873 } 874 875 void eytzinger0_sort(void *base, size_t n, size_t size, 876 int (*cmp_func)(const void *, const void *, size_t), 877 void (*swap_func)(void *, void *, size_t)) 878 { 879 int i, c, r; 880 881 if (!swap_func) { 882 if (size == 4 && alignment_ok(base, 4)) 883 swap_func = u32_swap; 884 else if (size == 8 && alignment_ok(base, 8)) 885 swap_func = u64_swap; 886 else 887 swap_func = generic_swap; 888 } 889 890 /* heapify */ 891 for (i = n / 2 - 1; i >= 0; --i) { 892 for (r = i; r * 2 + 1 < n; r = c) { 893 c = r * 2 + 1; 894 895 if (c + 1 < n && 896 do_cmp(base, n, size, cmp_func, c, c + 1) < 0) 897 c++; 898 899 if (do_cmp(base, n, size, cmp_func, r, c) >= 0) 900 break; 901 902 do_swap(base, n, size, swap_func, r, c); 903 } 904 } 905 906 /* sort */ 907 for (i = n - 1; i > 0; --i) { 908 do_swap(base, n, size, swap_func, 0, i); 909 910 for (r = 0; r * 2 + 1 < i; r = c) { 911 c = r * 2 + 1; 912 913 if (c + 1 < i && 914 do_cmp(base, n, size, cmp_func, c, c + 1) < 0) 915 c++; 916 917 if (do_cmp(base, n, size, cmp_func, r, c) >= 0) 918 break; 919 920 do_swap(base, n, size, swap_func, r, c); 921 } 922 } 923 } 924 925 void sort_cmp_size(void *base, size_t num, size_t size, 926 int (*cmp_func)(const void *, const void *, size_t), 927 void (*swap_func)(void *, void *, size_t size)) 928 { 929 /* pre-scale counters for performance */ 930 int i = (num/2 - 1) * size, n = num * size, c, r; 931 932 if (!swap_func) { 933 if (size == 4 && alignment_ok(base, 4)) 934 swap_func = u32_swap; 935 else if (size == 8 && alignment_ok(base, 8)) 936 swap_func = u64_swap; 937 else 938 swap_func = generic_swap; 939 } 940 941 /* heapify */ 942 for ( ; i >= 0; i -= size) { 943 for (r = i; r * 2 + size < n; r = c) { 944 c = r * 2 + size; 945 if (c < n - size && 946 cmp_func(base + c, base + c + size, size) < 0) 947 c += size; 948 if (cmp_func(base + r, base + c, size) >= 0) 949 break; 950 swap_func(base + r, base + c, size); 951 } 952 } 953 954 /* sort */ 955 for (i = n - size; i > 0; i -= size) { 956 swap_func(base, base + i, size); 957 for (r = 0; r * 2 + size < i; r = c) { 958 c = r * 2 + size; 959 if (c < i - size && 960 cmp_func(base + c, base + c + size, size) < 0) 961 c += size; 962 if (cmp_func(base + r, base + c, size) >= 0) 963 break; 964 swap_func(base + r, base + c, size); 965 } 966 } 967 } 968 969 static void mempool_free_vp(void *element, void *pool_data) 970 { 971 size_t size = (size_t) pool_data; 972 973 vpfree(element, size); 974 } 975 976 static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data) 977 { 978 size_t size = (size_t) pool_data; 979 980 return vpmalloc(size, gfp_mask); 981 } 982 983 int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size) 984 { 985 return size < PAGE_SIZE 986 ? mempool_init_kmalloc_pool(pool, min_nr, size) 987 : mempool_init(pool, min_nr, mempool_alloc_vp, 988 mempool_free_vp, (void *) size); 989 } 990 991 #if 0 992 void eytzinger1_test(void) 993 { 994 unsigned inorder, eytz, size; 995 996 pr_info("1 based eytzinger test:"); 997 998 for (size = 2; 999 size < 65536; 1000 size++) { 1001 unsigned extra = eytzinger1_extra(size); 1002 1003 if (!(size % 4096)) 1004 pr_info("tree size %u", size); 1005 1006 BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size)); 1007 BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size)); 1008 1009 BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0); 1010 BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0); 1011 1012 inorder = 1; 1013 eytzinger1_for_each(eytz, size) { 1014 BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz); 1015 BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder); 1016 BUG_ON(eytz != eytzinger1_last(size) && 1017 eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz); 1018 1019 inorder++; 1020 } 1021 } 1022 } 1023 1024 void eytzinger0_test(void) 1025 { 1026 1027 unsigned inorder, eytz, size; 1028 1029 pr_info("0 based eytzinger test:"); 1030 1031 for (size = 1; 1032 size < 65536; 1033 size++) { 1034 unsigned extra = eytzinger0_extra(size); 1035 1036 if (!(size % 4096)) 1037 pr_info("tree size %u", size); 1038 1039 BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size)); 1040 BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size)); 1041 1042 BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1); 1043 BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1); 1044 1045 inorder = 0; 1046 eytzinger0_for_each(eytz, size) { 1047 BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz); 1048 BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder); 1049 BUG_ON(eytz != eytzinger0_last(size) && 1050 eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz); 1051 1052 inorder++; 1053 } 1054 } 1055 } 1056 1057 static inline int cmp_u16(const void *_l, const void *_r, size_t size) 1058 { 1059 const u16 *l = _l, *r = _r; 1060 1061 return (*l > *r) - (*r - *l); 1062 } 1063 1064 static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search) 1065 { 1066 int i, c1 = -1, c2 = -1; 1067 ssize_t r; 1068 1069 r = eytzinger0_find_le(test_array, nr, 1070 sizeof(test_array[0]), 1071 cmp_u16, &search); 1072 if (r >= 0) 1073 c1 = test_array[r]; 1074 1075 for (i = 0; i < nr; i++) 1076 if (test_array[i] <= search && test_array[i] > c2) 1077 c2 = test_array[i]; 1078 1079 if (c1 != c2) { 1080 eytzinger0_for_each(i, nr) 1081 pr_info("[%3u] = %12u", i, test_array[i]); 1082 pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i", 1083 i, r, c1, c2); 1084 } 1085 } 1086 1087 void eytzinger0_find_test(void) 1088 { 1089 unsigned i, nr, allocated = 1 << 12; 1090 u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL); 1091 1092 for (nr = 1; nr < allocated; nr++) { 1093 pr_info("testing %u elems", nr); 1094 1095 get_random_bytes(test_array, nr * sizeof(test_array[0])); 1096 eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL); 1097 1098 /* verify array is sorted correctly: */ 1099 eytzinger0_for_each(i, nr) 1100 BUG_ON(i != eytzinger0_last(nr) && 1101 test_array[i] > test_array[eytzinger0_next(i, nr)]); 1102 1103 for (i = 0; i < U16_MAX; i += 1 << 12) 1104 eytzinger0_find_test_val(test_array, nr, i); 1105 1106 for (i = 0; i < nr; i++) { 1107 eytzinger0_find_test_val(test_array, nr, test_array[i] - 1); 1108 eytzinger0_find_test_val(test_array, nr, test_array[i]); 1109 eytzinger0_find_test_val(test_array, nr, test_array[i] + 1); 1110 } 1111 } 1112 1113 kfree(test_array); 1114 } 1115 #endif 1116 1117 /* 1118 * Accumulate percpu counters onto one cpu's copy - only valid when access 1119 * against any percpu counter is guarded against 1120 */ 1121 u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) 1122 { 1123 u64 *ret; 1124 int cpu; 1125 1126 /* access to pcpu vars has to be blocked by other locking */ 1127 preempt_disable(); 1128 ret = this_cpu_ptr(p); 1129 preempt_enable(); 1130 1131 for_each_possible_cpu(cpu) { 1132 u64 *i = per_cpu_ptr(p, cpu); 1133 1134 if (i != ret) { 1135 acc_u64s(ret, i, nr); 1136 memset(i, 0, nr * sizeof(u64)); 1137 } 1138 } 1139 1140 return ret; 1141 } 1142