// SPDX-License-Identifier: GPL-2.0
/*
 * random utility code, for bcache but in theory not specific to bcache
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/log2.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/sched/clock.h>

#include "eytzinger.h"
#include "mean_and_variance.h"
#include "util.h"

static const char si_units[] = "?kMGTPEZY";

/* string_get_size units: */
static const char *const units_2[] = {
	"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
};
static const char *const units_10[] = {
	"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
};

static int parse_u64(const char *cp, u64 *res)
{
	const char *start = cp;
	u64 v = 0;

	if (!isdigit(*cp))
		return -EINVAL;

	do {
		if (v > U64_MAX / 10)
			return -ERANGE;
		v *= 10;
		if (v > U64_MAX - (*cp - '0'))
			return -ERANGE;
		v += *cp - '0';
		cp++;
	} while (isdigit(*cp));

	*res = v;
	return cp - start;
}

static int bch2_pow(u64 n, u64 p, u64 *res)
{
	*res = 1;

	while (p--) {
		if (*res > div_u64(U64_MAX, n))
			return -ERANGE;
		*res *= n;
	}
	return 0;
}

static int parse_unit_suffix(const char *cp, u64 *res)
{
	const char *start = cp;
	u64 base = 1024;
	unsigned u;
	int ret;

	if (*cp == ' ')
		cp++;

	for (u = 1; u < strlen(si_units); u++)
		if (*cp == si_units[u]) {
			cp++;
			goto got_unit;
		}

	for (u = 0; u < ARRAY_SIZE(units_2); u++)
		if (!strncmp(cp, units_2[u], strlen(units_2[u]))) {
			cp += strlen(units_2[u]);
			goto got_unit;
		}

	for (u = 0; u < ARRAY_SIZE(units_10); u++)
		if (!strncmp(cp, units_10[u], strlen(units_10[u]))) {
			cp += strlen(units_10[u]);
			base = 1000;
			goto got_unit;
		}

	*res = 1;
	return 0;
got_unit:
	ret = bch2_pow(base, u, res);
	if (ret)
		return ret;

	return cp - start;
}

#define parse_or_ret(cp, _f)			\
do {						\
	int _ret = _f;				\
	if (_ret < 0)				\
		return _ret;			\
	cp += _ret;				\
} while (0)

static int __bch2_strtou64_h(const char *cp, u64 *res)
{
	const char *start = cp;
	u64 v = 0, b, f_n = 0, f_d = 1;
	int ret;

	parse_or_ret(cp, parse_u64(cp, &v));

	if (*cp == '.') {
		cp++;
		ret = parse_u64(cp, &f_n);
		if (ret < 0)
			return ret;
		cp += ret;

		ret = bch2_pow(10, ret, &f_d);
		if (ret)
			return ret;
	}

	parse_or_ret(cp, parse_unit_suffix(cp, &b));

	if (v > div_u64(U64_MAX, b))
		return -ERANGE;
	v *= b;

	if (f_n > div_u64(U64_MAX, b))
		return -ERANGE;

	f_n = div_u64(f_n * b, f_d);
	if (v + f_n < v)
		return -ERANGE;
	v += f_n;

	*res = v;
	return cp - start;
}

static int __bch2_strtoh(const char *cp, u64 *res,
			 u64 t_max, bool t_signed)
{
	bool positive = *cp != '-';
	u64 v = 0;

	if (*cp == '+' || *cp == '-')
		cp++;

	parse_or_ret(cp, __bch2_strtou64_h(cp, &v));

	if (*cp == '\n')
		cp++;
	if (*cp)
		return -EINVAL;

	if (positive) {
		if (v > t_max)
			return -ERANGE;
	} else {
		if (v && !t_signed)
			return -ERANGE;

		if (v > t_max + 1)
			return -ERANGE;
		v = -v;
	}

	*res = v;
	return 0;
}
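
/*
 * Illustrative sketch (not compiled): how the helpers above compose.
 * parse_u64() consumes the integer part, the '.' branch the fraction,
 * and parse_unit_suffix() the scale, so "1.5G" parses as 1.5 * 2^30:
 */
#if 0
static void strtou64_h_example(void)
{
	u64 v;

	BUG_ON(__bch2_strtou64_h("1.5G", &v) < 0);	/* v == 1610612736 */
	BUG_ON(__bch2_strtou64_h("10KiB", &v) < 0);	/* v == 10240 */
	BUG_ON(__bch2_strtou64_h("512", &v) < 0);	/* v == 512: no suffix, no scaling */
}
#endif
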
#define STRTO_H(name, type)					\
int bch2_ ## name ## _h(const char *cp, type *res)		\
{								\
	u64 v = 0;						\
	int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type),	\
			ANYSINT_MAX(type) != ((type) ~0ULL));	\
	*res = v;						\
	return ret;						\
}

STRTO_H(strtoint, int)
STRTO_H(strtouint, unsigned int)
STRTO_H(strtoll, long long)
STRTO_H(strtoull, unsigned long long)
STRTO_H(strtou64, u64)

u64 bch2_read_flag_list(char *opt, const char * const list[])
{
	u64 ret = 0;
	char *p, *s, *d = kstrdup(opt, GFP_KERNEL);

	if (!d)
		return -ENOMEM;

	s = strim(d);

	while ((p = strsep(&s, ","))) {
		int flag = match_string(list, -1, p);

		if (flag < 0) {
			ret = -1;
			break;
		}

		ret |= 1 << flag;
	}

	kfree(d);

	return ret;
}

bool bch2_is_zero(const void *_p, size_t n)
{
	const char *p = _p;
	size_t i;

	for (i = 0; i < n; i++)
		if (p[i])
			return false;
	return true;
}

void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
{
	while (nr_bits)
		prt_char(out, '0' + ((v >> --nr_bits) & 1));
}

void bch2_print_string_as_lines(const char *prefix, const char *lines)
{
	const char *p;

	if (!lines) {
		printk("%s (null)\n", prefix);
		return;
	}

	console_lock();
	while (1) {
		p = strchrnul(lines, '\n');
		printk("%s%.*s\n", prefix, (int) (p - lines), lines);
		if (!*p)
			break;
		lines = p + 1;
	}
	console_unlock();
}

int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task)
{
#ifdef CONFIG_STACKTRACE
	unsigned nr_entries = 0;
	int ret = 0;

	stack->nr = 0;
	ret = darray_make_room(stack, 32);
	if (ret)
		return ret;

	if (!down_read_trylock(&task->signal->exec_update_lock))
		return -1;

	do {
		nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, 0);
	} while (nr_entries == stack->size &&
		 !(ret = darray_make_room(stack, stack->size * 2)));

	stack->nr = nr_entries;
	up_read(&task->signal->exec_update_lock);

	return ret;
#else
	return 0;
#endif
}

void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
{
	unsigned long *i;

	darray_for_each(*stack, i) {
		prt_printf(out, "[<0>] %pB", (void *) *i);
		prt_newline(out);
	}
}

int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task)
{
	bch_stacktrace stack = { 0 };
	int ret = bch2_save_backtrace(&stack, task);

	bch2_prt_backtrace(out, &stack);
	darray_exit(&stack);
	return ret;
}
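
/*
 * Illustrative sketch (not compiled): bch2_read_flag_list() above turns a
 * comma-separated option string into a bitmask. The flag list here is
 * hypothetical; a real caller passes a NULL-terminated option table:
 */
#if 0
static const char * const example_flags[] = {
	"foo", "bar", "baz", NULL
};

static void read_flag_list_example(void)
{
	char opt[] = "foo,baz";

	/* "foo" is bit 0, "baz" is bit 2 -> mask == 0x5 */
	BUG_ON(bch2_read_flag_list(opt, example_flags) != (BIT(0) | BIT(2)));
}
#endif
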
/* time stats: */

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
{
	unsigned i = 0;

	while (i < ARRAY_SIZE(q->entries)) {
		struct bch2_quantile_entry *e = q->entries + i;

		if (unlikely(!e->step)) {
			e->m = v;
			e->step = max_t(unsigned, v / 2, 1024);
		} else if (e->m > v) {
			e->m = e->m >= e->step
				? e->m - e->step
				: 0;
		} else if (e->m < v) {
			e->m = e->m + e->step > e->m
				? e->m + e->step
				: U32_MAX;
		}

		if ((e->m > v ? e->m - v : v - e->m) < e->step)
			e->step = max_t(unsigned, e->step / 2, 1);

		if (v >= e->m)
			break;

		i = eytzinger0_child(i, v > e->m);
	}
}

static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
					      u64 start, u64 end)
{
	u64 duration, freq;

	if (time_after64(end, start)) {
		duration = end - start;
		mean_and_variance_update(&stats->duration_stats, duration);
		mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
		stats->max_duration = max(stats->max_duration, duration);
		stats->min_duration = min(stats->min_duration, duration);
		bch2_quantiles_update(&stats->quantiles, duration);
	}

	if (time_after64(end, stats->last_event)) {
		freq = end - stats->last_event;
		mean_and_variance_update(&stats->freq_stats, freq);
		mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
		stats->max_freq = max(stats->max_freq, freq);
		stats->min_freq = min(stats->min_freq, freq);
		stats->last_event = end;
	}
}

static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
						  struct bch2_time_stat_buffer *b)
{
	struct bch2_time_stat_buffer_entry *i;
	unsigned long flags;

	spin_lock_irqsave(&stats->lock, flags);
	for (i = b->entries;
	     i < b->entries + ARRAY_SIZE(b->entries);
	     i++)
		bch2_time_stats_update_one(stats, i->start, i->end);
	spin_unlock_irqrestore(&stats->lock, flags);

	b->nr = 0;
}

void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
{
	unsigned long flags;

	WARN_RATELIMIT(!stats->min_duration || !stats->min_freq,
		       "time_stats: min_duration = %llu, min_freq = %llu",
		       stats->min_duration, stats->min_freq);

	if (!stats->buffer) {
		spin_lock_irqsave(&stats->lock, flags);
		bch2_time_stats_update_one(stats, start, end);

		if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
		    stats->duration_stats.n > 1024)
			stats->buffer =
				alloc_percpu_gfp(struct bch2_time_stat_buffer,
						 GFP_ATOMIC);
		spin_unlock_irqrestore(&stats->lock, flags);
	} else {
		struct bch2_time_stat_buffer *b;

		preempt_disable();
		b = this_cpu_ptr(stats->buffer);

		BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
		b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) {
			.start = start,
			.end = end
		};

		if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
			bch2_time_stats_clear_buffer(stats, b);
		preempt_enable();
	}
}
#endif

static const struct time_unit {
	const char *name;
	u64 nsecs;
} time_units[] = {
	{ "ns",		1			},
	{ "us",		NSEC_PER_USEC		},
	{ "ms",		NSEC_PER_MSEC		},
	{ "s",		NSEC_PER_SEC		},
	{ "m",		(u64) NSEC_PER_SEC * 60	},
	{ "h",		(u64) NSEC_PER_SEC * 3600 },
	{ "eon",	U64_MAX			},
};

static const struct time_unit *pick_time_units(u64 ns)
{
	const struct time_unit *u;

	for (u = time_units;
	     u + 1 < time_units + ARRAY_SIZE(time_units) &&
	     ns >= u[1].nsecs << 1;
	     u++)
		;

	return u;
}

void bch2_pr_time_units(struct printbuf *out, u64 ns)
{
	const struct time_unit *u = pick_time_units(ns);

	prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
}
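
/*
 * Illustrative sketch (not compiled): pick_time_units() only moves to the
 * next unit once the value is at least twice that unit, to avoid printing
 * "1 us" for 1500 ns:
 */
#if 0
static void pr_time_units_example(struct printbuf *out)
{
	bch2_pr_time_units(out, 1500);		/* "1500 ns" */
	bch2_pr_time_units(out, 3000);		/* "3 us" */
}
#endif
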
static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
{
	const struct time_unit *u = pick_time_units(ns);

	prt_printf(out, "%llu ", div64_u64(ns, u->nsecs));
	prt_tab_rjust(out);
	prt_printf(out, "%s", u->name);
}

#ifndef __KERNEL__
#include <time.h>
void bch2_prt_datetime(struct printbuf *out, time64_t sec)
{
	time_t t = sec;
	char buf[64];

	ctime_r(&t, buf);
	prt_str(out, buf);
}
#else
void bch2_prt_datetime(struct printbuf *out, time64_t sec)
{
	char buf[64];

	snprintf(buf, sizeof(buf), "%ptT", &sec);
	prt_str(out, buf);
}
#endif

#define TABSTOP_SIZE 12

static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
{
	prt_str(out, name);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, ns);
	prt_newline(out);
}
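
/*
 * Illustrative sketch (not compiled): both bch2_prt_datetime() variants
 * print a human-readable timestamp; in the kernel, %ptT formats a
 * time64_t as ISO 8601:
 */
#if 0
static void prt_datetime_example(struct printbuf *out)
{
	bch2_prt_datetime(out, 0);	/* "1970-01-01T00:00:00" in the kernel build */
}
#endif
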
void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
{
	const struct time_unit *u;
	s64 f_mean = 0, d_mean = 0;
	u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
	int i;

	/* avoid divide by zero: */
	if (stats->freq_stats.n) {
		f_mean = mean_and_variance_get_mean(stats->freq_stats);
		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
		d_mean = mean_and_variance_get_mean(stats->duration_stats);
		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
	}

	printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE);
	prt_printf(out, "count:");
	prt_tab(out);
	prt_printf(out, "%llu ", stats->duration_stats.n);
	printbuf_tabstop_pop(out);
	prt_newline(out);

	printbuf_tabstops_reset(out);

	printbuf_tabstop_push(out, out->indent + 20);
	printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
	printbuf_tabstop_push(out, 0);
	printbuf_tabstop_push(out, TABSTOP_SIZE + 2);

	prt_tab(out);
	prt_printf(out, "since mount");
	prt_tab_rjust(out);
	prt_tab(out);
	prt_printf(out, "recent");
	prt_tab_rjust(out);
	prt_newline(out);

	printbuf_tabstops_reset(out);
	printbuf_tabstop_push(out, out->indent + 20);
	printbuf_tabstop_push(out, TABSTOP_SIZE);
	printbuf_tabstop_push(out, 2);
	printbuf_tabstop_push(out, TABSTOP_SIZE);

	prt_printf(out, "duration of events");
	prt_newline(out);
	printbuf_indent_add(out, 2);

	pr_name_and_units(out, "min:", stats->min_duration);
	pr_name_and_units(out, "max:", stats->max_duration);

	prt_printf(out, "mean:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, d_mean);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
	prt_newline(out);

	prt_printf(out, "stddev:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, d_stddev);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));

	printbuf_indent_sub(out, 2);
	prt_newline(out);

	prt_printf(out, "time between events");
	prt_newline(out);
	printbuf_indent_add(out, 2);

	pr_name_and_units(out, "min:", stats->min_freq);
	pr_name_and_units(out, "max:", stats->max_freq);

	prt_printf(out, "mean:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, f_mean);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
	prt_newline(out);

	prt_printf(out, "stddev:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, f_stddev);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));

	printbuf_indent_sub(out, 2);
	prt_newline(out);

	printbuf_tabstops_reset(out);

	i = eytzinger0_first(NR_QUANTILES);
	u = pick_time_units(stats->quantiles.entries[i].m);

	prt_printf(out, "quantiles (%s):\t", u->name);
	eytzinger0_for_each(i, NR_QUANTILES) {
		bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;

		q = max(stats->quantiles.entries[i].m, last_q);
		prt_printf(out, "%llu ", div_u64(q, u->nsecs));
		if (is_last)
			prt_newline(out);
		last_q = q;
	}
}

void bch2_time_stats_exit(struct bch2_time_stats *stats)
{
	free_percpu(stats->buffer);
}

void bch2_time_stats_init(struct bch2_time_stats *stats)
{
	memset(stats, 0, sizeof(*stats));
	stats->duration_stats_weighted.weight = 8;
	stats->freq_stats_weighted.weight = 8;
	stats->min_duration = U64_MAX;
	stats->min_freq = U64_MAX;
	spin_lock_init(&stats->lock);
}

/* ratelimit: */

/**
 * bch2_ratelimit_delay() - return how long to delay until the next time to do
 * some work
 * @d: the struct bch_ratelimit to update
 * Returns: the amount of time to delay by, in jiffies
 */
u64 bch2_ratelimit_delay(struct bch_ratelimit *d)
{
	u64 now = local_clock();

	return time_after64(d->next, now)
		? nsecs_to_jiffies(d->next - now)
		: 0;
}

/**
 * bch2_ratelimit_increment() - increment @d by the amount of work done
 * @d: the struct bch_ratelimit to update
 * @done: the amount of work done, in arbitrary units
 */
void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done)
{
	u64 now = local_clock();

	d->next += div_u64(done * NSEC_PER_SEC, d->rate);

	if (time_before64(now + NSEC_PER_SEC, d->next))
		d->next = now + NSEC_PER_SEC;

	if (time_after64(now - NSEC_PER_SEC * 2, d->next))
		d->next = now - NSEC_PER_SEC * 2;
}
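
/*
 * Illustrative sketch (not compiled): a typical bch_ratelimit consumer
 * loop. have_work() and do_some_work() are hypothetical; the point is
 * that work done feeds bch2_ratelimit_increment() and the returned jiffy
 * delay is slept off before the next batch:
 */
#if 0
static void ratelimit_example(struct bch_ratelimit *d)
{
	while (have_work()) {
		u64 delay = bch2_ratelimit_delay(d);

		if (delay)
			schedule_timeout_interruptible(delay);

		bch2_ratelimit_increment(d, do_some_work());
	}
}
#endif
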
/* pd controller: */

/*
 * Update the PD controller; attempts to scale input values to units per
 * second.
 * @target: desired value
 * @actual: current value
 *
 * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing
 * it makes actual go down.
 */
void bch2_pd_controller_update(struct bch_pd_controller *pd,
			       s64 target, s64 actual, int sign)
{
	s64 proportional, derivative, change;
	unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ;

	if (seconds_since_update == 0)
		return;

	pd->last_update = jiffies;

	proportional = actual - target;
	proportional *= seconds_since_update;
	proportional = div_s64(proportional, pd->p_term_inverse);

	derivative = actual - pd->last_actual;
	derivative = div_s64(derivative, seconds_since_update);
	derivative = ewma_add(pd->smoothed_derivative, derivative,
			      (pd->d_term / seconds_since_update) ?: 1);
	derivative = derivative * pd->d_term;
	derivative = div_s64(derivative, pd->p_term_inverse);

	change = proportional + derivative;

	/* Don't increase rate if not keeping up */
	if (change > 0 &&
	    pd->backpressure &&
	    time_after64(local_clock(),
			 pd->rate.next + NSEC_PER_MSEC))
		change = 0;

	change *= (sign * -1);

	pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change,
				1, UINT_MAX);

	pd->last_actual		= actual;
	pd->last_derivative	= derivative;
	pd->last_proportional	= proportional;
	pd->last_change		= change;
	pd->last_target		= target;
}

void bch2_pd_controller_init(struct bch_pd_controller *pd)
{
	pd->rate.rate		= 1024;
	pd->last_update		= jiffies;
	pd->p_term_inverse	= 6000;
	pd->d_term		= 30;
	pd->d_smooth		= pd->d_term;
	pd->backpressure	= 1;
}

void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 20);

	prt_printf(out, "rate:");
	prt_tab(out);
	prt_human_readable_s64(out, pd->rate.rate);
	prt_newline(out);

	prt_printf(out, "target:");
	prt_tab(out);
	prt_human_readable_u64(out, pd->last_target);
	prt_newline(out);

	prt_printf(out, "actual:");
	prt_tab(out);
	prt_human_readable_u64(out, pd->last_actual);
	prt_newline(out);

	prt_printf(out, "proportional:");
	prt_tab(out);
	prt_human_readable_s64(out, pd->last_proportional);
	prt_newline(out);

	prt_printf(out, "derivative:");
	prt_tab(out);
	prt_human_readable_s64(out, pd->last_derivative);
	prt_newline(out);

	prt_printf(out, "change:");
	prt_tab(out);
	prt_human_readable_s64(out, pd->last_change);
	prt_newline(out);

	prt_printf(out, "next io:");
	prt_tab(out);
	prt_printf(out, "%llims", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC));
	prt_newline(out);
}
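
/*
 * Worked example (illustrative): with the defaults from
 * bch2_pd_controller_init() (p_term_inverse = 6000, d_term = 30), an
 * update one second after the last one with actual = 7000 and
 * target = 1000 gives, ignoring the smoothed derivative term:
 *
 *	proportional = (7000 - 1000) * 1 / 6000 = 1
 *	change       = -sign * (proportional + derivative)
 *
 * so with sign = 1 the rate is nudged down until actual approaches
 * target, and the backpressure check avoids increasing the rate while
 * the limiter is not keeping up.
 */
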
/* misc: */

void bch2_bio_map(struct bio *bio, void *base, size_t size)
{
	while (size) {
		struct page *page = is_vmalloc_addr(base)
				? vmalloc_to_page(base)
				: virt_to_page(base);
		unsigned offset = offset_in_page(base);
		unsigned len = min_t(size_t, PAGE_SIZE - offset, size);

		BUG_ON(!bio_add_page(bio, page, len, offset));
		size -= len;
		base += len;
	}
}

int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
{
	while (size) {
		struct page *page = alloc_pages(gfp_mask, 0);
		unsigned len = min_t(size_t, PAGE_SIZE, size);

		if (!page)
			return -ENOMEM;

		if (unlikely(!bio_add_page(bio, page, len, 0))) {
			__free_page(page);
			break;
		}

		size -= len;
	}

	return 0;
}

size_t bch2_rand_range(size_t max)
{
	size_t rand;

	if (!max)
		return 0;

	do {
		rand = get_random_long();
		rand &= roundup_pow_of_two(max) - 1;
	} while (rand >= max);

	return rand;
}

void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
{
	struct bio_vec bv;
	struct bvec_iter iter;

	__bio_for_each_segment(bv, dst, iter, dst_iter) {
		void *dstp = kmap_local_page(bv.bv_page);

		memcpy(dstp + bv.bv_offset, src, bv.bv_len);
		kunmap_local(dstp);

		src += bv.bv_len;
	}
}

void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
{
	struct bio_vec bv;
	struct bvec_iter iter;

	__bio_for_each_segment(bv, src, iter, src_iter) {
		void *srcp = kmap_local_page(bv.bv_page);

		memcpy(dst, srcp + bv.bv_offset, bv.bv_len);
		kunmap_local(srcp);

		dst += bv.bv_len;
	}
}

static int alignment_ok(const void *base, size_t align)
{
	return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
		((unsigned long)base & (align - 1)) == 0;
}

static void u32_swap(void *a, void *b, size_t size)
{
	u32 t = *(u32 *)a;
	*(u32 *)a = *(u32 *)b;
	*(u32 *)b = t;
}

static void u64_swap(void *a, void *b, size_t size)
{
	u64 t = *(u64 *)a;
	*(u64 *)a = *(u64 *)b;
	*(u64 *)b = t;
}

static void generic_swap(void *a, void *b, size_t size)
{
	char t;

	do {
		t = *(char *)a;
		*(char *)a++ = *(char *)b;
		*(char *)b++ = t;
	} while (--size > 0);
}

static inline int do_cmp(void *base, size_t n, size_t size,
			 int (*cmp_func)(const void *, const void *, size_t),
			 size_t l, size_t r)
{
	return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
			base + inorder_to_eytzinger0(r, n) * size,
			size);
}

static inline void do_swap(void *base, size_t n, size_t size,
			   void (*swap_func)(void *, void *, size_t),
			   size_t l, size_t r)
{
	swap_func(base + inorder_to_eytzinger0(l, n) * size,
		  base + inorder_to_eytzinger0(r, n) * size,
		  size);
}

void eytzinger0_sort(void *base, size_t n, size_t size,
		     int (*cmp_func)(const void *, const void *, size_t),
		     void (*swap_func)(void *, void *, size_t))
{
	int i, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify */
	for (i = n / 2 - 1; i >= 0; --i) {
		for (r = i; r * 2 + 1 < n; r = c) {
			c = r * 2 + 1;

			if (c + 1 < n &&
			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
				c++;

			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
				break;

			do_swap(base, n, size, swap_func, r, c);
		}
	}

	/* sort */
	for (i = n - 1; i > 0; --i) {
		do_swap(base, n, size, swap_func, 0, i);

		for (r = 0; r * 2 + 1 < i; r = c) {
			c = r * 2 + 1;

			if (c + 1 < i &&
			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
				c++;

			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
				break;

			do_swap(base, n, size, swap_func, r, c);
		}
	}
}
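
/*
 * Illustrative sketch (not compiled): eytzinger0_sort() heapsorts over
 * in-order indices while the array stays in eytzinger (BFS) layout;
 * do_cmp()/do_swap() above translate the indices on the fly:
 */
#if 0
static int example_cmp_u32(const void *_l, const void *_r, size_t size)
{
	u32 l = *(const u32 *) _l, r = *(const u32 *) _r;

	return (l > r) - (l < r);
}

static void eytzinger0_sort_example(void)
{
	u32 a[] = { 3, 1, 4, 1, 5, 9, 2, 6 };

	eytzinger0_sort(a, ARRAY_SIZE(a), sizeof(a[0]), example_cmp_u32, NULL);

	/* an in-order walk with eytzinger0_for_each() now visits
	 * 1, 1, 2, 3, 4, 5, 6, 9 */
}
#endif
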
void sort_cmp_size(void *base, size_t num, size_t size,
		   int (*cmp_func)(const void *, const void *, size_t),
		   void (*swap_func)(void *, void *, size_t size))
{
	/* pre-scale counters for performance */
	int i = (num/2 - 1) * size, n = num * size, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify */
	for ( ; i >= 0; i -= size) {
		for (r = i; r * 2 + size < n; r = c) {
			c = r * 2 + size;
			if (c < n - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}

	/* sort */
	for (i = n - size; i > 0; i -= size) {
		swap_func(base, base + i, size);
		for (r = 0; r * 2 + size < i; r = c) {
			c = r * 2 + size;
			if (c < i - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}
}

static void mempool_free_vp(void *element, void *pool_data)
{
	size_t size = (size_t) pool_data;

	vpfree(element, size);
}

static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
{
	size_t size = (size_t) pool_data;

	return vpmalloc(size, gfp_mask);
}

int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
{
	return size < PAGE_SIZE
		? mempool_init_kmalloc_pool(pool, min_nr, size)
		: mempool_init(pool, min_nr, mempool_alloc_vp,
			       mempool_free_vp, (void *) size);
}
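
/*
 * Illustrative sketch (not compiled): with mempool_init_kvpmalloc_pool(),
 * small objects come from a kmalloc-backed pool while PAGE_SIZE and
 * larger allocations fall back to vpmalloc():
 */
#if 0
static mempool_t example_pool;

static int example_pool_init(void)
{
	int ret = mempool_init_kvpmalloc_pool(&example_pool, 1, 1 << 16);

	if (ret)
		return ret;

	void *buf = mempool_alloc(&example_pool, GFP_KERNEL);

	mempool_free(buf, &example_pool);
	return 0;
}
#endif
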
#if 0
void eytzinger1_test(void)
{
	unsigned inorder, eytz, size;

	pr_info("1 based eytzinger test:");

	for (size = 2;
	     size < 65536;
	     size++) {
		unsigned extra = eytzinger1_extra(size);

		if (!(size % 4096))
			pr_info("tree size %u", size);

		BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size));
		BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size));

		BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0);
		BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0);

		inorder = 1;
		eytzinger1_for_each(eytz, size) {
			BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz);
			BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder);
			BUG_ON(eytz != eytzinger1_last(size) &&
			       eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz);

			inorder++;
		}
	}
}

void eytzinger0_test(void)
{
	unsigned inorder, eytz, size;

	pr_info("0 based eytzinger test:");

	for (size = 1;
	     size < 65536;
	     size++) {
		unsigned extra = eytzinger0_extra(size);

		if (!(size % 4096))
			pr_info("tree size %u", size);

		BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size));
		BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size));

		BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1);
		BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1);

		inorder = 0;
		eytzinger0_for_each(eytz, size) {
			BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz);
			BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder);
			BUG_ON(eytz != eytzinger0_last(size) &&
			       eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz);

			inorder++;
		}
	}
}

static inline int cmp_u16(const void *_l, const void *_r, size_t size)
{
	const u16 *l = _l, *r = _r;

	return (*l > *r) - (*l < *r);
}

static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
{
	int i, c1 = -1, c2 = -1;
	ssize_t r;

	r = eytzinger0_find_le(test_array, nr,
			       sizeof(test_array[0]),
			       cmp_u16, &search);
	if (r >= 0)
		c1 = test_array[r];

	for (i = 0; i < nr; i++)
		if (test_array[i] <= search && test_array[i] > c2)
			c2 = test_array[i];

	if (c1 != c2) {
		eytzinger0_for_each(i, nr)
			pr_info("[%3u] = %12u", i, test_array[i]);
		pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
			search, r, c1, c2);
	}
}

void eytzinger0_find_test(void)
{
	unsigned i, nr, allocated = 1 << 12;
	u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL);

	for (nr = 1; nr < allocated; nr++) {
		pr_info("testing %u elems", nr);

		get_random_bytes(test_array, nr * sizeof(test_array[0]));
		eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL);

		/* verify array is sorted correctly: */
		eytzinger0_for_each(i, nr)
			BUG_ON(i != eytzinger0_last(nr) &&
			       test_array[i] > test_array[eytzinger0_next(i, nr)]);

		for (i = 0; i < U16_MAX; i += 1 << 12)
			eytzinger0_find_test_val(test_array, nr, i);

		for (i = 0; i < nr; i++) {
			eytzinger0_find_test_val(test_array, nr, test_array[i] - 1);
			eytzinger0_find_test_val(test_array, nr, test_array[i]);
			eytzinger0_find_test_val(test_array, nr, test_array[i] + 1);
		}
	}

	kfree(test_array);
}
#endif
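
/*
 * For reference (illustrative), the layout the tests above exercise: an
 * eytzinger1 array of size 7 stores a complete binary tree in BFS order,
 *
 *	index:	1  2  3  4  5  6  7
 *
 *	            1
 *	        2       3
 *	      4   5   6   7
 *
 * so in-order traversal via eytzinger1_for_each() visits indices
 * 4, 2, 5, 1, 6, 3, 7.
 */
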
/*
 * Accumulate percpu counters onto one cpu's copy - only valid when
 * concurrent access to the percpu counters is excluded by other locking:
 */
u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
{
	u64 *ret;
	int cpu;

	/* access to pcpu vars has to be blocked by other locking */
	preempt_disable();
	ret = this_cpu_ptr(p);
	preempt_enable();

	for_each_possible_cpu(cpu) {
		u64 *i = per_cpu_ptr(p, cpu);

		if (i != ret) {
			acc_u64s(ret, i, nr);
			memset(i, 0, nr * sizeof(u64));
		}
	}

	return ret;
}
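
/*
 * Illustrative sketch (not compiled): summing a small array of percpu
 * counters; the caller is responsible for excluding concurrent updates
 * while the accumulation runs:
 */
#if 0
static void acc_percpu_example(void)
{
	u64 __percpu *counters = __alloc_percpu(4 * sizeof(u64), sizeof(u64));
	u64 *totals;

	/* ... counters updated via this_cpu_add(counters[i], n) ... */

	totals = bch2_acc_percpu_u64s(counters, 4);	/* totals[0..3] now hold the sums */
	free_percpu(counters);
}
#endif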