// SPDX-License-Identifier: GPL-2.0
/*
 * random utility code, for bcache but in theory not specific to bcache
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/log2.h>
#include <linux/math64.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/sched/clock.h>

#include "eytzinger.h"
#include "mean_and_variance.h"
#include "util.h"

static const char si_units[] = "?kMGTPEZY";

/* string_get_size units: */
static const char *const units_2[] = {
	"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
};
static const char *const units_10[] = {
	"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
};

static int parse_u64(const char *cp, u64 *res)
{
	const char *start = cp;
	u64 v = 0;

	if (!isdigit(*cp))
		return -EINVAL;

	do {
		if (v > U64_MAX / 10)
			return -ERANGE;
		v *= 10;
		if (v > U64_MAX - (*cp - '0'))
			return -ERANGE;
		v += *cp - '0';
		cp++;
	} while (isdigit(*cp));

	*res = v;
	return cp - start;
}

static int bch2_pow(u64 n, u64 p, u64 *res)
{
	*res = 1;

	while (p--) {
		if (*res > div_u64(U64_MAX, n))
			return -ERANGE;
		*res *= n;
	}
	return 0;
}

static int parse_unit_suffix(const char *cp, u64 *res)
{
	const char *start = cp;
	u64 base = 1024;
	unsigned u;
	int ret;

	if (*cp == ' ')
		cp++;

	for (u = 1; u < strlen(si_units); u++)
		if (*cp == si_units[u]) {
			cp++;
			goto got_unit;
		}

	for (u = 0; u < ARRAY_SIZE(units_2); u++)
		if (!strncmp(cp, units_2[u], strlen(units_2[u]))) {
			cp += strlen(units_2[u]);
			goto got_unit;
		}

	for (u = 0; u < ARRAY_SIZE(units_10); u++)
		if (!strncmp(cp, units_10[u], strlen(units_10[u]))) {
			cp += strlen(units_10[u]);
			base = 1000;
			goto got_unit;
		}

	*res = 1;
	return 0;
got_unit:
	ret = bch2_pow(base, u, res);
	if (ret)
		return ret;

	return cp - start;
}

#define parse_or_ret(cp, _f)			\
do {						\
	int _ret = _f;				\
	if (_ret < 0)				\
		return _ret;			\
	cp += _ret;				\
} while (0)

static int __bch2_strtou64_h(const char *cp, u64 *res)
{
	const char *start = cp;
	u64 v = 0, b, f_n = 0, f_d = 1;
	int ret;

	parse_or_ret(cp, parse_u64(cp, &v));

	if (*cp == '.') {
		cp++;
		ret = parse_u64(cp, &f_n);
		if (ret < 0)
			return ret;
		cp += ret;

		ret = bch2_pow(10, ret, &f_d);
		if (ret)
			return ret;
	}

	parse_or_ret(cp, parse_unit_suffix(cp, &b));

	if (v > div_u64(U64_MAX, b))
		return -ERANGE;
	v *= b;

	if (f_n > div_u64(U64_MAX, b))
		return -ERANGE;

	f_n = div_u64(f_n * b, f_d);
	if (v + f_n < v)
		return -ERANGE;
	v += f_n;

	*res = v;
	return cp - start;
}

static int __bch2_strtoh(const char *cp, u64 *res,
			 u64 t_max, bool t_signed)
{
	bool positive = *cp != '-';
	u64 v = 0;

	if (*cp == '+' || *cp == '-')
		cp++;

	parse_or_ret(cp, __bch2_strtou64_h(cp, &v));

	if (*cp == '\n')
		cp++;
	if (*cp)
		return -EINVAL;

	if (positive) {
		if (v > t_max)
			return -ERANGE;
	} else {
		if (v && !t_signed)
			return -ERANGE;

		if (v > t_max + 1)
			return -ERANGE;
		v = -v;
	}

	*res = v;
	return 0;
}

#define STRTO_H(name, type)					\
int bch2_ ## name ## _h(const char *cp, type *res)		\
{								\
	u64 v = 0;						\
	int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type),	\
			ANYSINT_MAX(type) != ((type) ~0ULL));	\
	*res = v;						\
	return ret;						\
}

STRTO_H(strtoint, int)
STRTO_H(strtouint, unsigned int)
STRTO_H(strtoll, long long)
STRTO_H(strtoull, unsigned long long)
STRTO_H(strtou64, u64)
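/*
 * Illustrative example (not compiled in): the helpers above accept a
 * decimal fraction plus an optional unit suffix, so "1.5M" parses as
 * 1.5 * 1024^2:
 *
 *	u64 v;
 *	int ret = bch2_strtou64_h("1.5M", &v);
 *	// ret == 0, v == 1572864
 */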
u64 bch2_read_flag_list(char *opt, const char * const list[])
{
	u64 ret = 0;
	char *p, *s, *d = kstrdup(opt, GFP_KERNEL);

	if (!d)
		return -ENOMEM;

	s = strim(d);

	while ((p = strsep(&s, ","))) {
		int flag = match_string(list, -1, p);

		if (flag < 0) {
			ret = -1;
			break;
		}

		ret |= 1ULL << flag;
	}

	kfree(d);

	return ret;
}
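/*
 * Illustrative use of bch2_read_flag_list() (names are hypothetical; the
 * list must be NULL terminated since match_string() is passed -1):
 *
 *	static const char * const opts[] = { "foo", "bar", "baz", NULL };
 *	u64 v = bch2_read_flag_list("foo,baz", opts);
 *	// v == (1ULL << 0)|(1ULL << 2) == 0x5
 */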
bool bch2_is_zero(const void *_p, size_t n)
{
	const char *p = _p;
	size_t i;

	for (i = 0; i < n; i++)
		if (p[i])
			return false;
	return true;
}

void bch2_prt_u64_base2_nbits(struct printbuf *out, u64 v, unsigned nr_bits)
{
	while (nr_bits)
		prt_char(out, '0' + ((v >> --nr_bits) & 1));
}

void bch2_prt_u64_base2(struct printbuf *out, u64 v)
{
	bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1);
}

void bch2_print_string_as_lines(const char *prefix, const char *lines)
{
	const char *p;

	if (!lines) {
		printk("%s (null)\n", prefix);
		return;
	}

	console_lock();
	while (1) {
		p = strchrnul(lines, '\n');
		printk("%s%.*s\n", prefix, (int) (p - lines), lines);
		if (!*p)
			break;
		lines = p + 1;
	}
	console_unlock();
}

int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr,
			gfp_t gfp)
{
#ifdef CONFIG_STACKTRACE
	unsigned nr_entries = 0;

	stack->nr = 0;
	int ret = darray_make_room_gfp(stack, 32, gfp);
	if (ret)
		return ret;

	if (!down_read_trylock(&task->signal->exec_update_lock))
		return -1;

	do {
		nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
	} while (nr_entries == stack->size &&
		 !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp)));

	stack->nr = nr_entries;
	up_read(&task->signal->exec_update_lock);

	return ret;
#else
	return 0;
#endif
}

void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
{
	darray_for_each(*stack, i) {
		prt_printf(out, "[<0>] %pB", (void *) *i);
		prt_newline(out);
	}
}

int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp)
{
	bch_stacktrace stack = { 0 };
	int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp);

	bch2_prt_backtrace(out, &stack);
	darray_exit(&stack);
	return ret;
}

#ifndef __KERNEL__
#include <time.h>
void bch2_prt_datetime(struct printbuf *out, time64_t sec)
{
	time_t t = sec;
	char buf[64];
	ctime_r(&t, buf);
	strim(buf);
	prt_str(out, buf);
}
#else
void bch2_prt_datetime(struct printbuf *out, time64_t sec)
{
	char buf[64];

	snprintf(buf, sizeof(buf), "%ptT", &sec);
	prt_str(out, buf);
}
#endif

static const struct time_unit {
	const char	*name;
	u64		nsecs;
} time_units[] = {
	{ "ns",		1			},
	{ "us",		NSEC_PER_USEC		},
	{ "ms",		NSEC_PER_MSEC		},
	{ "s",		NSEC_PER_SEC		},
	{ "m",		(u64) NSEC_PER_SEC * 60	},
	{ "h",		(u64) NSEC_PER_SEC * 3600 },
	{ "eon",	U64_MAX			},
};

static const struct time_unit *pick_time_units(u64 ns)
{
	const struct time_unit *u;

	for (u = time_units;
	     u + 1 < time_units + ARRAY_SIZE(time_units) &&
	     ns >= u[1].nsecs << 1;
	     u++)
		;

	return u;
}

void bch2_pr_time_units(struct printbuf *out, u64 ns)
{
	const struct time_unit *u = pick_time_units(ns);

	prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
}

/* time stats: */

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
{
	unsigned i = 0;

	while (i < ARRAY_SIZE(q->entries)) {
		struct bch2_quantile_entry *e = q->entries + i;

		if (unlikely(!e->step)) {
			e->m = v;
			e->step = max_t(unsigned, v / 2, 1024);
		} else if (e->m > v) {
			e->m = e->m >= e->step
				? e->m - e->step
				: 0;
		} else if (e->m < v) {
			e->m = e->m + e->step > e->m
				? e->m + e->step
				: U32_MAX;
		}

		if ((e->m > v ? e->m - v : v - e->m) < e->step)
			e->step = max_t(unsigned, e->step / 2, 1);

		if (v >= e->m)
			break;

		i = eytzinger0_child(i, v > e->m);
	}
}

static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
					      u64 start, u64 end)
{
	u64 duration, freq;

	if (time_after64(end, start)) {
		duration = end - start;
		mean_and_variance_update(&stats->duration_stats, duration);
		mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
		stats->max_duration = max(stats->max_duration, duration);
		stats->min_duration = min(stats->min_duration, duration);
		stats->total_duration += duration;
		bch2_quantiles_update(&stats->quantiles, duration);
	}

	if (stats->last_event && time_after64(end, stats->last_event)) {
		freq = end - stats->last_event;
		mean_and_variance_update(&stats->freq_stats, freq);
		mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
		stats->max_freq = max(stats->max_freq, freq);
		stats->min_freq = min(stats->min_freq, freq);
	}

	stats->last_event = end;
}

static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
					   struct bch2_time_stat_buffer *b)
{
	for (struct bch2_time_stat_buffer_entry *i = b->entries;
	     i < b->entries + ARRAY_SIZE(b->entries);
	     i++)
		bch2_time_stats_update_one(stats, i->start, i->end);
	b->nr = 0;
}

static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
						  struct bch2_time_stat_buffer *b)
{
	unsigned long flags;

	spin_lock_irqsave(&stats->lock, flags);
	__bch2_time_stats_clear_buffer(stats, b);
	spin_unlock_irqrestore(&stats->lock, flags);
}

void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
{
	unsigned long flags;

	WARN_ONCE(!stats->duration_stats_weighted.weight ||
		  !stats->freq_stats_weighted.weight,
		  "uninitialized time_stats");

	if (!stats->buffer) {
		spin_lock_irqsave(&stats->lock, flags);
		bch2_time_stats_update_one(stats, start, end);

		if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
		    stats->duration_stats.n > 1024)
			stats->buffer =
				alloc_percpu_gfp(struct bch2_time_stat_buffer,
						 GFP_ATOMIC);
		spin_unlock_irqrestore(&stats->lock, flags);
	} else {
		struct bch2_time_stat_buffer *b;

		preempt_disable();
		b = this_cpu_ptr(stats->buffer);

		BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
		b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) {
			.start = start,
			.end = end
		};

		if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
			bch2_time_stats_clear_buffer(stats, b);
		preempt_enable();
	}
}
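/*
 * Typical call site (sketch only; upstream util.h also provides an inline
 * bch2_time_stats_update() wrapper that samples the end time itself):
 *
 *	u64 start = local_clock();
 *	do_some_io();	// hypothetical workload
 *	__bch2_time_stats_update(&stats, start, local_clock());
 */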
static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
{
	const struct time_unit *u = pick_time_units(ns);

	prt_printf(out, "%llu ", div64_u64(ns, u->nsecs));
	prt_tab_rjust(out);
	prt_printf(out, "%s", u->name);
}

static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns)
{
	prt_str(out, name);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, ns);
	prt_newline(out);
}

#define TABSTOP_SIZE 12

void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
{
	const struct time_unit *u;
	s64 f_mean = 0, d_mean = 0;
	u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
	int i;

	if (stats->buffer) {
		int cpu;

		spin_lock_irq(&stats->lock);
		for_each_possible_cpu(cpu)
			__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu));
		spin_unlock_irq(&stats->lock);
	}

	/* avoid divide by zero */
	if (stats->freq_stats.n) {
		f_mean = mean_and_variance_get_mean(stats->freq_stats);
		f_stddev = mean_and_variance_get_stddev(stats->freq_stats);
		d_mean = mean_and_variance_get_mean(stats->duration_stats);
		d_stddev = mean_and_variance_get_stddev(stats->duration_stats);
	}

	printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE);
	prt_printf(out, "count:");
	prt_tab(out);
	prt_printf(out, "%llu ", stats->duration_stats.n);
	printbuf_tabstop_pop(out);
	prt_newline(out);

	printbuf_tabstops_reset(out);

	printbuf_tabstop_push(out, out->indent + 20);
	printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
	printbuf_tabstop_push(out, 0);
	printbuf_tabstop_push(out, TABSTOP_SIZE + 2);

	prt_tab(out);
	prt_printf(out, "since mount");
	prt_tab_rjust(out);
	prt_tab(out);
	prt_printf(out, "recent");
	prt_tab_rjust(out);
	prt_newline(out);

	printbuf_tabstops_reset(out);
	printbuf_tabstop_push(out, out->indent + 20);
	printbuf_tabstop_push(out, TABSTOP_SIZE);
	printbuf_tabstop_push(out, 2);
	printbuf_tabstop_push(out, TABSTOP_SIZE);

	prt_printf(out, "duration of events");
	prt_newline(out);
	printbuf_indent_add(out, 2);

	pr_name_and_units(out, "min:", stats->min_duration);
	pr_name_and_units(out, "max:", stats->max_duration);
	pr_name_and_units(out, "total:", stats->total_duration);

	prt_printf(out, "mean:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, d_mean);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
	prt_newline(out);

	prt_printf(out, "stddev:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, d_stddev);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));

	printbuf_indent_sub(out, 2);
	prt_newline(out);

	prt_printf(out, "time between events");
	prt_newline(out);
	printbuf_indent_add(out, 2);

	pr_name_and_units(out, "min:", stats->min_freq);
	pr_name_and_units(out, "max:", stats->max_freq);

	prt_printf(out, "mean:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, f_mean);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
	prt_newline(out);

	prt_printf(out, "stddev:");
	prt_tab(out);
	bch2_pr_time_units_aligned(out, f_stddev);
	prt_tab(out);
	bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));

	printbuf_indent_sub(out, 2);
	prt_newline(out);

	printbuf_tabstops_reset(out);

	i = eytzinger0_first(NR_QUANTILES);
	u = pick_time_units(stats->quantiles.entries[i].m);

	prt_printf(out, "quantiles (%s):\t", u->name);
	eytzinger0_for_each(i, NR_QUANTILES) {
		bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;

		q = max(stats->quantiles.entries[i].m, last_q);
		prt_printf(out, "%llu ", div_u64(q, u->nsecs));
		if (is_last)
			prt_newline(out);
		last_q = q;
	}
}
#else
void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) {}
#endif

void bch2_time_stats_exit(struct bch2_time_stats *stats)
{
	free_percpu(stats->buffer);
}

void bch2_time_stats_init(struct bch2_time_stats *stats)
{
	memset(stats, 0, sizeof(*stats));
	stats->duration_stats_weighted.weight = 8;
	stats->freq_stats_weighted.weight = 8;
	stats->min_duration = U64_MAX;
	stats->min_freq = U64_MAX;
	spin_lock_init(&stats->lock);
}
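/*
 * Expected lifecycle (sketch using only the functions defined above):
 *
 *	struct bch2_time_stats stats;
 *
 *	bch2_time_stats_init(&stats);
 *	...				// events recorded via __bch2_time_stats_update()
 *	bch2_time_stats_to_text(&out, &stats);
 *	bch2_time_stats_exit(&stats);
 */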
/* ratelimit: */

/**
 * bch2_ratelimit_delay() - return how long to delay until the next time to do
 *		some work
 * @d:		the struct bch_ratelimit to update
 * Returns:	the amount of time to delay by, in jiffies
 */
u64 bch2_ratelimit_delay(struct bch_ratelimit *d)
{
	u64 now = local_clock();

	return time_after64(d->next, now)
		? nsecs_to_jiffies(d->next - now)
		: 0;
}

/**
 * bch2_ratelimit_increment() - increment @d by the amount of work done
 * @d:		the struct bch_ratelimit to update
 * @done:	the amount of work done, in arbitrary units
 */
void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done)
{
	u64 now = local_clock();

	d->next += div_u64(done * NSEC_PER_SEC, d->rate);

	if (time_before64(now + NSEC_PER_SEC, d->next))
		d->next = now + NSEC_PER_SEC;

	if (time_after64(now - NSEC_PER_SEC * 2, d->next))
		d->next = now - NSEC_PER_SEC * 2;
}
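/*
 * Sketch of a rate-limited work loop (illustrative only; have_work() and
 * do_some_work() are hypothetical, and real callers typically also check
 * kthread_should_stop()):
 *
 *	while (have_work()) {
 *		u64 delay = bch2_ratelimit_delay(&d);
 *
 *		if (delay)
 *			schedule_timeout_interruptible(delay);
 *		bch2_ratelimit_increment(&d, do_some_work());
 *	}
 */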
/* pd controller: */

/*
 * Updates pd_controller. Attempts to scale input values to units per second.
 * @target: desired value
 * @actual: current value
 *
 * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing
 * it makes actual go down.
 */
void bch2_pd_controller_update(struct bch_pd_controller *pd,
			       s64 target, s64 actual, int sign)
{
	s64 proportional, derivative, change;

	unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ;

	if (seconds_since_update == 0)
		return;

	pd->last_update = jiffies;

	proportional = actual - target;
	proportional *= seconds_since_update;
	proportional = div_s64(proportional, pd->p_term_inverse);

	derivative = actual - pd->last_actual;
	derivative = div_s64(derivative, seconds_since_update);
	derivative = ewma_add(pd->smoothed_derivative, derivative,
			      (pd->d_term / seconds_since_update) ?: 1);
	derivative = derivative * pd->d_term;
	derivative = div_s64(derivative, pd->p_term_inverse);

	change = proportional + derivative;

	/* Don't increase rate if not keeping up */
	if (change > 0 &&
	    pd->backpressure &&
	    time_after64(local_clock(),
			 pd->rate.next + NSEC_PER_MSEC))
		change = 0;

	change *= (sign * -1);

	pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change,
				1, UINT_MAX);

	pd->last_actual		= actual;
	pd->last_derivative	= derivative;
	pd->last_proportional	= proportional;
	pd->last_change		= change;
	pd->last_target		= target;
}
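/*
 * Worked example with illustrative numbers: with p_term_inverse == 6000,
 * target == 100 << 20, actual == 110 << 20 and one second between updates,
 * proportional == (10 << 20) / 6000 == 1747 (truncated); with sign == -1
 * (increasing the rate pushes actual down) the rate is nudged up by
 * ~1747 units/sec, ignoring the derivative term.
 */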
void bch2_pd_controller_init(struct bch_pd_controller *pd)
{
	pd->rate.rate		= 1024;
	pd->last_update		= jiffies;
	pd->p_term_inverse	= 6000;
	pd->d_term		= 30;
	pd->d_smooth		= pd->d_term;
	pd->backpressure	= 1;
}

void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 20);

	prt_printf(out, "rate:");
	prt_tab(out);
	prt_human_readable_s64(out, pd->rate.rate);
	prt_newline(out);

	prt_printf(out, "target:");
	prt_tab(out);
	prt_human_readable_u64(out, pd->last_target);
	prt_newline(out);

	prt_printf(out, "actual:");
	prt_tab(out);
	prt_human_readable_u64(out, pd->last_actual);
	prt_newline(out);

	prt_printf(out, "proportional:");
	prt_tab(out);
	prt_human_readable_s64(out, pd->last_proportional);
	prt_newline(out);

	prt_printf(out, "derivative:");
	prt_tab(out);
	prt_human_readable_s64(out, pd->last_derivative);
	prt_newline(out);

	prt_printf(out, "change:");
	prt_tab(out);
	prt_human_readable_s64(out, pd->last_change);
	prt_newline(out);

	prt_printf(out, "next io:");
	prt_tab(out);
	prt_printf(out, "%llims", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC));
	prt_newline(out);
}

/* misc: */

void bch2_bio_map(struct bio *bio, void *base, size_t size)
{
	while (size) {
		struct page *page = is_vmalloc_addr(base)
				? vmalloc_to_page(base)
				: virt_to_page(base);
		unsigned offset = offset_in_page(base);
		unsigned len = min_t(size_t, PAGE_SIZE - offset, size);

		BUG_ON(!bio_add_page(bio, page, len, offset));
		size -= len;
		base += len;
	}
}

int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
{
	while (size) {
		struct page *page = alloc_pages(gfp_mask, 0);
		unsigned len = min_t(size_t, PAGE_SIZE, size);

		if (!page)
			return -ENOMEM;

		if (unlikely(!bio_add_page(bio, page, len, 0))) {
			__free_page(page);
			break;
		}

		size -= len;
	}

	return 0;
}

size_t bch2_rand_range(size_t max)
{
	size_t rand;

	if (!max)
		return 0;

	do {
		rand = get_random_long();
		rand &= roundup_pow_of_two(max) - 1;
	} while (rand >= max);

	return rand;
}

void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
{
	struct bio_vec bv;
	struct bvec_iter iter;

	__bio_for_each_segment(bv, dst, iter, dst_iter) {
		void *dstp = kmap_local_page(bv.bv_page);

		memcpy(dstp + bv.bv_offset, src, bv.bv_len);
		kunmap_local(dstp);

		src += bv.bv_len;
	}
}

void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
{
	struct bio_vec bv;
	struct bvec_iter iter;

	__bio_for_each_segment(bv, src, iter, src_iter) {
		void *srcp = kmap_local_page(bv.bv_page);

		memcpy(dst, srcp + bv.bv_offset, bv.bv_len);
		kunmap_local(srcp);

		dst += bv.bv_len;
	}
}

static int alignment_ok(const void *base, size_t align)
{
	return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
		((unsigned long)base & (align - 1)) == 0;
}

static void u32_swap(void *a, void *b, size_t size)
{
	u32 t = *(u32 *)a;
	*(u32 *)a = *(u32 *)b;
	*(u32 *)b = t;
}

static void u64_swap(void *a, void *b, size_t size)
{
	u64 t = *(u64 *)a;
	*(u64 *)a = *(u64 *)b;
	*(u64 *)b = t;
}

static void generic_swap(void *a, void *b, size_t size)
{
	char t;

	do {
		t = *(char *)a;
		*(char *)a++ = *(char *)b;
		*(char *)b++ = t;
	} while (--size > 0);
}

static inline int do_cmp(void *base, size_t n, size_t size,
			 int (*cmp_func)(const void *, const void *, size_t),
			 size_t l, size_t r)
{
	return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
			base + inorder_to_eytzinger0(r, n) * size,
			size);
}

static inline void do_swap(void *base, size_t n, size_t size,
			   void (*swap_func)(void *, void *, size_t),
			   size_t l, size_t r)
{
	swap_func(base + inorder_to_eytzinger0(l, n) * size,
		  base + inorder_to_eytzinger0(r, n) * size,
		  size);
}
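/*
 * eytzinger0_sort() below is bottom-up heapsort operating on the in-order
 * positions of an eytzinger0 (BFS-order) array: sorting n == 7 elements
 * with values 1..7 leaves them laid out as { 4, 2, 6, 1, 3, 5, 7 }, i.e.
 * a complete binary search tree with the root at index 0 and the children
 * of index i at 2i+1 and 2i+2.
 */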
void eytzinger0_sort(void *base, size_t n, size_t size,
		     int (*cmp_func)(const void *, const void *, size_t),
		     void (*swap_func)(void *, void *, size_t))
{
	int i, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify */
	for (i = n / 2 - 1; i >= 0; --i) {
		for (r = i; r * 2 + 1 < n; r = c) {
			c = r * 2 + 1;

			if (c + 1 < n &&
			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
				c++;

			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
				break;

			do_swap(base, n, size, swap_func, r, c);
		}
	}

	/* sort */
	for (i = n - 1; i > 0; --i) {
		do_swap(base, n, size, swap_func, 0, i);

		for (r = 0; r * 2 + 1 < i; r = c) {
			c = r * 2 + 1;

			if (c + 1 < i &&
			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
				c++;

			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
				break;

			do_swap(base, n, size, swap_func, r, c);
		}
	}
}

void sort_cmp_size(void *base, size_t num, size_t size,
		   int (*cmp_func)(const void *, const void *, size_t),
		   void (*swap_func)(void *, void *, size_t size))
{
	/* pre-scale counters for performance */
	int i = (num/2 - 1) * size, n = num * size, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify */
	for ( ; i >= 0; i -= size) {
		for (r = i; r * 2 + size < n; r = c) {
			c = r * 2 + size;
			if (c < n - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}

	/* sort */
	for (i = n - size; i > 0; i -= size) {
		swap_func(base, base + i, size);
		for (r = 0; r * 2 + size < i; r = c) {
			c = r * 2 + size;
			if (c < i - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}
}

static void mempool_free_vp(void *element, void *pool_data)
{
	size_t size = (size_t) pool_data;

	vpfree(element, size);
}

static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
{
	size_t size = (size_t) pool_data;

	return vpmalloc(size, gfp_mask);
}

int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
{
	return size < PAGE_SIZE
		? mempool_init_kmalloc_pool(pool, min_nr, size)
		: mempool_init(pool, min_nr, mempool_alloc_vp,
			       mempool_free_vp, (void *) size);
}
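/*
 * Illustrative use: a pool guaranteeing one preallocated 256K buffer.
 * Since the size is >= PAGE_SIZE this takes the vpmalloc()/vpfree()
 * branch above:
 *
 *	mempool_t pool;
 *	int ret = mempool_init_kvpmalloc_pool(&pool, 1, 256 << 10);
 */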
#if 0
void eytzinger1_test(void)
{
	unsigned inorder, eytz, size;

	pr_info("1 based eytzinger test:");

	for (size = 2;
	     size < 65536;
	     size++) {
		unsigned extra = eytzinger1_extra(size);

		if (!(size % 4096))
			pr_info("tree size %u", size);

		BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size));
		BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size));

		BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0);
		BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0);

		inorder = 1;
		eytzinger1_for_each(eytz, size) {
			BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz);
			BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder);
			BUG_ON(eytz != eytzinger1_last(size) &&
			       eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz);

			inorder++;
		}
	}
}

void eytzinger0_test(void)
{
	unsigned inorder, eytz, size;

	pr_info("0 based eytzinger test:");

	for (size = 1;
	     size < 65536;
	     size++) {
		unsigned extra = eytzinger0_extra(size);

		if (!(size % 4096))
			pr_info("tree size %u", size);

		BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size));
		BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size));

		BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1);
		BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1);

		inorder = 0;
		eytzinger0_for_each(eytz, size) {
			BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz);
			BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder);
			BUG_ON(eytz != eytzinger0_last(size) &&
			       eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz);

			inorder++;
		}
	}
}

static inline int cmp_u16(const void *_l, const void *_r, size_t size)
{
	const u16 *l = _l, *r = _r;

	return (*l > *r) - (*r > *l);
}

static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search)
{
	int i, c1 = -1, c2 = -1;
	ssize_t r;

	r = eytzinger0_find_le(test_array, nr,
			       sizeof(test_array[0]),
			       cmp_u16, &search);
	if (r >= 0)
		c1 = test_array[r];

	for (i = 0; i < nr; i++)
		if (test_array[i] <= search && test_array[i] > c2)
			c2 = test_array[i];

	if (c1 != c2) {
		eytzinger0_for_each(i, nr)
			pr_info("[%3u] = %12u", i, test_array[i]);
		pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i",
			search, r, c1, c2);
	}
}

void eytzinger0_find_test(void)
{
	unsigned i, nr, allocated = 1 << 12;
	u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL);

	for (nr = 1; nr < allocated; nr++) {
		pr_info("testing %u elems", nr);

		get_random_bytes(test_array, nr * sizeof(test_array[0]));
		eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL);

		/* verify array is sorted correctly: */
		eytzinger0_for_each(i, nr)
			BUG_ON(i != eytzinger0_last(nr) &&
			       test_array[i] > test_array[eytzinger0_next(i, nr)]);

		for (i = 0; i < U16_MAX; i += 1 << 12)
			eytzinger0_find_test_val(test_array, nr, i);

		for (i = 0; i < nr; i++) {
			eytzinger0_find_test_val(test_array, nr, test_array[i] - 1);
			eytzinger0_find_test_val(test_array, nr, test_array[i]);
			eytzinger0_find_test_val(test_array, nr, test_array[i] + 1);
		}
	}

	kfree(test_array);
}
#endif

/*
 * Accumulate percpu counters onto one cpu's copy - only valid when access
 * against any percpu counter is guarded against
 */
u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr)
{
	u64 *ret;
	int cpu;

	/* access to pcpu vars has to be blocked by other locking */
	preempt_disable();
	ret = this_cpu_ptr(p);
	preempt_enable();

	for_each_possible_cpu(cpu) {
		u64 *i = per_cpu_ptr(p, cpu);

		if (i != ret) {
			acc_u64s(ret, i, nr);
			memset(i, 0, nr * sizeof(u64));
		}
	}

	return ret;
}
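/*
 * Sketch: totalling a percpu array of nr counters for reporting, assuming
 * the caller holds whatever lock normally guards updates to them:
 *
 *	u64 *sums = bch2_acc_percpu_u64s(counters, nr);
 *	// sums[0..nr) now hold the totals; other cpus' copies are zeroed
 */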
void bch2_darray_str_exit(darray_str *d)
{
	darray_for_each(*d, i)
		kfree(*i);
	darray_exit(d);
}

int bch2_split_devs(const char *_dev_name, darray_str *ret)
{
	darray_init(ret);

	char *dev_name, *s, *orig;

	dev_name = orig = kstrdup(_dev_name, GFP_KERNEL);
	if (!dev_name)
		return -ENOMEM;

	while ((s = strsep(&dev_name, ":"))) {
		char *p = kstrdup(s, GFP_KERNEL);
		if (!p)
			goto err;

		if (darray_push(ret, p)) {
			kfree(p);
			goto err;
		}
	}

	kfree(orig);
	return 0;
err:
	bch2_darray_str_exit(ret);
	kfree(orig);
	return -ENOMEM;
}
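/*
 * Illustrative use: "/dev/sda1:/dev/sdb1" splits into a darray of two
 * kstrdup()ed strings, freed again with bch2_darray_str_exit():
 *
 *	darray_str devs;
 *	int ret = bch2_split_devs("/dev/sda1:/dev/sdb1", &devs);
 *	...
 *	bch2_darray_str_exit(&devs);
 */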