1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * random utiility code, for bcache but in theory not specific to bcache 4 * 5 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> 6 * Copyright 2012 Google, Inc. 7 */ 8 9 #include <linux/bio.h> 10 #include <linux/blkdev.h> 11 #include <linux/console.h> 12 #include <linux/ctype.h> 13 #include <linux/debugfs.h> 14 #include <linux/freezer.h> 15 #include <linux/kthread.h> 16 #include <linux/log2.h> 17 #include <linux/math64.h> 18 #include <linux/percpu.h> 19 #include <linux/preempt.h> 20 #include <linux/random.h> 21 #include <linux/seq_file.h> 22 #include <linux/string.h> 23 #include <linux/types.h> 24 #include <linux/sched/clock.h> 25 26 #include "eytzinger.h" 27 #include "mean_and_variance.h" 28 #include "util.h" 29 30 static const char si_units[] = "?kMGTPEZY"; 31 32 /* string_get_size units: */ 33 static const char *const units_2[] = { 34 "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" 35 }; 36 static const char *const units_10[] = { 37 "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" 38 }; 39 40 static int parse_u64(const char *cp, u64 *res) 41 { 42 const char *start = cp; 43 u64 v = 0; 44 45 if (!isdigit(*cp)) 46 return -EINVAL; 47 48 do { 49 if (v > U64_MAX / 10) 50 return -ERANGE; 51 v *= 10; 52 if (v > U64_MAX - (*cp - '0')) 53 return -ERANGE; 54 v += *cp - '0'; 55 cp++; 56 } while (isdigit(*cp)); 57 58 *res = v; 59 return cp - start; 60 } 61 62 static int bch2_pow(u64 n, u64 p, u64 *res) 63 { 64 *res = 1; 65 66 while (p--) { 67 if (*res > div_u64(U64_MAX, n)) 68 return -ERANGE; 69 *res *= n; 70 } 71 return 0; 72 } 73 74 static int parse_unit_suffix(const char *cp, u64 *res) 75 { 76 const char *start = cp; 77 u64 base = 1024; 78 unsigned u; 79 int ret; 80 81 if (*cp == ' ') 82 cp++; 83 84 for (u = 1; u < strlen(si_units); u++) 85 if (*cp == si_units[u]) { 86 cp++; 87 goto got_unit; 88 } 89 90 for (u = 0; u < ARRAY_SIZE(units_2); u++) 91 if (!strncmp(cp, units_2[u], strlen(units_2[u]))) 
{ 92 cp += strlen(units_2[u]); 93 goto got_unit; 94 } 95 96 for (u = 0; u < ARRAY_SIZE(units_10); u++) 97 if (!strncmp(cp, units_10[u], strlen(units_10[u]))) { 98 cp += strlen(units_10[u]); 99 base = 1000; 100 goto got_unit; 101 } 102 103 *res = 1; 104 return 0; 105 got_unit: 106 ret = bch2_pow(base, u, res); 107 if (ret) 108 return ret; 109 110 return cp - start; 111 } 112 113 #define parse_or_ret(cp, _f) \ 114 do { \ 115 int _ret = _f; \ 116 if (_ret < 0) \ 117 return _ret; \ 118 cp += _ret; \ 119 } while (0) 120 121 static int __bch2_strtou64_h(const char *cp, u64 *res) 122 { 123 const char *start = cp; 124 u64 v = 0, b, f_n = 0, f_d = 1; 125 int ret; 126 127 parse_or_ret(cp, parse_u64(cp, &v)); 128 129 if (*cp == '.') { 130 cp++; 131 ret = parse_u64(cp, &f_n); 132 if (ret < 0) 133 return ret; 134 cp += ret; 135 136 ret = bch2_pow(10, ret, &f_d); 137 if (ret) 138 return ret; 139 } 140 141 parse_or_ret(cp, parse_unit_suffix(cp, &b)); 142 143 if (v > div_u64(U64_MAX, b)) 144 return -ERANGE; 145 v *= b; 146 147 if (f_n > div_u64(U64_MAX, b)) 148 return -ERANGE; 149 150 f_n = div_u64(f_n * b, f_d); 151 if (v + f_n < v) 152 return -ERANGE; 153 v += f_n; 154 155 *res = v; 156 return cp - start; 157 } 158 159 static int __bch2_strtoh(const char *cp, u64 *res, 160 u64 t_max, bool t_signed) 161 { 162 bool positive = *cp != '-'; 163 u64 v = 0; 164 165 if (*cp == '+' || *cp == '-') 166 cp++; 167 168 parse_or_ret(cp, __bch2_strtou64_h(cp, &v)); 169 170 if (*cp == '\n') 171 cp++; 172 if (*cp) 173 return -EINVAL; 174 175 if (positive) { 176 if (v > t_max) 177 return -ERANGE; 178 } else { 179 if (v && !t_signed) 180 return -ERANGE; 181 182 if (v > t_max + 1) 183 return -ERANGE; 184 v = -v; 185 } 186 187 *res = v; 188 return 0; 189 } 190 191 #define STRTO_H(name, type) \ 192 int bch2_ ## name ## _h(const char *cp, type *res) \ 193 { \ 194 u64 v = 0; \ 195 int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \ 196 ANYSINT_MAX(type) != ((type) ~0ULL)); \ 197 *res = v; \ 198 return 
ret; \ 199 } 200 201 STRTO_H(strtoint, int) 202 STRTO_H(strtouint, unsigned int) 203 STRTO_H(strtoll, long long) 204 STRTO_H(strtoull, unsigned long long) 205 STRTO_H(strtou64, u64) 206 207 u64 bch2_read_flag_list(char *opt, const char * const list[]) 208 { 209 u64 ret = 0; 210 char *p, *s, *d = kstrdup(opt, GFP_KERNEL); 211 212 if (!d) 213 return -ENOMEM; 214 215 s = strim(d); 216 217 while ((p = strsep(&s, ","))) { 218 int flag = match_string(list, -1, p); 219 220 if (flag < 0) { 221 ret = -1; 222 break; 223 } 224 225 ret |= 1 << flag; 226 } 227 228 kfree(d); 229 230 return ret; 231 } 232 233 bool bch2_is_zero(const void *_p, size_t n) 234 { 235 const char *p = _p; 236 size_t i; 237 238 for (i = 0; i < n; i++) 239 if (p[i]) 240 return false; 241 return true; 242 } 243 244 void bch2_prt_u64_base2_nbits(struct printbuf *out, u64 v, unsigned nr_bits) 245 { 246 while (nr_bits) 247 prt_char(out, '0' + ((v >> --nr_bits) & 1)); 248 } 249 250 void bch2_prt_u64_base2(struct printbuf *out, u64 v) 251 { 252 bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); 253 } 254 255 void bch2_print_string_as_lines(const char *prefix, const char *lines) 256 { 257 const char *p; 258 259 if (!lines) { 260 printk("%s (null)\n", prefix); 261 return; 262 } 263 264 console_lock(); 265 while (1) { 266 p = strchrnul(lines, '\n'); 267 printk("%s%.*s\n", prefix, (int) (p - lines), lines); 268 if (!*p) 269 break; 270 lines = p + 1; 271 } 272 console_unlock(); 273 } 274 275 int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr) 276 { 277 #ifdef CONFIG_STACKTRACE 278 unsigned nr_entries = 0; 279 int ret = 0; 280 281 stack->nr = 0; 282 ret = darray_make_room(stack, 32); 283 if (ret) 284 return ret; 285 286 if (!down_read_trylock(&task->signal->exec_update_lock)) 287 return -1; 288 289 do { 290 nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); 291 } while (nr_entries == stack->size && 292 !(ret = darray_make_room(stack, stack->size * 2))); 
293 294 stack->nr = nr_entries; 295 up_read(&task->signal->exec_update_lock); 296 297 return ret; 298 #else 299 return 0; 300 #endif 301 } 302 303 void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) 304 { 305 darray_for_each(*stack, i) { 306 prt_printf(out, "[<0>] %pB", (void *) *i); 307 prt_newline(out); 308 } 309 } 310 311 int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr) 312 { 313 bch_stacktrace stack = { 0 }; 314 int ret = bch2_save_backtrace(&stack, task, skipnr + 1); 315 316 bch2_prt_backtrace(out, &stack); 317 darray_exit(&stack); 318 return ret; 319 } 320 321 #ifndef __KERNEL__ 322 #include <time.h> 323 void bch2_prt_datetime(struct printbuf *out, time64_t sec) 324 { 325 time_t t = sec; 326 char buf[64]; 327 ctime_r(&t, buf); 328 strim(buf); 329 prt_str(out, buf); 330 } 331 #else 332 void bch2_prt_datetime(struct printbuf *out, time64_t sec) 333 { 334 char buf[64]; 335 snprintf(buf, sizeof(buf), "%ptT", &sec); 336 prt_u64(out, sec); 337 } 338 #endif 339 340 static const struct time_unit { 341 const char *name; 342 u64 nsecs; 343 } time_units[] = { 344 { "ns", 1 }, 345 { "us", NSEC_PER_USEC }, 346 { "ms", NSEC_PER_MSEC }, 347 { "s", NSEC_PER_SEC }, 348 { "m", (u64) NSEC_PER_SEC * 60}, 349 { "h", (u64) NSEC_PER_SEC * 3600}, 350 { "eon", U64_MAX }, 351 }; 352 353 static const struct time_unit *pick_time_units(u64 ns) 354 { 355 const struct time_unit *u; 356 357 for (u = time_units; 358 u + 1 < time_units + ARRAY_SIZE(time_units) && 359 ns >= u[1].nsecs << 1; 360 u++) 361 ; 362 363 return u; 364 } 365 366 void bch2_pr_time_units(struct printbuf *out, u64 ns) 367 { 368 const struct time_unit *u = pick_time_units(ns); 369 370 prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); 371 } 372 373 /* time stats: */ 374 375 #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT 376 static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v) 377 { 378 unsigned i = 0; 379 380 while (i < ARRAY_SIZE(q->entries)) { 
381 struct bch2_quantile_entry *e = q->entries + i; 382 383 if (unlikely(!e->step)) { 384 e->m = v; 385 e->step = max_t(unsigned, v / 2, 1024); 386 } else if (e->m > v) { 387 e->m = e->m >= e->step 388 ? e->m - e->step 389 : 0; 390 } else if (e->m < v) { 391 e->m = e->m + e->step > e->m 392 ? e->m + e->step 393 : U32_MAX; 394 } 395 396 if ((e->m > v ? e->m - v : v - e->m) < e->step) 397 e->step = max_t(unsigned, e->step / 2, 1); 398 399 if (v >= e->m) 400 break; 401 402 i = eytzinger0_child(i, v > e->m); 403 } 404 } 405 406 static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats, 407 u64 start, u64 end) 408 { 409 u64 duration, freq; 410 411 if (time_after64(end, start)) { 412 duration = end - start; 413 mean_and_variance_update(&stats->duration_stats, duration); 414 mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration); 415 stats->max_duration = max(stats->max_duration, duration); 416 stats->min_duration = min(stats->min_duration, duration); 417 stats->total_duration += duration; 418 bch2_quantiles_update(&stats->quantiles, duration); 419 } 420 421 if (time_after64(end, stats->last_event)) { 422 freq = end - stats->last_event; 423 mean_and_variance_update(&stats->freq_stats, freq); 424 mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq); 425 stats->max_freq = max(stats->max_freq, freq); 426 stats->min_freq = min(stats->min_freq, freq); 427 stats->last_event = end; 428 } 429 } 430 431 static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, 432 struct bch2_time_stat_buffer *b) 433 { 434 for (struct bch2_time_stat_buffer_entry *i = b->entries; 435 i < b->entries + ARRAY_SIZE(b->entries); 436 i++) 437 bch2_time_stats_update_one(stats, i->start, i->end); 438 b->nr = 0; 439 } 440 441 static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats, 442 struct bch2_time_stat_buffer *b) 443 { 444 unsigned long flags; 445 446 spin_lock_irqsave(&stats->lock, flags); 447 
__bch2_time_stats_clear_buffer(stats, b); 448 spin_unlock_irqrestore(&stats->lock, flags); 449 } 450 451 void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) 452 { 453 unsigned long flags; 454 455 WARN_ONCE(!stats->duration_stats_weighted.weight || 456 !stats->freq_stats_weighted.weight, 457 "uninitialized time_stats"); 458 459 if (!stats->buffer) { 460 spin_lock_irqsave(&stats->lock, flags); 461 bch2_time_stats_update_one(stats, start, end); 462 463 if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 && 464 stats->duration_stats.n > 1024) 465 stats->buffer = 466 alloc_percpu_gfp(struct bch2_time_stat_buffer, 467 GFP_ATOMIC); 468 spin_unlock_irqrestore(&stats->lock, flags); 469 } else { 470 struct bch2_time_stat_buffer *b; 471 472 preempt_disable(); 473 b = this_cpu_ptr(stats->buffer); 474 475 BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); 476 b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) { 477 .start = start, 478 .end = end 479 }; 480 481 if (unlikely(b->nr == ARRAY_SIZE(b->entries))) 482 bch2_time_stats_clear_buffer(stats, b); 483 preempt_enable(); 484 } 485 } 486 487 static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) 488 { 489 const struct time_unit *u = pick_time_units(ns); 490 491 prt_printf(out, "%llu ", div64_u64(ns, u->nsecs)); 492 prt_tab_rjust(out); 493 prt_printf(out, "%s", u->name); 494 } 495 496 static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) 497 { 498 prt_str(out, name); 499 prt_tab(out); 500 bch2_pr_time_units_aligned(out, ns); 501 prt_newline(out); 502 } 503 504 #define TABSTOP_SIZE 12 505 506 void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) 507 { 508 const struct time_unit *u; 509 s64 f_mean = 0, d_mean = 0; 510 u64 q, last_q = 0, f_stddev = 0, d_stddev = 0; 511 int i; 512 513 if (stats->buffer) { 514 int cpu; 515 516 spin_lock_irq(&stats->lock); 517 for_each_possible_cpu(cpu) 518 
__bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu)); 519 spin_unlock_irq(&stats->lock); 520 } 521 522 /* 523 * avoid divide by zero 524 */ 525 if (stats->freq_stats.n) { 526 f_mean = mean_and_variance_get_mean(stats->freq_stats); 527 f_stddev = mean_and_variance_get_stddev(stats->freq_stats); 528 d_mean = mean_and_variance_get_mean(stats->duration_stats); 529 d_stddev = mean_and_variance_get_stddev(stats->duration_stats); 530 } 531 532 printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE); 533 prt_printf(out, "count:"); 534 prt_tab(out); 535 prt_printf(out, "%llu ", 536 stats->duration_stats.n); 537 printbuf_tabstop_pop(out); 538 prt_newline(out); 539 540 printbuf_tabstops_reset(out); 541 542 printbuf_tabstop_push(out, out->indent + 20); 543 printbuf_tabstop_push(out, TABSTOP_SIZE + 2); 544 printbuf_tabstop_push(out, 0); 545 printbuf_tabstop_push(out, TABSTOP_SIZE + 2); 546 547 prt_tab(out); 548 prt_printf(out, "since mount"); 549 prt_tab_rjust(out); 550 prt_tab(out); 551 prt_printf(out, "recent"); 552 prt_tab_rjust(out); 553 prt_newline(out); 554 555 printbuf_tabstops_reset(out); 556 printbuf_tabstop_push(out, out->indent + 20); 557 printbuf_tabstop_push(out, TABSTOP_SIZE); 558 printbuf_tabstop_push(out, 2); 559 printbuf_tabstop_push(out, TABSTOP_SIZE); 560 561 prt_printf(out, "duration of events"); 562 prt_newline(out); 563 printbuf_indent_add(out, 2); 564 565 pr_name_and_units(out, "min:", stats->min_duration); 566 pr_name_and_units(out, "max:", stats->max_duration); 567 pr_name_and_units(out, "total:", stats->total_duration); 568 569 prt_printf(out, "mean:"); 570 prt_tab(out); 571 bch2_pr_time_units_aligned(out, d_mean); 572 prt_tab(out); 573 bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted)); 574 prt_newline(out); 575 576 prt_printf(out, "stddev:"); 577 prt_tab(out); 578 bch2_pr_time_units_aligned(out, d_stddev); 579 prt_tab(out); 580 bch2_pr_time_units_aligned(out, 
mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted)); 581 582 printbuf_indent_sub(out, 2); 583 prt_newline(out); 584 585 prt_printf(out, "time between events"); 586 prt_newline(out); 587 printbuf_indent_add(out, 2); 588 589 pr_name_and_units(out, "min:", stats->min_freq); 590 pr_name_and_units(out, "max:", stats->max_freq); 591 592 prt_printf(out, "mean:"); 593 prt_tab(out); 594 bch2_pr_time_units_aligned(out, f_mean); 595 prt_tab(out); 596 bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted)); 597 prt_newline(out); 598 599 prt_printf(out, "stddev:"); 600 prt_tab(out); 601 bch2_pr_time_units_aligned(out, f_stddev); 602 prt_tab(out); 603 bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted)); 604 605 printbuf_indent_sub(out, 2); 606 prt_newline(out); 607 608 printbuf_tabstops_reset(out); 609 610 i = eytzinger0_first(NR_QUANTILES); 611 u = pick_time_units(stats->quantiles.entries[i].m); 612 613 prt_printf(out, "quantiles (%s):\t", u->name); 614 eytzinger0_for_each(i, NR_QUANTILES) { 615 bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1; 616 617 q = max(stats->quantiles.entries[i].m, last_q); 618 prt_printf(out, "%llu ", 619 div_u64(q, u->nsecs)); 620 if (is_last) 621 prt_newline(out); 622 last_q = q; 623 } 624 } 625 #else 626 void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) {} 627 #endif 628 629 void bch2_time_stats_exit(struct bch2_time_stats *stats) 630 { 631 free_percpu(stats->buffer); 632 } 633 634 void bch2_time_stats_init(struct bch2_time_stats *stats) 635 { 636 memset(stats, 0, sizeof(*stats)); 637 stats->duration_stats_weighted.weight = 8; 638 stats->freq_stats_weighted.weight = 8; 639 stats->min_duration = U64_MAX; 640 stats->min_freq = U64_MAX; 641 spin_lock_init(&stats->lock); 642 } 643 644 /* ratelimit: */ 645 646 /** 647 * bch2_ratelimit_delay() - return how long to delay until the next time to do 648 * some work 
649 * @d: the struct bch_ratelimit to update 650 * Returns: the amount of time to delay by, in jiffies 651 */ 652 u64 bch2_ratelimit_delay(struct bch_ratelimit *d) 653 { 654 u64 now = local_clock(); 655 656 return time_after64(d->next, now) 657 ? nsecs_to_jiffies(d->next - now) 658 : 0; 659 } 660 661 /** 662 * bch2_ratelimit_increment() - increment @d by the amount of work done 663 * @d: the struct bch_ratelimit to update 664 * @done: the amount of work done, in arbitrary units 665 */ 666 void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done) 667 { 668 u64 now = local_clock(); 669 670 d->next += div_u64(done * NSEC_PER_SEC, d->rate); 671 672 if (time_before64(now + NSEC_PER_SEC, d->next)) 673 d->next = now + NSEC_PER_SEC; 674 675 if (time_after64(now - NSEC_PER_SEC * 2, d->next)) 676 d->next = now - NSEC_PER_SEC * 2; 677 } 678 679 /* pd controller: */ 680 681 /* 682 * Updates pd_controller. Attempts to scale inputed values to units per second. 683 * @target: desired value 684 * @actual: current value 685 * 686 * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing 687 * it makes actual go down. 
/*
 * bch2_pd_controller_update() - recompute pd->rate.rate from the current error
 * @pd:     controller state; also receives the values used, for debug output
 * @target: desired value
 * @actual: current value
 * @sign:   1 or -1; 1 if increasing the rate makes actual go up, -1 if
 *          increasing it makes actual go down.
 *
 * Attempts to scale input values to units per second. Does nothing unless at
 * least one whole second has passed since the previous update.
 */
void bch2_pd_controller_update(struct bch_pd_controller *pd,
			      s64 target, s64 actual, int sign)
{
	s64 proportional, derivative, change;

	/* whole seconds elapsed; the sub-second remainder is dropped */
	unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ;

	if (seconds_since_update == 0)
		return;

	pd->last_update = jiffies;

	/* proportional term: error * elapsed seconds / p_term_inverse */
	proportional = actual - target;
	proportional *= seconds_since_update;
	proportional = div_s64(proportional, pd->p_term_inverse);

	/*
	 * derivative term: change of actual per second, passed through
	 * ewma_add() together with pd->smoothed_derivative, then scaled by
	 * d_term / p_term_inverse.
	 * NOTE(review): ewma_add() is defined elsewhere; presumably it returns
	 * the smoothed value - confirm whether it also updates its first
	 * argument.
	 */
	derivative = actual - pd->last_actual;
	derivative = div_s64(derivative, seconds_since_update);
	derivative = ewma_add(pd->smoothed_derivative, derivative,
			      (pd->d_term / seconds_since_update) ?: 1);
	derivative = derivative * pd->d_term;
	derivative = div_s64(derivative, pd->p_term_inverse);

	change = proportional + derivative;

	/* Don't increase rate if not keeping up */
	if (change > 0 &&
	    pd->backpressure &&
	    time_after64(local_clock(),
			 pd->rate.next + NSEC_PER_MSEC))
		change = 0;

	/* a positive change lowers the rate when sign is 1, raises it when -1 */
	change *= (sign * -1);

	pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change,
				1, UINT_MAX);

	/* remembered for bch2_pd_controller_debug_to_text() and the next update */
	pd->last_actual		= actual;
	pd->last_derivative	= derivative;
	pd->last_proportional	= proportional;
	pd->last_change		= change;
	pd->last_target		= target;
}
"actual:"); 759 prt_tab(out); 760 prt_human_readable_u64(out, pd->last_actual); 761 prt_newline(out); 762 763 prt_printf(out, "proportional:"); 764 prt_tab(out); 765 prt_human_readable_s64(out, pd->last_proportional); 766 prt_newline(out); 767 768 prt_printf(out, "derivative:"); 769 prt_tab(out); 770 prt_human_readable_s64(out, pd->last_derivative); 771 prt_newline(out); 772 773 prt_printf(out, "change:"); 774 prt_tab(out); 775 prt_human_readable_s64(out, pd->last_change); 776 prt_newline(out); 777 778 prt_printf(out, "next io:"); 779 prt_tab(out); 780 prt_printf(out, "%llims", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC)); 781 prt_newline(out); 782 } 783 784 /* misc: */ 785 786 void bch2_bio_map(struct bio *bio, void *base, size_t size) 787 { 788 while (size) { 789 struct page *page = is_vmalloc_addr(base) 790 ? vmalloc_to_page(base) 791 : virt_to_page(base); 792 unsigned offset = offset_in_page(base); 793 unsigned len = min_t(size_t, PAGE_SIZE - offset, size); 794 795 BUG_ON(!bio_add_page(bio, page, len, offset)); 796 size -= len; 797 base += len; 798 } 799 } 800 801 int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) 802 { 803 while (size) { 804 struct page *page = alloc_pages(gfp_mask, 0); 805 unsigned len = min_t(size_t, PAGE_SIZE, size); 806 807 if (!page) 808 return -ENOMEM; 809 810 if (unlikely(!bio_add_page(bio, page, len, 0))) { 811 __free_page(page); 812 break; 813 } 814 815 size -= len; 816 } 817 818 return 0; 819 } 820 821 size_t bch2_rand_range(size_t max) 822 { 823 size_t rand; 824 825 if (!max) 826 return 0; 827 828 do { 829 rand = get_random_long(); 830 rand &= roundup_pow_of_two(max) - 1; 831 } while (rand >= max); 832 833 return rand; 834 } 835 836 void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) 837 { 838 struct bio_vec bv; 839 struct bvec_iter iter; 840 841 __bio_for_each_segment(bv, dst, iter, dst_iter) { 842 void *dstp = kmap_local_page(bv.bv_page); 843 844 memcpy(dstp + 
/*
 * generic_swap() - exchange two memory regions byte by byte
 * @a:    first region
 * @b:    second region
 * @size: number of bytes to exchange; 0 is a no-op
 *
 * Fallback for element sizes that have no dedicated swap helper.
 */
static void generic_swap(void *a, void *b, size_t size)
{
	char *x = a, *y = b;

	/*
	 * Test the count before touching memory: a do/while that decrements
	 * afterwards swaps one byte even for size 0 and then wraps the counter.
	 */
	while (size--) {
		char t = *x;

		*x++ = *y;
		*y++ = t;
	}
}
/*
 * sort_cmp_size() - in-place heap sort whose comparison callback is also
 * handed the element size
 * @base:      first element of the array
 * @num:       number of elements
 * @size:      size of one element, in bytes
 * @cmp_func:  called as cmp_func(a, b, size); a negative result means a sorts
 *             before b
 * @swap_func: exchanges two elements; if NULL, u32_swap(), u64_swap() or
 *             generic_swap() is picked from @size and the alignment of @base
 *
 * Elements end up in ascending order according to @cmp_func.
 *
 * NOTE(review): all offsets are byte offsets held in plain ints, so this
 * presumably relies on num * size fitting in an int - confirm against callers.
 */
void sort_cmp_size(void *base, size_t num, size_t size,
	  int (*cmp_func)(const void *, const void *, size_t),
	  void (*swap_func)(void *, void *, size_t size))
{
	/* pre-scale counters for performance */
	int i = (num/2 - 1) * size, n = num * size, c, r;

	if (!swap_func) {
		if (size == 4 && alignment_ok(base, 4))
			swap_func = u32_swap;
		else if (size == 8 && alignment_ok(base, 8))
			swap_func = u64_swap;
		else
			swap_func = generic_swap;
	}

	/* heapify: sift down every parent, starting from the last one */
	for ( ; i >= 0; i -= size) {
		for (r = i; r * 2 + size < n; r = c) {
			/* c: left child of r; move to the right child if larger */
			c = r * 2 + size;
			if (c < n - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			/* stop once the parent is not smaller than that child */
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}

	/* sort: move the heap's root behind the heap, then restore the heap */
	for (i = n - size; i > 0; i -= size) {
		swap_func(base, base + i, size);
		/* sift the new root down within the first i bytes */
		for (r = 0; r * 2 + size < i; r = c) {
			c = r * 2 + size;
			if (c < i - size &&
			    cmp_func(base + c, base + c + size, size) < 0)
				c += size;
			if (cmp_func(base + r, base + c, size) >= 0)
				break;
			swap_func(base + r, base + c, size);
		}
	}
}
BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1); 1083 BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1); 1084 1085 inorder = 0; 1086 eytzinger0_for_each(eytz, size) { 1087 BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz); 1088 BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder); 1089 BUG_ON(eytz != eytzinger0_last(size) && 1090 eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz); 1091 1092 inorder++; 1093 } 1094 } 1095 } 1096 1097 static inline int cmp_u16(const void *_l, const void *_r, size_t size) 1098 { 1099 const u16 *l = _l, *r = _r; 1100 1101 return (*l > *r) - (*r - *l); 1102 } 1103 1104 static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search) 1105 { 1106 int i, c1 = -1, c2 = -1; 1107 ssize_t r; 1108 1109 r = eytzinger0_find_le(test_array, nr, 1110 sizeof(test_array[0]), 1111 cmp_u16, &search); 1112 if (r >= 0) 1113 c1 = test_array[r]; 1114 1115 for (i = 0; i < nr; i++) 1116 if (test_array[i] <= search && test_array[i] > c2) 1117 c2 = test_array[i]; 1118 1119 if (c1 != c2) { 1120 eytzinger0_for_each(i, nr) 1121 pr_info("[%3u] = %12u", i, test_array[i]); 1122 pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i", 1123 i, r, c1, c2); 1124 } 1125 } 1126 1127 void eytzinger0_find_test(void) 1128 { 1129 unsigned i, nr, allocated = 1 << 12; 1130 u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL); 1131 1132 for (nr = 1; nr < allocated; nr++) { 1133 pr_info("testing %u elems", nr); 1134 1135 get_random_bytes(test_array, nr * sizeof(test_array[0])); 1136 eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL); 1137 1138 /* verify array is sorted correctly: */ 1139 eytzinger0_for_each(i, nr) 1140 BUG_ON(i != eytzinger0_last(nr) && 1141 test_array[i] > test_array[eytzinger0_next(i, nr)]); 1142 1143 for (i = 0; i < U16_MAX; i += 1 << 12) 1144 eytzinger0_find_test_val(test_array, nr, i); 1145 1146 for (i = 0; i < nr; i++) { 1147 
eytzinger0_find_test_val(test_array, nr, test_array[i] - 1); 1148 eytzinger0_find_test_val(test_array, nr, test_array[i]); 1149 eytzinger0_find_test_val(test_array, nr, test_array[i] + 1); 1150 } 1151 } 1152 1153 kfree(test_array); 1154 } 1155 #endif 1156 1157 /* 1158 * Accumulate percpu counters onto one cpu's copy - only valid when access 1159 * against any percpu counter is guarded against 1160 */ 1161 u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) 1162 { 1163 u64 *ret; 1164 int cpu; 1165 1166 /* access to pcpu vars has to be blocked by other locking */ 1167 preempt_disable(); 1168 ret = this_cpu_ptr(p); 1169 preempt_enable(); 1170 1171 for_each_possible_cpu(cpu) { 1172 u64 *i = per_cpu_ptr(p, cpu); 1173 1174 if (i != ret) { 1175 acc_u64s(ret, i, nr); 1176 memset(i, 0, nr * sizeof(u64)); 1177 } 1178 } 1179 1180 return ret; 1181 } 1182 1183 void bch2_darray_str_exit(darray_str *d) 1184 { 1185 darray_for_each(*d, i) 1186 kfree(*i); 1187 darray_exit(d); 1188 } 1189 1190 int bch2_split_devs(const char *_dev_name, darray_str *ret) 1191 { 1192 darray_init(ret); 1193 1194 char *dev_name, *s, *orig; 1195 1196 dev_name = orig = kstrdup(_dev_name, GFP_KERNEL); 1197 if (!dev_name) 1198 return -ENOMEM; 1199 1200 while ((s = strsep(&dev_name, ":"))) { 1201 char *p = kstrdup(s, GFP_KERNEL); 1202 if (!p) 1203 goto err; 1204 1205 if (darray_push(ret, p)) { 1206 kfree(p); 1207 goto err; 1208 } 1209 } 1210 1211 kfree(orig); 1212 return 0; 1213 err: 1214 bch2_darray_str_exit(ret); 1215 kfree(orig); 1216 return -ENOMEM; 1217 } 1218