1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * KCSAN reporting. 4 * 5 * Copyright (C) 2019, Google LLC. 6 */ 7 8 #include <linux/debug_locks.h> 9 #include <linux/delay.h> 10 #include <linux/jiffies.h> 11 #include <linux/kernel.h> 12 #include <linux/lockdep.h> 13 #include <linux/preempt.h> 14 #include <linux/printk.h> 15 #include <linux/sched.h> 16 #include <linux/spinlock.h> 17 #include <linux/stacktrace.h> 18 19 #include "kcsan.h" 20 #include "encoding.h" 21 22 /* 23 * Max. number of stack entries to show in the report. 24 */ 25 #define NUM_STACK_ENTRIES 64 26 27 /* Common access info. */ 28 struct access_info { 29 const volatile void *ptr; 30 size_t size; 31 int access_type; 32 int task_pid; 33 int cpu_id; 34 }; 35 36 /* 37 * Other thread info: communicated from other racing thread to thread that set 38 * up the watchpoint, which then prints the complete report atomically. 39 */ 40 struct other_info { 41 struct access_info ai; 42 unsigned long stack_entries[NUM_STACK_ENTRIES]; 43 int num_stack_entries; 44 45 /* 46 * Optionally pass @current. Typically we do not need to pass @current 47 * via @other_info since just @task_pid is sufficient. Passing @current 48 * has additional overhead. 49 * 50 * To safely pass @current, we must either use get_task_struct/ 51 * put_task_struct, or stall the thread that populated @other_info. 52 * 53 * We cannot rely on get_task_struct/put_task_struct in case 54 * release_report() races with a task being released, and would have to 55 * free it in release_report(). This may result in deadlock if we want 56 * to use KCSAN on the allocators. 57 * 58 * Since we also want to reliably print held locks for 59 * CONFIG_KCSAN_VERBOSE, the current implementation stalls the thread 60 * that populated @other_info until it has been consumed. 61 */ 62 struct task_struct *task; 63 }; 64 65 /* 66 * To never block any producers of struct other_info, we need as many elements 67 * as we have watchpoints (upper bound on concurrent races to report). 68 */ 69 static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; 70 71 /* 72 * Information about reported races; used to rate limit reporting. 73 */ 74 struct report_time { 75 /* 76 * The last time the race was reported. 77 */ 78 unsigned long time; 79 80 /* 81 * The frames of the 2 threads; if only 1 thread is known, one frame 82 * will be 0. 83 */ 84 unsigned long frame1; 85 unsigned long frame2; 86 }; 87 88 /* 89 * Since we also want to be able to debug allocators with KCSAN, to avoid 90 * deadlock, report_times cannot be dynamically resized with krealloc in 91 * rate_limit_report. 92 * 93 * Therefore, we use a fixed-size array, which at most will occupy a page. This 94 * still adequately rate limits reports, assuming that a) number of unique data 95 * races is not excessive, and b) occurrence of unique races within the 96 * same time window is limited. 97 */ 98 #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time)) 99 #define REPORT_TIMES_SIZE \ 100 (CONFIG_KCSAN_REPORT_ONCE_IN_MS > REPORT_TIMES_MAX ? \ 101 REPORT_TIMES_MAX : \ 102 CONFIG_KCSAN_REPORT_ONCE_IN_MS) 103 static struct report_time report_times[REPORT_TIMES_SIZE]; 104 105 /* 106 * Spinlock serializing report generation, and access to @other_infos. Although 107 * it could make sense to have a finer-grained locking story for @other_infos, 108 * report generation needs to be serialized either way, so not much is gained. 109 */ 110 static DEFINE_RAW_SPINLOCK(report_lock); 111 112 /* 113 * Checks if the race identified by thread frames frame1 and frame2 has 114 * been reported since (now - KCSAN_REPORT_ONCE_IN_MS). 115 */ 116 static bool rate_limit_report(unsigned long frame1, unsigned long frame2) 117 { 118 struct report_time *use_entry = &report_times[0]; 119 unsigned long invalid_before; 120 int i; 121 122 BUILD_BUG_ON(CONFIG_KCSAN_REPORT_ONCE_IN_MS != 0 && REPORT_TIMES_SIZE == 0); 123 124 if (CONFIG_KCSAN_REPORT_ONCE_IN_MS == 0) 125 return false; 126 127 invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS); 128 129 /* Check if a matching race report exists. */ 130 for (i = 0; i < REPORT_TIMES_SIZE; ++i) { 131 struct report_time *rt = &report_times[i]; 132 133 /* 134 * Must always select an entry for use to store info as we 135 * cannot resize report_times; at the end of the scan, use_entry 136 * will be the oldest entry, which ideally also happened before 137 * KCSAN_REPORT_ONCE_IN_MS ago. 138 */ 139 if (time_before(rt->time, use_entry->time)) 140 use_entry = rt; 141 142 /* 143 * Initially, no need to check any further as this entry as well 144 * as following entries have never been used. 145 */ 146 if (rt->time == 0) 147 break; 148 149 /* Check if entry expired. */ 150 if (time_before(rt->time, invalid_before)) 151 continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */ 152 153 /* Reported recently, check if race matches. */ 154 if ((rt->frame1 == frame1 && rt->frame2 == frame2) || 155 (rt->frame1 == frame2 && rt->frame2 == frame1)) 156 return true; 157 } 158 159 use_entry->time = jiffies; 160 use_entry->frame1 = frame1; 161 use_entry->frame2 = frame2; 162 return false; 163 } 164 165 /* 166 * Special rules to skip reporting. 167 */ 168 static bool 169 skip_report(enum kcsan_value_change value_change, unsigned long top_frame) 170 { 171 /* Should never get here if value_change==FALSE. */ 172 WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE); 173 174 /* 175 * The first call to skip_report always has value_change==TRUE, since we 176 * cannot know the value written of an instrumented access. For the 2nd 177 * call there are 6 cases with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY: 178 * 179 * 1. read watchpoint, conflicting write (value_change==TRUE): report; 180 * 2. read watchpoint, conflicting write (value_change==MAYBE): skip; 181 * 3. write watchpoint, conflicting write (value_change==TRUE): report; 182 * 4. write watchpoint, conflicting write (value_change==MAYBE): skip; 183 * 5. write watchpoint, conflicting read (value_change==MAYBE): skip; 184 * 6. write watchpoint, conflicting read (value_change==TRUE): report; 185 * 186 * Cases 1-4 are intuitive and expected; case 5 ensures we do not report 187 * data races where the write may have rewritten the same value; case 6 188 * is possible either if the size is larger than what we check value 189 * changes for or the access type is KCSAN_ACCESS_ASSERT. 190 */ 191 if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && 192 value_change == KCSAN_VALUE_CHANGE_MAYBE) { 193 /* 194 * The access is a write, but the data value did not change. 195 * 196 * We opt-out of this filter for certain functions at request of 197 * maintainers. 198 */ 199 char buf[64]; 200 int len = scnprintf(buf, sizeof(buf), "%ps", (void *)top_frame); 201 202 if (!strnstr(buf, "rcu_", len) && 203 !strnstr(buf, "_rcu", len) && 204 !strnstr(buf, "_srcu", len)) 205 return true; 206 } 207 208 return kcsan_skip_report_debugfs(top_frame); 209 } 210 211 static const char *get_access_type(int type) 212 { 213 if (type & KCSAN_ACCESS_ASSERT) { 214 if (type & KCSAN_ACCESS_SCOPED) { 215 if (type & KCSAN_ACCESS_WRITE) 216 return "assert no accesses (scoped)"; 217 else 218 return "assert no writes (scoped)"; 219 } else { 220 if (type & KCSAN_ACCESS_WRITE) 221 return "assert no accesses"; 222 else 223 return "assert no writes"; 224 } 225 } 226 227 switch (type) { 228 case 0: 229 return "read"; 230 case KCSAN_ACCESS_ATOMIC: 231 return "read (marked)"; 232 case KCSAN_ACCESS_WRITE: 233 return "write"; 234 case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: 235 return "write (marked)"; 236 case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE: 237 return "read-write"; 238 case KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: 239 return "read-write (marked)"; 240 case KCSAN_ACCESS_SCOPED: 241 return "read (scoped)"; 242 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: 243 return "read (marked, scoped)"; 244 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE: 245 return "write (scoped)"; 246 case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: 247 return "write (marked, scoped)"; 248 default: 249 BUG(); 250 } 251 } 252 253 static const char *get_bug_type(int type) 254 { 255 return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race"; 256 } 257 258 /* Return thread description: in task or interrupt. */ 259 static const char *get_thread_desc(int task_id) 260 { 261 if (task_id != -1) { 262 static char buf[32]; /* safe: protected by report_lock */ 263 264 snprintf(buf, sizeof(buf), "task %i", task_id); 265 return buf; 266 } 267 return "interrupt"; 268 } 269 270 /* Helper to skip KCSAN-related functions in stack-trace. */ 271 static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries) 272 { 273 char buf[64]; 274 char *cur; 275 int len, skip; 276 277 for (skip = 0; skip < num_entries; ++skip) { 278 len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skip]); 279 280 /* Never show tsan_* or {read,write}_once_size. */ 281 if (strnstr(buf, "tsan_", len) || 282 strnstr(buf, "_once_size", len)) 283 continue; 284 285 cur = strnstr(buf, "kcsan_", len); 286 if (cur) { 287 cur += strlen("kcsan_"); 288 if (!str_has_prefix(cur, "test")) 289 continue; /* KCSAN runtime function. */ 290 /* KCSAN related test. */ 291 } 292 293 /* 294 * No match for runtime functions -- @skip entries to skip to 295 * get to first frame of interest. 296 */ 297 break; 298 } 299 300 return skip; 301 } 302 303 /* Compares symbolized strings of addr1 and addr2. */ 304 static int sym_strcmp(void *addr1, void *addr2) 305 { 306 char buf1[64]; 307 char buf2[64]; 308 309 snprintf(buf1, sizeof(buf1), "%pS", addr1); 310 snprintf(buf2, sizeof(buf2), "%pS", addr2); 311 312 return strncmp(buf1, buf2, sizeof(buf1)); 313 } 314 315 static void print_verbose_info(struct task_struct *task) 316 { 317 if (!task) 318 return; 319 320 /* Restore IRQ state trace for printing. */ 321 kcsan_restore_irqtrace(task); 322 323 pr_err("\n"); 324 debug_show_held_locks(task); 325 print_irqtrace_events(task); 326 } 327 328 /* 329 * Returns true if a report was generated, false otherwise. 330 */ 331 static bool print_report(enum kcsan_value_change value_change, 332 enum kcsan_report_type type, 333 const struct access_info *ai, 334 const struct other_info *other_info) 335 { 336 unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; 337 int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); 338 int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); 339 unsigned long this_frame = stack_entries[skipnr]; 340 unsigned long other_frame = 0; 341 int other_skipnr = 0; /* silence uninit warnings */ 342 343 /* 344 * Must check report filter rules before starting to print. 345 */ 346 if (skip_report(KCSAN_VALUE_CHANGE_TRUE, stack_entries[skipnr])) 347 return false; 348 349 if (type == KCSAN_REPORT_RACE_SIGNAL) { 350 other_skipnr = get_stack_skipnr(other_info->stack_entries, 351 other_info->num_stack_entries); 352 other_frame = other_info->stack_entries[other_skipnr]; 353 354 /* @value_change is only known for the other thread */ 355 if (skip_report(value_change, other_frame)) 356 return false; 357 } 358 359 if (rate_limit_report(this_frame, other_frame)) 360 return false; 361 362 /* Print report header. */ 363 pr_err("==================================================================\n"); 364 switch (type) { 365 case KCSAN_REPORT_RACE_SIGNAL: { 366 int cmp; 367 368 /* 369 * Order functions lexographically for consistent bug titles. 370 * Do not print offset of functions to keep title short. 371 */ 372 cmp = sym_strcmp((void *)other_frame, (void *)this_frame); 373 pr_err("BUG: KCSAN: %s in %ps / %ps\n", 374 get_bug_type(ai->access_type | other_info->ai.access_type), 375 (void *)(cmp < 0 ? other_frame : this_frame), 376 (void *)(cmp < 0 ? this_frame : other_frame)); 377 } break; 378 379 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: 380 pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(ai->access_type), 381 (void *)this_frame); 382 break; 383 384 default: 385 BUG(); 386 } 387 388 pr_err("\n"); 389 390 /* Print information about the racing accesses. */ 391 switch (type) { 392 case KCSAN_REPORT_RACE_SIGNAL: 393 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", 394 get_access_type(other_info->ai.access_type), other_info->ai.ptr, 395 other_info->ai.size, get_thread_desc(other_info->ai.task_pid), 396 other_info->ai.cpu_id); 397 398 /* Print the other thread's stack trace. */ 399 stack_trace_print(other_info->stack_entries + other_skipnr, 400 other_info->num_stack_entries - other_skipnr, 401 0); 402 403 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 404 print_verbose_info(other_info->task); 405 406 pr_err("\n"); 407 pr_err("%s to 0x%px of %zu bytes by %s on cpu %i:\n", 408 get_access_type(ai->access_type), ai->ptr, ai->size, 409 get_thread_desc(ai->task_pid), ai->cpu_id); 410 break; 411 412 case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: 413 pr_err("race at unknown origin, with %s to 0x%px of %zu bytes by %s on cpu %i:\n", 414 get_access_type(ai->access_type), ai->ptr, ai->size, 415 get_thread_desc(ai->task_pid), ai->cpu_id); 416 break; 417 418 default: 419 BUG(); 420 } 421 /* Print stack trace of this thread. */ 422 stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 423 0); 424 425 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 426 print_verbose_info(current); 427 428 /* Print report footer. */ 429 pr_err("\n"); 430 pr_err("Reported by Kernel Concurrency Sanitizer on:\n"); 431 dump_stack_print_info(KERN_DEFAULT); 432 pr_err("==================================================================\n"); 433 434 return true; 435 } 436 437 static void release_report(unsigned long *flags, struct other_info *other_info) 438 { 439 if (other_info) 440 /* 441 * Use size to denote valid/invalid, since KCSAN entirely 442 * ignores 0-sized accesses. 443 */ 444 other_info->ai.size = 0; 445 446 raw_spin_unlock_irqrestore(&report_lock, *flags); 447 } 448 449 /* 450 * Sets @other_info->task and awaits consumption of @other_info. 451 * 452 * Precondition: report_lock is held. 453 * Postcondition: report_lock is held. 454 */ 455 static void set_other_info_task_blocking(unsigned long *flags, 456 const struct access_info *ai, 457 struct other_info *other_info) 458 { 459 /* 460 * We may be instrumenting a code-path where current->state is already 461 * something other than TASK_RUNNING. 462 */ 463 const bool is_running = current->state == TASK_RUNNING; 464 /* 465 * To avoid deadlock in case we are in an interrupt here and this is a 466 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a 467 * timeout to ensure this works in all contexts. 468 * 469 * Await approximately the worst case delay of the reporting thread (if 470 * we are not interrupted). 471 */ 472 int timeout = max(kcsan_udelay_task, kcsan_udelay_interrupt); 473 474 other_info->task = current; 475 do { 476 if (is_running) { 477 /* 478 * Let lockdep know the real task is sleeping, to print 479 * the held locks (recall we turned lockdep off, so 480 * locking/unlocking @report_lock won't be recorded). 481 */ 482 set_current_state(TASK_UNINTERRUPTIBLE); 483 } 484 raw_spin_unlock_irqrestore(&report_lock, *flags); 485 /* 486 * We cannot call schedule() since we also cannot reliably 487 * determine if sleeping here is permitted -- see in_atomic(). 488 */ 489 490 udelay(1); 491 raw_spin_lock_irqsave(&report_lock, *flags); 492 if (timeout-- < 0) { 493 /* 494 * Abort. Reset @other_info->task to NULL, since it 495 * appears the other thread is still going to consume 496 * it. It will result in no verbose info printed for 497 * this task. 498 */ 499 other_info->task = NULL; 500 break; 501 } 502 /* 503 * If invalid, or @ptr nor @current matches, then @other_info 504 * has been consumed and we may continue. If not, retry. 505 */ 506 } while (other_info->ai.size && other_info->ai.ptr == ai->ptr && 507 other_info->task == current); 508 if (is_running) 509 set_current_state(TASK_RUNNING); 510 } 511 512 /* Populate @other_info; requires that the provided @other_info not in use. */ 513 static void prepare_report_producer(unsigned long *flags, 514 const struct access_info *ai, 515 struct other_info *other_info) 516 { 517 raw_spin_lock_irqsave(&report_lock, *flags); 518 519 /* 520 * The same @other_infos entry cannot be used concurrently, because 521 * there is a one-to-one mapping to watchpoint slots (@watchpoints in 522 * core.c), and a watchpoint is only released for reuse after reporting 523 * is done by the consumer of @other_info. Therefore, it is impossible 524 * for another concurrent prepare_report_producer() to set the same 525 * @other_info, and are guaranteed exclusivity for the @other_infos 526 * entry pointed to by @other_info. 527 * 528 * To check this property holds, size should never be non-zero here, 529 * because every consumer of struct other_info resets size to 0 in 530 * release_report(). 531 */ 532 WARN_ON(other_info->ai.size); 533 534 other_info->ai = *ai; 535 other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2); 536 537 if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) 538 set_other_info_task_blocking(flags, ai, other_info); 539 540 raw_spin_unlock_irqrestore(&report_lock, *flags); 541 } 542 543 /* Awaits producer to fill @other_info and then returns. */ 544 static bool prepare_report_consumer(unsigned long *flags, 545 const struct access_info *ai, 546 struct other_info *other_info) 547 { 548 549 raw_spin_lock_irqsave(&report_lock, *flags); 550 while (!other_info->ai.size) { /* Await valid @other_info. */ 551 raw_spin_unlock_irqrestore(&report_lock, *flags); 552 cpu_relax(); 553 raw_spin_lock_irqsave(&report_lock, *flags); 554 } 555 556 /* Should always have a matching access based on watchpoint encoding. */ 557 if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size, 558 (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size))) 559 goto discard; 560 561 if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, 562 (unsigned long)ai->ptr, ai->size)) { 563 /* 564 * If the actual accesses to not match, this was a false 565 * positive due to watchpoint encoding. 566 */ 567 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ENCODING_FALSE_POSITIVES]); 568 goto discard; 569 } 570 571 return true; 572 573 discard: 574 release_report(flags, other_info); 575 return false; 576 } 577 578 /* 579 * Depending on the report type either sets @other_info and returns false, or 580 * awaits @other_info and returns true. If @other_info is not required for the 581 * report type, simply acquires @report_lock and returns true. 582 */ 583 static noinline bool prepare_report(unsigned long *flags, 584 enum kcsan_report_type type, 585 const struct access_info *ai, 586 struct other_info *other_info) 587 { 588 switch (type) { 589 case KCSAN_REPORT_CONSUMED_WATCHPOINT: 590 prepare_report_producer(flags, ai, other_info); 591 return false; 592 case KCSAN_REPORT_RACE_SIGNAL: 593 return prepare_report_consumer(flags, ai, other_info); 594 default: 595 /* @other_info not required; just acquire @report_lock. */ 596 raw_spin_lock_irqsave(&report_lock, *flags); 597 return true; 598 } 599 } 600 601 void kcsan_report(const volatile void *ptr, size_t size, int access_type, 602 enum kcsan_value_change value_change, 603 enum kcsan_report_type type, int watchpoint_idx) 604 { 605 unsigned long flags = 0; 606 const struct access_info ai = { 607 .ptr = ptr, 608 .size = size, 609 .access_type = access_type, 610 .task_pid = in_task() ? task_pid_nr(current) : -1, 611 .cpu_id = raw_smp_processor_id() 612 }; 613 struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN 614 ? NULL : &other_infos[watchpoint_idx]; 615 616 kcsan_disable_current(); 617 if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos))) 618 goto out; 619 620 /* 621 * Because we may generate reports when we're in scheduler code, the use 622 * of printk() could deadlock. Until such time that all printing code 623 * called in print_report() is scheduler-safe, accept the risk, and just 624 * get our message out. As such, also disable lockdep to hide the 625 * warning, and avoid disabling lockdep for the rest of the kernel. 626 */ 627 lockdep_off(); 628 629 if (prepare_report(&flags, type, &ai, other_info)) { 630 /* 631 * Never report if value_change is FALSE, only if we it is 632 * either TRUE or MAYBE. In case of MAYBE, further filtering may 633 * be done once we know the full stack trace in print_report(). 634 */ 635 bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && 636 print_report(value_change, type, &ai, other_info); 637 638 if (reported && panic_on_warn) 639 panic("panic_on_warn set ...\n"); 640 641 release_report(&flags, other_info); 642 } 643 644 lockdep_on(); 645 out: 646 kcsan_enable_current(); 647 } 648