// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/hw_breakpoint.h>

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/percpu-rwsem.h>
#include <linux/percpu.h>
#include <linux/rhashtable.h>
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * Data structure to track the total uses of N slots across tasks or CPUs;
 * bp_slots_histogram::count[N] is the number of tasks (or CPUs) that have
 * N+1 breakpoint slots assigned.
 */
struct bp_slots_histogram {
#ifdef hw_breakpoint_slots
	atomic_t count[hw_breakpoint_slots(0)];
#else
	atomic_t *count;
#endif
};
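/*
 * Example: if two tasks each hold one data breakpoint slot and a third task
 * holds three, the corresponding TYPE_DATA histogram has count[0] == 2 and
 * count[2] == 1; the maximum per-task slot usage (3) is recovered by scanning
 * for the highest non-zero bucket, which is what bp_slots_histogram_max()
 * below does.
 */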
/*
 * Per-CPU constraints data.
 */
struct bp_cpuinfo {
	/* Number of pinned CPU breakpoints in a CPU. */
	unsigned int	cpu_pinned;
	/* Histogram of pinned task breakpoints in a CPU. */
	struct bp_slots_histogram tsk_pinned;
};

static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);

static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{
	return per_cpu_ptr(bp_cpuinfo + type, cpu);
}

/* Number of pinned CPU breakpoints globally. */
static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
/* Number of pinned CPU-independent task breakpoints. */
static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];

/* Keep track of the breakpoints attached to tasks */
static struct rhltable task_bps_ht;
static const struct rhashtable_params task_bps_ht_params = {
	.head_offset = offsetof(struct hw_perf_event, bp_list),
	.key_offset = offsetof(struct hw_perf_event, target),
	.key_len = sizeof_field(struct hw_perf_event, target),
	.automatic_shrinking = true,
};

static bool constraints_initialized __ro_after_init;

/*
 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
 *
 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
 *     (due to bp_slots_histogram::count being atomic, no updates are lost).
 *
 *  2. Holding a write-lock is required for computations that require a
 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
 *
 *  3. In all other cases, non-atomic accesses require the appropriately held
 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
 */
DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);

/*
 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
 * rhltable synchronizes concurrent insertions/deletions, independent tasks may
 * insert/delete concurrently; therefore, a mutex per task is sufficient.
 *
 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
 * that hw_breakpoint may contend with per-task perf event list management. The
 * assumption is that perf usecases involving hw_breakpoints are very unlikely
 * to result in unnecessary contention.
 */
static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
{
	struct task_struct *tsk = bp->hw.target;

	return tsk ? &tsk->perf_event_mutex : NULL;
}

static struct mutex *bp_constraints_lock(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	if (tsk_mtx) {
		/*
		 * Fully analogous to the perf_try_init_event() nesting
		 * argument in the comment near perf_event_ctx_lock_nested();
		 * this child->perf_event_mutex cannot ever deadlock against
		 * the parent->perf_event_mutex usage from
		 * perf_event_task_{en,dis}able().
		 *
		 * Specifically, inherited events will never occur on
		 * ->perf_event_list.
		 */
		mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
		percpu_down_read(&bp_cpuinfo_sem);
	} else {
		percpu_down_write(&bp_cpuinfo_sem);
	}

	return tsk_mtx;
}

static void bp_constraints_unlock(struct mutex *tsk_mtx)
{
	if (tsk_mtx) {
		percpu_up_read(&bp_cpuinfo_sem);
		mutex_unlock(tsk_mtx);
	} else {
		percpu_up_write(&bp_cpuinfo_sem);
	}
}
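/*
 * Usage sketch (illustrative; mirrors reserve_bp_slot() and release_bp_slot()
 * further below): constraint updates are bracketed by the lock/unlock pair,
 * with the returned mutex pointer threaded through:
 *
 *	struct mutex *mtx = bp_constraints_lock(bp);
 *
 *	...read or update the per-task/per-CPU constraint state...
 *
 *	bp_constraints_unlock(mtx);
 *
 * For task-bound breakpoints this takes the task's perf_event_mutex plus the
 * per-CPU rwsem as a reader; for breakpoints without a task target it takes
 * the rwsem as a writer.
 */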
static bool bp_constraints_is_locked(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	return percpu_is_write_locked(&bp_cpuinfo_sem) ||
	       (tsk_mtx ? mutex_is_locked(tsk_mtx) :
			  percpu_is_read_locked(&bp_cpuinfo_sem));
}

static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	if (tsk_mtx)
		lockdep_assert_held(tsk_mtx);
	lockdep_assert_held(&bp_cpuinfo_sem);
}

#ifdef hw_breakpoint_slots
/*
 * Number of breakpoint slots is constant, and the same for all types.
 */
static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
static inline int hw_breakpoint_slots_cached(int type)	{ return hw_breakpoint_slots(type); }
static inline int init_breakpoint_slots(void)		{ return 0; }
#else
/*
 * Dynamic number of breakpoint slots.
 */
static int __nr_bp_slots[TYPE_MAX] __ro_after_init;

static inline int hw_breakpoint_slots_cached(int type)
{
	return __nr_bp_slots[type];
}

static __init bool
bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
{
	hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
	return hist->count;
}

static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
{
	kfree(hist->count);
}

static __init int init_breakpoint_slots(void)
{
	int i, cpu, err_cpu;

	for (i = 0; i < TYPE_MAX; i++)
		__nr_bp_slots[i] = hw_breakpoint_slots(i);

	for_each_possible_cpu(cpu) {
		for (i = 0; i < TYPE_MAX; i++) {
			struct bp_cpuinfo *info = get_bp_info(cpu, i);

			if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
				goto err;
		}
	}
	for (i = 0; i < TYPE_MAX; i++) {
		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
			goto err;
		if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
			goto err;
	}

	return 0;
err:
	for_each_possible_cpu(err_cpu) {
		for (i = 0; i < TYPE_MAX; i++)
			bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
		if (err_cpu == cpu)
			break;
	}
	for (i = 0; i < TYPE_MAX; i++) {
		bp_slots_histogram_free(&cpu_pinned[i]);
		bp_slots_histogram_free(&tsk_pinned_all[i]);
	}

	return -ENOMEM;
}
#endif

static inline void
bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
{
	const int old_idx = old - 1;
	const int new_idx = old_idx + val;

	if (old_idx >= 0)
		WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
	if (new_idx >= 0)
		WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
}
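/*
 * Example: a task that already uses two slots of a given type and gains one
 * more is recorded as bp_slots_histogram_add(hist, 2, 1), which decrements
 * count[1] and increments count[2]; releasing that slot again is
 * bp_slots_histogram_add(hist, 3, -1), restoring the previous state. A first
 * slot for a task (old == 0) only increments count[0].
 */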
static int
bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
{
	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
		const int count = atomic_read(&hist->count[i]);

		/* Catch unexpected writers; we want a stable snapshot. */
		ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
		if (count > 0)
			return i + 1;
		WARN(count < 0, "inconsistent breakpoint slots histogram");
	}

	return 0;
}

static int
bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
			     enum bp_type_idx type)
{
	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
		const int count1 = atomic_read(&hist1->count[i]);
		const int count2 = atomic_read(&hist2->count[i]);

		/* Catch unexpected writers; we want a stable snapshot. */
		ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
		ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
		if (count1 + count2 > 0)
			return i + 1;
		WARN(count1 < 0, "inconsistent breakpoint slots histogram");
		WARN(count2 < 0, "inconsistent breakpoint slots histogram");
	}

	return 0;
}

#ifndef hw_breakpoint_weight
static inline int hw_breakpoint_weight(struct perf_event *bp)
{
	return 1;
}
#endif

static inline enum bp_type_idx find_slot_idx(u64 bp_type)
{
	if (bp_type & HW_BREAKPOINT_RW)
		return TYPE_DATA;

	return TYPE_INST;
}

/*
 * Return the maximum number of pinned breakpoints a task has in this CPU.
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
	struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;

	/*
	 * At this point we want to have acquired the bp_cpuinfo_sem as a
	 * writer to ensure that there are no concurrent writers in
	 * toggle_bp_slot() to tsk_pinned, and we get a stable snapshot.
	 */
	lockdep_assert_held_write(&bp_cpuinfo_sem);
	return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
}

/*
 * Count the number of breakpoints of the same type and same task.
 * The given event must not be on the list.
 *
 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
 * returns a negative value.
 */
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
	struct rhlist_head *head, *pos;
	struct perf_event *iter;
	int count = 0;

	/*
	 * We need a stable snapshot of the per-task breakpoint list.
	 */
	assert_bp_constraints_lock_held(bp);

	rcu_read_lock();
	head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
	if (!head)
		goto out;

	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
		if (find_slot_idx(iter->attr.bp_type) != type)
			continue;

		if (iter->cpu >= 0) {
			if (cpu == -1) {
				count = -1;
				goto out;
			} else if (cpu != iter->cpu)
				continue;
		}

		count += hw_breakpoint_weight(iter);
	}

out:
	rcu_read_unlock();
	return count;
}

static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
{
	if (bp->cpu >= 0)
		return cpumask_of(bp->cpu);
	return cpu_possible_mask;
}

/*
 * Returns the max pinned breakpoint slots in a given
 * CPU (cpu > -1) or across all of them (cpu = -1).
 */
static int
max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
{
	const struct cpumask *cpumask = cpumask_of_bp(bp);
	int pinned_slots = 0;
	int cpu;

	if (bp->hw.target && bp->cpu < 0) {
		int max_pinned = task_bp_pinned(-1, bp, type);

		if (max_pinned >= 0) {
			/*
			 * Fast path: task_bp_pinned() is CPU-independent and
			 * returns the same value for any CPU.
			 */
			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
			return max_pinned;
		}
	}

	for_each_cpu(cpu, cpumask) {
		struct bp_cpuinfo *info = get_bp_info(cpu, type);
		int nr;

		nr = info->cpu_pinned;
		if (!bp->hw.target)
			nr += max_task_bp_pinned(cpu, type);
		else
			nr += task_bp_pinned(cpu, bp, type);

		pinned_slots = max(nr, pinned_slots);
	}

	return pinned_slots;
}
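/*
 * Example: for a task-bound breakpoint with bp->cpu < 0, if the task already
 * owns two CPU-independent data breakpoints and the busiest CPU has one
 * CPU-pinned data breakpoint, the fast path above returns 2 + 1 = 3 used
 * slots. The per-CPU loop is taken instead when the new breakpoint targets a
 * single CPU, has no task target, or when the task also owns CPU-affine
 * breakpoints.
 */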
/*
 * Add/remove the given breakpoint in our constraint table
 */
static int
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
{
	int cpu, next_tsk_pinned;

	if (!enable)
		weight = -weight;

	if (!bp->hw.target) {
		/*
		 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
		 * global histogram.
		 */
		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);

		lockdep_assert_held_write(&bp_cpuinfo_sem);
		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
		info->cpu_pinned += weight;
		return 0;
	}

	/*
	 * If bp->hw.target, tsk_pinned is only modified, but not used
	 * otherwise. We can permit concurrent updates as long as there are no
	 * other uses: having acquired bp_cpuinfo_sem as a reader allows
	 * concurrent updates here. Uses of tsk_pinned will require acquiring
	 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
	 */
	lockdep_assert_held_read(&bp_cpuinfo_sem);

	/*
	 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
	 * histogram. We need to take care of 4 cases:
	 *
	 *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
	 *     exist other task breakpoints targeting all CPUs. In this case we
	 *     can simply update the global slots histogram.
	 *
	 *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
	 *     only exist other task breakpoints targeting all CPUs.
	 *
	 *	a. On enable: remove the existing breakpoints from the global
	 *	   slots histogram and use the per-CPU histogram.
	 *
	 *	b. On disable: re-insert the existing breakpoints into the global
	 *	   slots histogram and remove from per-CPU histogram.
	 *
	 *  3. Some other existing task breakpoints target specific CPUs. Only
	 *     update the per-CPU slots histogram.
	 */

	if (!enable) {
		/*
		 * Remove before updating histograms so we can determine if this
		 * was the last task breakpoint for a specific CPU.
		 */
		int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

		if (ret)
			return ret;
	}
	/*
	 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
	 */
	next_tsk_pinned = task_bp_pinned(-1, bp, type);

	if (next_tsk_pinned >= 0) {
		if (bp->cpu < 0) { /* Case 1: fast path */
			if (!enable)
				next_tsk_pinned += hw_breakpoint_weight(bp);
			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
		} else if (enable) { /* Case 2.a: slow path */
			/* Add existing to per-CPU histograms. */
			for_each_possible_cpu(cpu) {
				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
						       0, next_tsk_pinned);
			}
			/* Add this first CPU-pinned task breakpoint. */
			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
					       next_tsk_pinned, weight);
			/* Rebalance global task pinned histogram. */
			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
					       -next_tsk_pinned);
		} else { /* Case 2.b: slow path */
			/* Remove this last CPU-pinned task breakpoint. */
			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
					       next_tsk_pinned + hw_breakpoint_weight(bp), weight);
			/* Remove all from per-CPU histograms. */
			for_each_possible_cpu(cpu) {
				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
						       next_tsk_pinned, -next_tsk_pinned);
			}
			/* Rebalance global task pinned histogram. */
			bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
		}
	} else { /* Case 3: slow path */
		const struct cpumask *cpumask = cpumask_of_bp(bp);

		for_each_cpu(cpu, cpumask) {
			next_tsk_pinned = task_bp_pinned(cpu, bp, type);
			if (!enable)
				next_tsk_pinned += hw_breakpoint_weight(bp);
			bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
					       next_tsk_pinned, weight);
		}
	}

	/*
	 * Readers want a stable snapshot of the per-task breakpoint list.
	 */
	assert_bp_constraints_lock_held(bp);

	if (enable)
		return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

	return 0;
}
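/*
 * Example (case 2.a above): a task already owns one CPU-independent
 * breakpoint of weight 1 and now enables a breakpoint pinned to CPU2. The
 * existing breakpoint is moved out of tsk_pinned_all into every CPU's
 * tsk_pinned histogram, and CPU2's histogram additionally accounts for the
 * new breakpoint, going from 1 to 2 used slots for this task on CPU2.
 */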
__weak int arch_reserve_bp_slot(struct perf_event *bp)
{
	return 0;
}

__weak void arch_release_bp_slot(struct perf_event *bp)
{
}

/*
 * Function to perform processor-specific cleanup during unregistration
 */
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
{
	/*
	 * A weak stub function here for those archs that don't define
	 * it inside arch/.../kernel/hw_breakpoint.c
	 */
}

/*
 * Constraints to check before allowing this new breakpoint counter.
 *
 * Note: Flexible breakpoints are currently unimplemented, but outlined in the
 * below algorithm for completeness. The implementation treats flexible as
 * pinned, because there is currently no guarantee that flexible events are
 * always scheduled before a pinned event on the same CPU.
 *
 * == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per-task
 *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
 *          (for this cpu) doesn't cover every register.
 *
 *   - If attached to every cpu, check:
 *
 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per-cpu
 *          breakpoints for every cpu and keep the max one. Same for the
 *          per-task breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the info->flexible, if any, must
 *          keep at least one register (or they will never be fed).
 *
 *   - If attached to every cpu, check:
 *
 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
{
	enum bp_type_idx type;
	int max_pinned_slots;
	int weight;
	int ret;

	/* We couldn't initialize breakpoint constraints on boot */
	if (!constraints_initialized)
		return -ENOMEM;

	/* Basic checks */
	if (bp_type == HW_BREAKPOINT_EMPTY ||
	    bp_type == HW_BREAKPOINT_INVALID)
		return -EINVAL;

	type = find_slot_idx(bp_type);
	weight = hw_breakpoint_weight(bp);

	/* Check if this new breakpoint can be satisfied across all CPUs. */
	max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
		return -ENOSPC;

	ret = arch_reserve_bp_slot(bp);
	if (ret)
		return ret;

	return toggle_bp_slot(bp, true, type, weight);
}
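/*
 * Example: on an architecture with 4 data breakpoint slots, a new task-bound
 * data breakpoint of weight 1 is rejected with -ENOSPC if some CPU already
 * has 2 CPU-pinned data breakpoints while the target task owns 2 of its own,
 * since 2 + 2 + 1 > 4; otherwise the slot is accounted via toggle_bp_slot()
 * after arch_reserve_bp_slot() succeeds.
 */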
int reserve_bp_slot(struct perf_event *bp)
{
	struct mutex *mtx = bp_constraints_lock(bp);
	int ret = __reserve_bp_slot(bp, bp->attr.bp_type);

	bp_constraints_unlock(mtx);
	return ret;
}

static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
{
	enum bp_type_idx type;
	int weight;

	arch_release_bp_slot(bp);

	type = find_slot_idx(bp_type);
	weight = hw_breakpoint_weight(bp);
	WARN_ON(toggle_bp_slot(bp, false, type, weight));
}

void release_bp_slot(struct perf_event *bp)
{
	struct mutex *mtx = bp_constraints_lock(bp);

	arch_unregister_hw_breakpoint(bp);
	__release_bp_slot(bp, bp->attr.bp_type);
	bp_constraints_unlock(mtx);
}

static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
	int err;

	__release_bp_slot(bp, old_type);

	err = __reserve_bp_slot(bp, new_type);
	if (err) {
		/*
		 * Reserve the old_type slot back in case
		 * there's no space for the new type.
		 *
		 * This must succeed, because we just released
		 * the old_type slot in the __release_bp_slot
		 * call above. If not, something is broken.
		 */
		WARN_ON(__reserve_bp_slot(bp, old_type));
	}

	return err;
}

static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
	struct mutex *mtx = bp_constraints_lock(bp);
	int ret = __modify_bp_slot(bp, old_type, new_type);

	bp_constraints_unlock(mtx);
	return ret;
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without taking a
 * lock, using the dbg_* variants of the reserve and release functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
	int ret;

	if (bp_constraints_is_locked(bp))
		return -1;

	/* Locks aren't held; disable lockdep assert checking. */
	lockdep_off();
	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
	lockdep_on();

	return ret;
}

int dbg_release_bp_slot(struct perf_event *bp)
{
	if (bp_constraints_is_locked(bp))
		return -1;

	/* Locks aren't held; disable lockdep assert checking. */
	lockdep_off();
	__release_bp_slot(bp, bp->attr.bp_type);
	lockdep_on();

	return 0;
}

static int hw_breakpoint_parse(struct perf_event *bp,
			       const struct perf_event_attr *attr,
			       struct arch_hw_breakpoint *hw)
{
	int err;

	err = hw_breakpoint_arch_parse(bp, attr, hw);
	if (err)
		return err;

	if (arch_check_bp_in_kernelspace(hw)) {
		if (attr->exclude_kernel)
			return -EINVAL;
		/*
		 * Don't let unprivileged users set a breakpoint in the trap
		 * path to avoid trap recursion attacks.
		 */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
	}

	return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint hw = { };
	int err;

	err = reserve_bp_slot(bp);
	if (err)
		return err;

	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
	if (err) {
		release_bp_slot(bp);
		return err;
	}

	bp->hw.info = hw;

	return 0;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data that could be used in the triggered callback
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context,
			    struct task_struct *tsk)
{
	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
						context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
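/*
 * Usage sketch (illustrative; the handler, watched variable and task pointer
 * are placeholders): a caller typically fills the attr with
 * hw_breakpoint_init() and then registers a per-task write watchpoint:
 *
 *	struct perf_event_attr attr;
 *	struct perf_event *bp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = (unsigned long)&watched_var;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	bp = register_user_hw_breakpoint(&attr, wp_handler, NULL, tsk);
 *	if (IS_ERR(bp))
 *		return PTR_ERR(bp);
 *	...
 *	unregister_hw_breakpoint(bp);
 */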
static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
				    struct perf_event_attr *from)
{
	to->bp_addr = from->bp_addr;
	to->bp_type = from->bp_type;
	to->bp_len = from->bp_len;
	to->disabled = from->disabled;
}

int
modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
				bool check)
{
	struct arch_hw_breakpoint hw = { };
	int err;

	err = hw_breakpoint_parse(bp, attr, &hw);
	if (err)
		return err;

	if (check) {
		struct perf_event_attr old_attr;

		old_attr = bp->attr;
		hw_breakpoint_copy_attr(&old_attr, attr);
		if (memcmp(&old_attr, attr, sizeof(*attr)))
			return -EINVAL;
	}

	if (bp->attr.bp_type != attr->bp_type) {
		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
		if (err)
			return err;
	}

	hw_breakpoint_copy_attr(&bp->attr, attr);
	bp->hw.info = hw;

	return 0;
}

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
	int err;

	/*
	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
	 * will not be possible to raise IPIs that invoke __perf_event_disable.
	 * So call the function directly after making sure we are targeting the
	 * current task.
	 */
	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
		perf_event_disable_local(bp);
	else
		perf_event_disable(bp);

	err = modify_user_hw_breakpoint_check(bp, attr, false);

	if (!bp->attr.disabled)
		perf_event_enable(bp);

	return err;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
	if (!bp)
		return;
	perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data that could be used in the triggered callback
 *
 * @return a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context)
{
	struct perf_event * __percpu *cpu_events, *bp;
	long err = 0;
	int cpu;

	cpu_events = alloc_percpu(typeof(*cpu_events));
	if (!cpu_events)
		return (void __percpu __force *)ERR_PTR(-ENOMEM);

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
						      triggered, context);
		if (IS_ERR(bp)) {
			err = PTR_ERR(bp);
			break;
		}

		per_cpu(*cpu_events, cpu) = bp;
	}
	cpus_read_unlock();

	if (likely(!err))
		return cpu_events;

	unregister_wide_hw_breakpoint(cpu_events);
	return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
	int cpu;

	for_each_possible_cpu(cpu)
		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));

	free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
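/*
 * Usage sketch (illustrative; the handler name and watched address are
 * placeholders, see samples/hw_breakpoint/data_breakpoint.c for a complete
 * module): a kernel-wide write watchpoint is set up on every online CPU and
 * later torn down with unregister_wide_hw_breakpoint():
 *
 *	struct perf_event * __percpu *wp;
 *	struct perf_event_attr attr;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = (unsigned long)&watched_var;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	wp = register_wide_hw_breakpoint(&attr, wp_handler, NULL);
 *	if (IS_ERR((void __force *)wp))
 *		return PTR_ERR((void __force *)wp);
 *	...
 *	unregister_wide_hw_breakpoint(wp);
 */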
/**
 * hw_breakpoint_is_used - check if breakpoints are currently used
 *
 * Returns: true if breakpoints are used, false otherwise.
 */
bool hw_breakpoint_is_used(void)
{
	int cpu;

	if (!constraints_initialized)
		return false;

	for_each_possible_cpu(cpu) {
		for (int type = 0; type < TYPE_MAX; ++type) {
			struct bp_cpuinfo *info = get_bp_info(cpu, type);

			if (info->cpu_pinned)
				return true;

			for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
				if (atomic_read(&info->tsk_pinned.count[slot]))
					return true;
			}
		}
	}

	for (int type = 0; type < TYPE_MAX; ++type) {
		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
			/*
			 * Warn, because if there are CPU pinned counters, we
			 * should never get here; bp_cpuinfo::cpu_pinned should
			 * be consistent with the global cpu_pinned histogram.
			 */
			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
				return true;

			if (atomic_read(&tsk_pinned_all[type].count[slot]))
				return true;
		}
	}

	return false;
}
static struct notifier_block hw_breakpoint_exceptions_nb = {
	.notifier_call = hw_breakpoint_exceptions_notify,
	/* we need to be notified first */
	.priority = 0x7fffffff
};

static void bp_perf_event_destroy(struct perf_event *event)
{
	release_bp_slot(event);
}

static int hw_breakpoint_event_init(struct perf_event *bp)
{
	int err;

	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
		return -ENOENT;

	/*
	 * no branch sampling for breakpoint events
	 */
	if (has_branch_stack(bp))
		return -EOPNOTSUPP;

	err = register_perf_hw_breakpoint(bp);
	if (err)
		return err;

	bp->destroy = bp_perf_event_destroy;

	return 0;
}

static int hw_breakpoint_add(struct perf_event *bp, int flags)
{
	if (!(flags & PERF_EF_START))
		bp->hw.state = PERF_HES_STOPPED;

	if (is_sampling_event(bp)) {
		bp->hw.last_period = bp->hw.sample_period;
		perf_swevent_set_period(bp);
	}

	return arch_install_hw_breakpoint(bp);
}

static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
	arch_uninstall_hw_breakpoint(bp);
}

static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
	bp->hw.state = 0;
}

static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
	bp->hw.state = PERF_HES_STOPPED;
}

static struct pmu perf_breakpoint = {
	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */

	.event_init	= hw_breakpoint_event_init,
	.add		= hw_breakpoint_add,
	.del		= hw_breakpoint_del,
	.start		= hw_breakpoint_start,
	.stop		= hw_breakpoint_stop,
	.read		= hw_breakpoint_pmu_read,
};

int __init init_hw_breakpoint(void)
{
	int ret;

	ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
	if (ret)
		return ret;

	ret = init_breakpoint_slots();
	if (ret)
		return ret;

	constraints_initialized = true;

	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

	return register_die_notifier(&hw_breakpoint_exceptions_nb);
}
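/*
 * Note (illustrative, user-space view; pid and addr are placeholders): this
 * PMU is reached through perf_event_open(2) with a PERF_TYPE_BREAKPOINT
 * attribute, e.g.:
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_BREAKPOINT,
 *		.size		= sizeof(attr),
 *		.bp_type	= HW_BREAKPOINT_W,
 *		.bp_addr	= addr,
 *		.bp_len		= HW_BREAKPOINT_LEN_4,
 *	};
 *	int fd = syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
 *
 * hw_breakpoint_event_init() above then reserves a slot via
 * register_perf_hw_breakpoint().
 */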