1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2007 Alan Stern 4 * Copyright (C) IBM Corporation, 2009 5 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> 6 * 7 * Thanks to Ingo Molnar for his many suggestions. 8 * 9 * Authors: Alan Stern <stern@rowland.harvard.edu> 10 * K.Prasad <prasad@linux.vnet.ibm.com> 11 * Frederic Weisbecker <fweisbec@gmail.com> 12 */ 13 14 /* 15 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, 16 * using the CPU's debug registers. 17 * This file contains the arch-independent routines. 18 */ 19 20 #include <linux/hw_breakpoint.h> 21 22 #include <linux/atomic.h> 23 #include <linux/bug.h> 24 #include <linux/cpu.h> 25 #include <linux/export.h> 26 #include <linux/init.h> 27 #include <linux/irqflags.h> 28 #include <linux/kdebug.h> 29 #include <linux/kernel.h> 30 #include <linux/mutex.h> 31 #include <linux/notifier.h> 32 #include <linux/percpu-rwsem.h> 33 #include <linux/percpu.h> 34 #include <linux/rhashtable.h> 35 #include <linux/sched.h> 36 #include <linux/slab.h> 37 38 /* 39 * Datastructure to track the total uses of N slots across tasks or CPUs; 40 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots. 41 */ 42 struct bp_slots_histogram { 43 #ifdef hw_breakpoint_slots 44 atomic_t count[hw_breakpoint_slots(0)]; 45 #else 46 atomic_t *count; 47 #endif 48 }; 49 50 /* 51 * Per-CPU constraints data. 52 */ 53 struct bp_cpuinfo { 54 /* Number of pinned CPU breakpoints in a CPU. */ 55 unsigned int cpu_pinned; 56 /* Histogram of pinned task breakpoints in a CPU. */ 57 struct bp_slots_histogram tsk_pinned; 58 }; 59 60 static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]); 61 62 static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type) 63 { 64 return per_cpu_ptr(bp_cpuinfo + type, cpu); 65 } 66 67 /* Number of pinned CPU breakpoints globally. */ 68 static struct bp_slots_histogram cpu_pinned[TYPE_MAX]; 69 /* Number of pinned CPU-independent task breakpoints. */ 70 static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX]; 71 72 /* Keep track of the breakpoints attached to tasks */ 73 static struct rhltable task_bps_ht; 74 static const struct rhashtable_params task_bps_ht_params = { 75 .head_offset = offsetof(struct hw_perf_event, bp_list), 76 .key_offset = offsetof(struct hw_perf_event, target), 77 .key_len = sizeof_field(struct hw_perf_event, target), 78 .automatic_shrinking = true, 79 }; 80 81 static bool constraints_initialized __ro_after_init; 82 83 /* 84 * Synchronizes accesses to the per-CPU constraints; the locking rules are: 85 * 86 * 1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock 87 * (due to bp_slots_histogram::count being atomic, no update are lost). 88 * 89 * 2. Holding a write-lock is required for computations that require a 90 * stable snapshot of all bp_cpuinfo::tsk_pinned. 91 * 92 * 3. In all other cases, non-atomic accesses require the appropriately held 93 * lock (read-lock for read-only accesses; write-lock for reads/writes). 94 */ 95 DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem); 96 97 /* 98 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since 99 * rhltable synchronizes concurrent insertions/deletions, independent tasks may 100 * insert/delete concurrently; therefore, a mutex per task is sufficient. 101 * 102 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a 103 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is 104 * that hw_breakpoint may contend with per-task perf event list management. The 105 * assumption is that perf usecases involving hw_breakpoints are very unlikely 106 * to result in unnecessary contention. 107 */ 108 static inline struct mutex *get_task_bps_mutex(struct perf_event *bp) 109 { 110 struct task_struct *tsk = bp->hw.target; 111 112 return tsk ? &tsk->perf_event_mutex : NULL; 113 } 114 115 static struct mutex *bp_constraints_lock(struct perf_event *bp) 116 { 117 struct mutex *tsk_mtx = get_task_bps_mutex(bp); 118 119 if (tsk_mtx) { 120 /* 121 * Fully analogous to the perf_try_init_event() nesting 122 * argument in the comment near perf_event_ctx_lock_nested(); 123 * this child->perf_event_mutex cannot ever deadlock against 124 * the parent->perf_event_mutex usage from 125 * perf_event_task_{en,dis}able(). 126 * 127 * Specifically, inherited events will never occur on 128 * ->perf_event_list. 129 */ 130 mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING); 131 percpu_down_read(&bp_cpuinfo_sem); 132 } else { 133 percpu_down_write(&bp_cpuinfo_sem); 134 } 135 136 return tsk_mtx; 137 } 138 139 static void bp_constraints_unlock(struct mutex *tsk_mtx) 140 { 141 if (tsk_mtx) { 142 percpu_up_read(&bp_cpuinfo_sem); 143 mutex_unlock(tsk_mtx); 144 } else { 145 percpu_up_write(&bp_cpuinfo_sem); 146 } 147 } 148 149 static bool bp_constraints_is_locked(struct perf_event *bp) 150 { 151 struct mutex *tsk_mtx = get_task_bps_mutex(bp); 152 153 return percpu_is_write_locked(&bp_cpuinfo_sem) || 154 (tsk_mtx ? mutex_is_locked(tsk_mtx) : 155 percpu_is_read_locked(&bp_cpuinfo_sem)); 156 } 157 158 static inline void assert_bp_constraints_lock_held(struct perf_event *bp) 159 { 160 struct mutex *tsk_mtx = get_task_bps_mutex(bp); 161 162 if (tsk_mtx) 163 lockdep_assert_held(tsk_mtx); 164 lockdep_assert_held(&bp_cpuinfo_sem); 165 } 166 167 #ifdef hw_breakpoint_slots 168 /* 169 * Number of breakpoint slots is constant, and the same for all types. 170 */ 171 static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA)); 172 static inline int hw_breakpoint_slots_cached(int type) { return hw_breakpoint_slots(type); } 173 static inline int init_breakpoint_slots(void) { return 0; } 174 #else 175 /* 176 * Dynamic number of breakpoint slots. 177 */ 178 static int __nr_bp_slots[TYPE_MAX] __ro_after_init; 179 180 static inline int hw_breakpoint_slots_cached(int type) 181 { 182 return __nr_bp_slots[type]; 183 } 184 185 static __init bool 186 bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type) 187 { 188 hist->count = kzalloc_objs(*hist->count, 189 hw_breakpoint_slots_cached(type), GFP_KERNEL); 190 return hist->count; 191 } 192 193 static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist) 194 { 195 kfree(hist->count); 196 } 197 198 static __init int init_breakpoint_slots(void) 199 { 200 int i, cpu, err_cpu; 201 202 for (i = 0; i < TYPE_MAX; i++) 203 __nr_bp_slots[i] = hw_breakpoint_slots(i); 204 205 for_each_possible_cpu(cpu) { 206 for (i = 0; i < TYPE_MAX; i++) { 207 struct bp_cpuinfo *info = get_bp_info(cpu, i); 208 209 if (!bp_slots_histogram_alloc(&info->tsk_pinned, i)) 210 goto err; 211 } 212 } 213 for (i = 0; i < TYPE_MAX; i++) { 214 if (!bp_slots_histogram_alloc(&cpu_pinned[i], i)) 215 goto err; 216 if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i)) 217 goto err; 218 } 219 220 return 0; 221 err: 222 for_each_possible_cpu(err_cpu) { 223 for (i = 0; i < TYPE_MAX; i++) 224 bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned); 225 if (err_cpu == cpu) 226 break; 227 } 228 for (i = 0; i < TYPE_MAX; i++) { 229 bp_slots_histogram_free(&cpu_pinned[i]); 230 bp_slots_histogram_free(&tsk_pinned_all[i]); 231 } 232 233 return -ENOMEM; 234 } 235 #endif 236 237 static inline void 238 bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val) 239 { 240 const int old_idx = old - 1; 241 const int new_idx = old_idx + val; 242 243 if (old_idx >= 0) 244 WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0); 245 if (new_idx >= 0) 246 WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0); 247 } 248 249 static int 250 bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type) 251 { 252 for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) { 253 const int count = atomic_read(&hist->count[i]); 254 255 /* Catch unexpected writers; we want a stable snapshot. */ 256 ASSERT_EXCLUSIVE_WRITER(hist->count[i]); 257 if (count > 0) 258 return i + 1; 259 WARN(count < 0, "inconsistent breakpoint slots histogram"); 260 } 261 262 return 0; 263 } 264 265 static int 266 bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2, 267 enum bp_type_idx type) 268 { 269 for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) { 270 const int count1 = atomic_read(&hist1->count[i]); 271 const int count2 = atomic_read(&hist2->count[i]); 272 273 /* Catch unexpected writers; we want a stable snapshot. */ 274 ASSERT_EXCLUSIVE_WRITER(hist1->count[i]); 275 ASSERT_EXCLUSIVE_WRITER(hist2->count[i]); 276 if (count1 + count2 > 0) 277 return i + 1; 278 WARN(count1 < 0, "inconsistent breakpoint slots histogram"); 279 WARN(count2 < 0, "inconsistent breakpoint slots histogram"); 280 } 281 282 return 0; 283 } 284 285 #ifndef hw_breakpoint_weight 286 static inline int hw_breakpoint_weight(struct perf_event *bp) 287 { 288 return 1; 289 } 290 #endif 291 292 static inline enum bp_type_idx find_slot_idx(u64 bp_type) 293 { 294 if (bp_type & HW_BREAKPOINT_RW) 295 return TYPE_DATA; 296 297 return TYPE_INST; 298 } 299 300 /* 301 * Return the maximum number of pinned breakpoints a task has in this CPU. 302 */ 303 static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) 304 { 305 struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned; 306 307 /* 308 * At this point we want to have acquired the bp_cpuinfo_sem as a 309 * writer to ensure that there are no concurrent writers in 310 * toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot. 311 */ 312 lockdep_assert_held_write(&bp_cpuinfo_sem); 313 return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type); 314 } 315 316 /* 317 * Count the number of breakpoints of the same type and same task. 318 * The given event must be not on the list. 319 * 320 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent, 321 * returns a negative value. 322 */ 323 static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) 324 { 325 struct rhlist_head *head, *pos; 326 struct perf_event *iter; 327 int count = 0; 328 329 /* 330 * We need a stable snapshot of the per-task breakpoint list. 331 */ 332 assert_bp_constraints_lock_held(bp); 333 334 rcu_read_lock(); 335 head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params); 336 if (!head) 337 goto out; 338 339 rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) { 340 if (find_slot_idx(iter->attr.bp_type) != type) 341 continue; 342 343 if (iter->cpu >= 0) { 344 if (cpu == -1) { 345 count = -1; 346 goto out; 347 } else if (cpu != iter->cpu) 348 continue; 349 } 350 351 count += hw_breakpoint_weight(iter); 352 } 353 354 out: 355 rcu_read_unlock(); 356 return count; 357 } 358 359 static const struct cpumask *cpumask_of_bp(struct perf_event *bp) 360 { 361 if (bp->cpu >= 0) 362 return cpumask_of(bp->cpu); 363 return cpu_possible_mask; 364 } 365 366 /* 367 * Returns the max pinned breakpoint slots in a given 368 * CPU (cpu > -1) or across all of them (cpu = -1). 369 */ 370 static int 371 max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type) 372 { 373 const struct cpumask *cpumask = cpumask_of_bp(bp); 374 int pinned_slots = 0; 375 int cpu; 376 377 if (bp->hw.target && bp->cpu < 0) { 378 int max_pinned = task_bp_pinned(-1, bp, type); 379 380 if (max_pinned >= 0) { 381 /* 382 * Fast path: task_bp_pinned() is CPU-independent and 383 * returns the same value for any CPU. 384 */ 385 max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type); 386 return max_pinned; 387 } 388 } 389 390 for_each_cpu(cpu, cpumask) { 391 struct bp_cpuinfo *info = get_bp_info(cpu, type); 392 int nr; 393 394 nr = info->cpu_pinned; 395 if (!bp->hw.target) 396 nr += max_task_bp_pinned(cpu, type); 397 else 398 nr += task_bp_pinned(cpu, bp, type); 399 400 pinned_slots = max(nr, pinned_slots); 401 } 402 403 return pinned_slots; 404 } 405 406 /* 407 * Add/remove the given breakpoint in our constraint table 408 */ 409 static int 410 toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight) 411 { 412 int cpu, next_tsk_pinned; 413 414 if (!enable) 415 weight = -weight; 416 417 if (!bp->hw.target) { 418 /* 419 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the 420 * global histogram. 421 */ 422 struct bp_cpuinfo *info = get_bp_info(bp->cpu, type); 423 424 lockdep_assert_held_write(&bp_cpuinfo_sem); 425 bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight); 426 info->cpu_pinned += weight; 427 return 0; 428 } 429 430 /* 431 * If bp->hw.target, tsk_pinned is only modified, but not used 432 * otherwise. We can permit concurrent updates as long as there are no 433 * other uses: having acquired bp_cpuinfo_sem as a reader allows 434 * concurrent updates here. Uses of tsk_pinned will require acquiring 435 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value. 436 */ 437 lockdep_assert_held_read(&bp_cpuinfo_sem); 438 439 /* 440 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global 441 * histogram. We need to take care of 4 cases: 442 * 443 * 1. This breakpoint targets all CPUs (cpu < 0), and there may only 444 * exist other task breakpoints targeting all CPUs. In this case we 445 * can simply update the global slots histogram. 446 * 447 * 2. This breakpoint targets a specific CPU (cpu >= 0), but there may 448 * only exist other task breakpoints targeting all CPUs. 449 * 450 * a. On enable: remove the existing breakpoints from the global 451 * slots histogram and use the per-CPU histogram. 452 * 453 * b. On disable: re-insert the existing breakpoints into the global 454 * slots histogram and remove from per-CPU histogram. 455 * 456 * 3. Some other existing task breakpoints target specific CPUs. Only 457 * update the per-CPU slots histogram. 458 */ 459 460 if (!enable) { 461 /* 462 * Remove before updating histograms so we can determine if this 463 * was the last task breakpoint for a specific CPU. 464 */ 465 int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params); 466 467 if (ret) 468 return ret; 469 } 470 /* 471 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint. 472 */ 473 next_tsk_pinned = task_bp_pinned(-1, bp, type); 474 475 if (next_tsk_pinned >= 0) { 476 if (bp->cpu < 0) { /* Case 1: fast path */ 477 if (!enable) 478 next_tsk_pinned += hw_breakpoint_weight(bp); 479 bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight); 480 } else if (enable) { /* Case 2.a: slow path */ 481 /* Add existing to per-CPU histograms. */ 482 for_each_possible_cpu(cpu) { 483 bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned, 484 0, next_tsk_pinned); 485 } 486 /* Add this first CPU-pinned task breakpoint. */ 487 bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned, 488 next_tsk_pinned, weight); 489 /* Rebalance global task pinned histogram. */ 490 bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, 491 -next_tsk_pinned); 492 } else { /* Case 2.b: slow path */ 493 /* Remove this last CPU-pinned task breakpoint. */ 494 bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned, 495 next_tsk_pinned + hw_breakpoint_weight(bp), weight); 496 /* Remove all from per-CPU histograms. */ 497 for_each_possible_cpu(cpu) { 498 bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned, 499 next_tsk_pinned, -next_tsk_pinned); 500 } 501 /* Rebalance global task pinned histogram. */ 502 bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned); 503 } 504 } else { /* Case 3: slow path */ 505 const struct cpumask *cpumask = cpumask_of_bp(bp); 506 507 for_each_cpu(cpu, cpumask) { 508 next_tsk_pinned = task_bp_pinned(cpu, bp, type); 509 if (!enable) 510 next_tsk_pinned += hw_breakpoint_weight(bp); 511 bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned, 512 next_tsk_pinned, weight); 513 } 514 } 515 516 /* 517 * Readers want a stable snapshot of the per-task breakpoint list. 518 */ 519 assert_bp_constraints_lock_held(bp); 520 521 if (enable) 522 return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params); 523 524 return 0; 525 } 526 527 /* 528 * Constraints to check before allowing this new breakpoint counter. 529 * 530 * Note: Flexible breakpoints are currently unimplemented, but outlined in the 531 * below algorithm for completeness. The implementation treats flexible as 532 * pinned due to no guarantee that we currently always schedule flexible events 533 * before a pinned event in a same CPU. 534 * 535 * == Non-pinned counter == (Considered as pinned for now) 536 * 537 * - If attached to a single cpu, check: 538 * 539 * (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu) 540 * + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM 541 * 542 * -> If there are already non-pinned counters in this cpu, it means 543 * there is already a free slot for them. 544 * Otherwise, we check that the maximum number of per task 545 * breakpoints (for this cpu) plus the number of per cpu breakpoint 546 * (for this cpu) doesn't cover every registers. 547 * 548 * - If attached to every cpus, check: 549 * 550 * (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *)) 551 * + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM 552 * 553 * -> This is roughly the same, except we check the number of per cpu 554 * bp for every cpu and we keep the max one. Same for the per tasks 555 * breakpoints. 556 * 557 * 558 * == Pinned counter == 559 * 560 * - If attached to a single cpu, check: 561 * 562 * ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu) 563 * + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM 564 * 565 * -> Same checks as before. But now the info->flexible, if any, must keep 566 * one register at least (or they will never be fed). 567 * 568 * - If attached to every cpus, check: 569 * 570 * ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *)) 571 * + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM 572 */ 573 static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) 574 { 575 enum bp_type_idx type; 576 int max_pinned_slots; 577 int weight; 578 579 /* We couldn't initialize breakpoint constraints on boot */ 580 if (!constraints_initialized) 581 return -ENOMEM; 582 583 /* Basic checks */ 584 if (bp_type == HW_BREAKPOINT_EMPTY || 585 bp_type == HW_BREAKPOINT_INVALID) 586 return -EINVAL; 587 588 type = find_slot_idx(bp_type); 589 weight = hw_breakpoint_weight(bp); 590 591 /* Check if this new breakpoint can be satisfied across all CPUs. */ 592 max_pinned_slots = max_bp_pinned_slots(bp, type) + weight; 593 if (max_pinned_slots > hw_breakpoint_slots_cached(type)) 594 return -ENOSPC; 595 596 return toggle_bp_slot(bp, true, type, weight); 597 } 598 599 int reserve_bp_slot(struct perf_event *bp) 600 { 601 struct mutex *mtx = bp_constraints_lock(bp); 602 int ret = __reserve_bp_slot(bp, bp->attr.bp_type); 603 604 bp_constraints_unlock(mtx); 605 return ret; 606 } 607 608 static void __release_bp_slot(struct perf_event *bp, u64 bp_type) 609 { 610 enum bp_type_idx type; 611 int weight; 612 613 type = find_slot_idx(bp_type); 614 weight = hw_breakpoint_weight(bp); 615 WARN_ON(toggle_bp_slot(bp, false, type, weight)); 616 } 617 618 void release_bp_slot(struct perf_event *bp) 619 { 620 struct mutex *mtx = bp_constraints_lock(bp); 621 622 __release_bp_slot(bp, bp->attr.bp_type); 623 bp_constraints_unlock(mtx); 624 } 625 626 static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type) 627 { 628 int err; 629 630 __release_bp_slot(bp, old_type); 631 632 err = __reserve_bp_slot(bp, new_type); 633 if (err) { 634 /* 635 * Reserve the old_type slot back in case 636 * there's no space for the new type. 637 * 638 * This must succeed, because we just released 639 * the old_type slot in the __release_bp_slot 640 * call above. If not, something is broken. 641 */ 642 WARN_ON(__reserve_bp_slot(bp, old_type)); 643 } 644 645 return err; 646 } 647 648 static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type) 649 { 650 struct mutex *mtx = bp_constraints_lock(bp); 651 int ret = __modify_bp_slot(bp, old_type, new_type); 652 653 bp_constraints_unlock(mtx); 654 return ret; 655 } 656 657 /* 658 * Allow the kernel debugger to reserve breakpoint slots without 659 * taking a lock using the dbg_* variant of for the reserve and 660 * release breakpoint slots. 661 */ 662 int dbg_reserve_bp_slot(struct perf_event *bp) 663 { 664 int ret; 665 666 if (bp_constraints_is_locked(bp)) 667 return -1; 668 669 /* Locks aren't held; disable lockdep assert checking. */ 670 lockdep_off(); 671 ret = __reserve_bp_slot(bp, bp->attr.bp_type); 672 lockdep_on(); 673 674 return ret; 675 } 676 677 int dbg_release_bp_slot(struct perf_event *bp) 678 { 679 if (bp_constraints_is_locked(bp)) 680 return -1; 681 682 /* Locks aren't held; disable lockdep assert checking. */ 683 lockdep_off(); 684 __release_bp_slot(bp, bp->attr.bp_type); 685 lockdep_on(); 686 687 return 0; 688 } 689 690 static int hw_breakpoint_parse(struct perf_event *bp, 691 const struct perf_event_attr *attr, 692 struct arch_hw_breakpoint *hw) 693 { 694 int err; 695 696 err = hw_breakpoint_arch_parse(bp, attr, hw); 697 if (err) 698 return err; 699 700 if (arch_check_bp_in_kernelspace(hw)) { 701 if (attr->exclude_kernel) 702 return -EINVAL; 703 /* 704 * Don't let unprivileged users set a breakpoint in the trap 705 * path to avoid trap recursion attacks. 706 */ 707 if (!capable(CAP_SYS_ADMIN)) 708 return -EPERM; 709 } 710 711 return 0; 712 } 713 714 int register_perf_hw_breakpoint(struct perf_event *bp) 715 { 716 struct arch_hw_breakpoint hw = { }; 717 int err; 718 719 err = reserve_bp_slot(bp); 720 if (err) 721 return err; 722 723 err = hw_breakpoint_parse(bp, &bp->attr, &hw); 724 if (err) { 725 release_bp_slot(bp); 726 return err; 727 } 728 729 bp->hw.info = hw; 730 731 return 0; 732 } 733 734 /** 735 * register_user_hw_breakpoint - register a hardware breakpoint for user space 736 * @attr: breakpoint attributes 737 * @triggered: callback to trigger when we hit the breakpoint 738 * @context: context data could be used in the triggered callback 739 * @tsk: pointer to 'task_struct' of the process to which the address belongs 740 */ 741 struct perf_event * 742 register_user_hw_breakpoint(struct perf_event_attr *attr, 743 perf_overflow_handler_t triggered, 744 void *context, 745 struct task_struct *tsk) 746 { 747 return perf_event_create_kernel_counter(attr, -1, tsk, triggered, 748 context); 749 } 750 EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); 751 752 static void hw_breakpoint_copy_attr(struct perf_event_attr *to, 753 struct perf_event_attr *from) 754 { 755 to->bp_addr = from->bp_addr; 756 to->bp_type = from->bp_type; 757 to->bp_len = from->bp_len; 758 to->disabled = from->disabled; 759 } 760 761 int 762 modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, 763 bool check) 764 { 765 struct arch_hw_breakpoint hw = { }; 766 int err; 767 768 err = hw_breakpoint_parse(bp, attr, &hw); 769 if (err) 770 return err; 771 772 if (check) { 773 struct perf_event_attr old_attr; 774 775 old_attr = bp->attr; 776 hw_breakpoint_copy_attr(&old_attr, attr); 777 if (memcmp(&old_attr, attr, sizeof(*attr))) 778 return -EINVAL; 779 } 780 781 if (bp->attr.bp_type != attr->bp_type) { 782 err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type); 783 if (err) 784 return err; 785 } 786 787 hw_breakpoint_copy_attr(&bp->attr, attr); 788 bp->hw.info = hw; 789 790 return 0; 791 } 792 793 /** 794 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint 795 * @bp: the breakpoint structure to modify 796 * @attr: new breakpoint attributes 797 */ 798 int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) 799 { 800 int err; 801 802 /* 803 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it 804 * will not be possible to raise IPIs that invoke __perf_event_disable. 805 * So call the function directly after making sure we are targeting the 806 * current task. 807 */ 808 if (irqs_disabled() && bp->ctx && bp->ctx->task == current) 809 perf_event_disable_local(bp); 810 else 811 perf_event_disable(bp); 812 813 err = modify_user_hw_breakpoint_check(bp, attr, false); 814 815 if (!bp->attr.disabled) 816 perf_event_enable(bp); 817 818 return err; 819 } 820 EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); 821 822 /** 823 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint 824 * @bp: the breakpoint structure to unregister 825 */ 826 void unregister_hw_breakpoint(struct perf_event *bp) 827 { 828 if (!bp) 829 return; 830 perf_event_release_kernel(bp); 831 } 832 EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); 833 834 /** 835 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel 836 * @attr: breakpoint attributes 837 * @triggered: callback to trigger when we hit the breakpoint 838 * @context: context data could be used in the triggered callback 839 * 840 * @return a set of per_cpu pointers to perf events 841 */ 842 struct perf_event * __percpu * 843 register_wide_hw_breakpoint(struct perf_event_attr *attr, 844 perf_overflow_handler_t triggered, 845 void *context) 846 { 847 struct perf_event * __percpu *cpu_events, *bp; 848 long err = 0; 849 int cpu; 850 851 cpu_events = alloc_percpu(typeof(*cpu_events)); 852 if (!cpu_events) 853 return ERR_PTR_PCPU(-ENOMEM); 854 855 cpus_read_lock(); 856 for_each_online_cpu(cpu) { 857 bp = perf_event_create_kernel_counter(attr, cpu, NULL, 858 triggered, context); 859 if (IS_ERR(bp)) { 860 err = PTR_ERR(bp); 861 break; 862 } 863 864 per_cpu(*cpu_events, cpu) = bp; 865 } 866 cpus_read_unlock(); 867 868 if (likely(!err)) 869 return cpu_events; 870 871 unregister_wide_hw_breakpoint(cpu_events); 872 return ERR_PTR_PCPU(err); 873 } 874 EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); 875 876 /** 877 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel 878 * @cpu_events: the per cpu set of events to unregister 879 */ 880 void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) 881 { 882 int cpu; 883 884 for_each_possible_cpu(cpu) 885 unregister_hw_breakpoint(per_cpu(*cpu_events, cpu)); 886 887 free_percpu(cpu_events); 888 } 889 EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); 890 891 /** 892 * hw_breakpoint_is_used - check if breakpoints are currently used 893 * 894 * Returns: true if breakpoints are used, false otherwise. 895 */ 896 bool hw_breakpoint_is_used(void) 897 { 898 int cpu; 899 900 if (!constraints_initialized) 901 return false; 902 903 for_each_possible_cpu(cpu) { 904 for (int type = 0; type < TYPE_MAX; ++type) { 905 struct bp_cpuinfo *info = get_bp_info(cpu, type); 906 907 if (info->cpu_pinned) 908 return true; 909 910 for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) { 911 if (atomic_read(&info->tsk_pinned.count[slot])) 912 return true; 913 } 914 } 915 } 916 917 for (int type = 0; type < TYPE_MAX; ++type) { 918 for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) { 919 /* 920 * Warn, because if there are CPU pinned counters, 921 * should never get here; bp_cpuinfo::cpu_pinned should 922 * be consistent with the global cpu_pinned histogram. 923 */ 924 if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot]))) 925 return true; 926 927 if (atomic_read(&tsk_pinned_all[type].count[slot])) 928 return true; 929 } 930 } 931 932 return false; 933 } 934 935 static struct notifier_block hw_breakpoint_exceptions_nb = { 936 .notifier_call = hw_breakpoint_exceptions_notify, 937 /* we need to be notified first */ 938 .priority = 0x7fffffff 939 }; 940 941 static void bp_perf_event_destroy(struct perf_event *event) 942 { 943 release_bp_slot(event); 944 } 945 946 static int hw_breakpoint_event_init(struct perf_event *bp) 947 { 948 int err; 949 950 if (bp->attr.type != PERF_TYPE_BREAKPOINT) 951 return -ENOENT; 952 953 /* 954 * Check if breakpoint type is supported before proceeding. 955 * Also, no branch sampling for breakpoint events. 956 */ 957 if (!hw_breakpoint_slots_cached(find_slot_idx(bp->attr.bp_type)) || has_branch_stack(bp)) 958 return -EOPNOTSUPP; 959 960 err = register_perf_hw_breakpoint(bp); 961 if (err) 962 return err; 963 964 bp->destroy = bp_perf_event_destroy; 965 966 return 0; 967 } 968 969 static int hw_breakpoint_add(struct perf_event *bp, int flags) 970 { 971 if (!(flags & PERF_EF_START)) 972 bp->hw.state = PERF_HES_STOPPED; 973 974 if (is_sampling_event(bp)) { 975 bp->hw.last_period = bp->hw.sample_period; 976 perf_swevent_set_period(bp); 977 } 978 979 return arch_install_hw_breakpoint(bp); 980 } 981 982 static void hw_breakpoint_del(struct perf_event *bp, int flags) 983 { 984 arch_uninstall_hw_breakpoint(bp); 985 } 986 987 static void hw_breakpoint_start(struct perf_event *bp, int flags) 988 { 989 bp->hw.state = 0; 990 } 991 992 static void hw_breakpoint_stop(struct perf_event *bp, int flags) 993 { 994 bp->hw.state = PERF_HES_STOPPED; 995 } 996 997 static struct pmu perf_breakpoint = { 998 .task_ctx_nr = perf_sw_context, /* could eventually get its own */ 999 1000 .event_init = hw_breakpoint_event_init, 1001 .add = hw_breakpoint_add, 1002 .del = hw_breakpoint_del, 1003 .start = hw_breakpoint_start, 1004 .stop = hw_breakpoint_stop, 1005 .read = hw_breakpoint_pmu_read, 1006 }; 1007 1008 int __init init_hw_breakpoint(void) 1009 { 1010 int ret; 1011 1012 ret = rhltable_init(&task_bps_ht, &task_bps_ht_params); 1013 if (ret) 1014 return ret; 1015 1016 ret = init_breakpoint_slots(); 1017 if (ret) 1018 return ret; 1019 1020 constraints_initialized = true; 1021 1022 perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT); 1023 1024 return register_die_notifier(&hw_breakpoint_exceptions_nb); 1025 } 1026