// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/hw_breakpoint.h>

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/percpu-rwsem.h>
#include <linux/percpu.h>
#include <linux/rhashtable.h>
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * Data structure to track the total uses of N slots across tasks or CPUs;
 * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots.
 */
struct bp_slots_histogram {
#ifdef hw_breakpoint_slots
	atomic_t count[hw_breakpoint_slots(0)];
#else
	atomic_t *count;
#endif
};

/*
 * Per-CPU constraints data.
 */
struct bp_cpuinfo {
	/* Number of pinned CPU breakpoints in a CPU. */
	unsigned int			cpu_pinned;
	/* Histogram of pinned task breakpoints in a CPU. */
	struct bp_slots_histogram	tsk_pinned;
};

static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);

static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{
	return per_cpu_ptr(bp_cpuinfo + type, cpu);
}

/* Number of pinned CPU breakpoints globally. */
static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
/* Number of pinned CPU-independent task breakpoints. */
static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX];

/* Keep track of the breakpoints attached to tasks */
static struct rhltable task_bps_ht;
static const struct rhashtable_params task_bps_ht_params = {
	.head_offset = offsetof(struct hw_perf_event, bp_list),
	.key_offset = offsetof(struct hw_perf_event, target),
	.key_len = sizeof_field(struct hw_perf_event, target),
	.automatic_shrinking = true,
};

static bool constraints_initialized __ro_after_init;

/*
 * Synchronizes accesses to the per-CPU constraints; the locking rules are:
 *
 *  1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock
 *     (due to bp_slots_histogram::count being atomic, no update is lost).
 *
 *  2. Holding a write-lock is required for computations that require a
 *     stable snapshot of all bp_cpuinfo::tsk_pinned.
 *
 *  3. In all other cases, non-atomic accesses require the appropriately held
 *     lock (read-lock for read-only accesses; write-lock for reads/writes).
 */
DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem);

/*
 * Return mutex to serialize accesses to per-task lists in task_bps_ht. Since
 * rhltable synchronizes concurrent insertions/deletions, independent tasks may
 * insert/delete concurrently; therefore, a mutex per task is sufficient.
 *
 * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a
 * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is
 * that hw_breakpoint may contend with per-task perf event list management. The
 * assumption is that perf use cases involving hw_breakpoints are very unlikely
 * to result in unnecessary contention.
 */
static inline struct mutex *get_task_bps_mutex(struct perf_event *bp)
{
	struct task_struct *tsk = bp->hw.target;

	return tsk ? &tsk->perf_event_mutex : NULL;
}

static struct mutex *bp_constraints_lock(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	if (tsk_mtx) {
		/*
		 * Fully analogous to the perf_try_init_event() nesting
		 * argument in the comment near perf_event_ctx_lock_nested();
		 * this child->perf_event_mutex cannot ever deadlock against
		 * the parent->perf_event_mutex usage from
		 * perf_event_task_{en,dis}able().
		 *
		 * Specifically, inherited events will never occur on
		 * ->perf_event_list.
		 */
		mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING);
		percpu_down_read(&bp_cpuinfo_sem);
	} else {
		percpu_down_write(&bp_cpuinfo_sem);
	}

	return tsk_mtx;
}

static void bp_constraints_unlock(struct mutex *tsk_mtx)
{
	if (tsk_mtx) {
		percpu_up_read(&bp_cpuinfo_sem);
		mutex_unlock(tsk_mtx);
	} else {
		percpu_up_write(&bp_cpuinfo_sem);
	}
}

static bool bp_constraints_is_locked(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	return percpu_is_write_locked(&bp_cpuinfo_sem) ||
	       (tsk_mtx ? mutex_is_locked(tsk_mtx) :
			  percpu_is_read_locked(&bp_cpuinfo_sem));
}

static inline void assert_bp_constraints_lock_held(struct perf_event *bp)
{
	struct mutex *tsk_mtx = get_task_bps_mutex(bp);

	if (tsk_mtx)
		lockdep_assert_held(tsk_mtx);
	lockdep_assert_held(&bp_cpuinfo_sem);
}

#ifdef hw_breakpoint_slots
/*
 * Number of breakpoint slots is constant, and the same for all types.
 */
static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA));
static inline int hw_breakpoint_slots_cached(int type)	{ return hw_breakpoint_slots(type); }
static inline int init_breakpoint_slots(void)		{ return 0; }
#else
/*
 * Dynamic number of breakpoint slots.
 */
static int __nr_bp_slots[TYPE_MAX] __ro_after_init;

static inline int hw_breakpoint_slots_cached(int type)
{
	return __nr_bp_slots[type];
}

static __init bool
bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type)
{
	hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL);
	return hist->count;
}

static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist)
{
	kfree(hist->count);
}

static __init int init_breakpoint_slots(void)
{
	int i, cpu, err_cpu;

	for (i = 0; i < TYPE_MAX; i++)
		__nr_bp_slots[i] = hw_breakpoint_slots(i);

	for_each_possible_cpu(cpu) {
		for (i = 0; i < TYPE_MAX; i++) {
			struct bp_cpuinfo *info = get_bp_info(cpu, i);

			if (!bp_slots_histogram_alloc(&info->tsk_pinned, i))
				goto err;
		}
	}
	for (i = 0; i < TYPE_MAX; i++) {
		if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
			goto err;
		if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i))
			goto err;
	}

	return 0;
err:
	for_each_possible_cpu(err_cpu) {
		for (i = 0; i < TYPE_MAX; i++)
			bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned);
		if (err_cpu == cpu)
			break;
	}
	for (i = 0; i < TYPE_MAX; i++) {
		bp_slots_histogram_free(&cpu_pinned[i]);
		bp_slots_histogram_free(&tsk_pinned_all[i]);
	}

	return -ENOMEM;
}
#endif

static inline void
bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val)
{
	const int old_idx = old - 1;
	const int new_idx = old_idx + val;

	if (old_idx >= 0)
		WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0);
	if (new_idx >= 0)
		WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0);
}

static int
bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type)
{
	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
		const int count = atomic_read(&hist->count[i]);

		/* Catch unexpected writers; we want a stable snapshot. */
		ASSERT_EXCLUSIVE_WRITER(hist->count[i]);
		if (count > 0)
			return i + 1;
		WARN(count < 0, "inconsistent breakpoint slots histogram");
	}

	return 0;
}

static int
bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2,
			     enum bp_type_idx type)
{
	for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) {
		const int count1 = atomic_read(&hist1->count[i]);
		const int count2 = atomic_read(&hist2->count[i]);

		/* Catch unexpected writers; we want a stable snapshot. */
		ASSERT_EXCLUSIVE_WRITER(hist1->count[i]);
		ASSERT_EXCLUSIVE_WRITER(hist2->count[i]);
		if (count1 + count2 > 0)
			return i + 1;
		WARN(count1 < 0, "inconsistent breakpoint slots histogram");
		WARN(count2 < 0, "inconsistent breakpoint slots histogram");
	}

	return 0;
}
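
/*
 * Worked example of the histogram encoding above (hypothetical numbers, not
 * taken from any particular system): suppose three task breakpoints are
 * pinned on a CPU, two tasks using 1 slot each and one task using 3 slots.
 * Then tsk_pinned has count[0] == 2 and count[2] == 1, all other entries 0.
 * A task going from 1 to 2 used slots is recorded with
 * bp_slots_histogram_add(hist, 1, 1), which decrements count[0] and
 * increments count[1]; bp_slots_histogram_max() scans from the top and
 * returns 3, the largest per-task slot usage.
 */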

#ifndef hw_breakpoint_weight
static inline int hw_breakpoint_weight(struct perf_event *bp)
{
	return 1;
}
#endif

static inline enum bp_type_idx find_slot_idx(u64 bp_type)
{
	if (bp_type & HW_BREAKPOINT_RW)
		return TYPE_DATA;

	return TYPE_INST;
}

/*
 * Return the maximum number of pinned breakpoints a task has in this CPU.
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
	struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned;

	/*
	 * At this point we want to have acquired the bp_cpuinfo_sem as a
	 * writer to ensure that there are no concurrent writers in
	 * toggle_bp_slot() to tsk_pinned, and we get a stable snapshot.
	 */
	lockdep_assert_held_write(&bp_cpuinfo_sem);
	return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type);
}

/*
 * Count the number of breakpoints of the same type and same task.
 * The given event must not be on the list.
 *
 * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent,
 * returns a negative value.
 */
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
	struct rhlist_head *head, *pos;
	struct perf_event *iter;
	int count = 0;

	/*
	 * We need a stable snapshot of the per-task breakpoint list.
	 */
	assert_bp_constraints_lock_held(bp);

	rcu_read_lock();
	head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params);
	if (!head)
		goto out;

	rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
		if (find_slot_idx(iter->attr.bp_type) != type)
			continue;

		if (iter->cpu >= 0) {
			if (cpu == -1) {
				count = -1;
				goto out;
			} else if (cpu != iter->cpu)
				continue;
		}

		count += hw_breakpoint_weight(iter);
	}

out:
	rcu_read_unlock();
	return count;
}

static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
{
	if (bp->cpu >= 0)
		return cpumask_of(bp->cpu);
	return cpu_possible_mask;
}

/*
 * Returns the max pinned breakpoint slots in a given
 * CPU (cpu > -1) or across all of them (cpu = -1).
 */
static int
max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
{
	const struct cpumask *cpumask = cpumask_of_bp(bp);
	int pinned_slots = 0;
	int cpu;

	if (bp->hw.target && bp->cpu < 0) {
		int max_pinned = task_bp_pinned(-1, bp, type);

		if (max_pinned >= 0) {
			/*
			 * Fast path: task_bp_pinned() is CPU-independent and
			 * returns the same value for any CPU.
			 */
			max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
			return max_pinned;
		}
	}

	for_each_cpu(cpu, cpumask) {
		struct bp_cpuinfo *info = get_bp_info(cpu, type);
		int nr;

		nr = info->cpu_pinned;
		if (!bp->hw.target)
			nr += max_task_bp_pinned(cpu, type);
		else
			nr += task_bp_pinned(cpu, bp, type);

		pinned_slots = max(nr, pinned_slots);
	}

	return pinned_slots;
}

/*
 * Add/remove the given breakpoint in our constraint table
 */
static int
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight)
{
	int cpu, next_tsk_pinned;

	if (!enable)
		weight = -weight;

	if (!bp->hw.target) {
		/*
		 * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the
		 * global histogram.
		 */
		struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);

		lockdep_assert_held_write(&bp_cpuinfo_sem);
		bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
		info->cpu_pinned += weight;
		return 0;
	}

	/*
	 * If bp->hw.target, tsk_pinned is only modified, but not used
	 * otherwise. We can permit concurrent updates as long as there are no
	 * other uses: having acquired bp_cpuinfo_sem as a reader allows
	 * concurrent updates here. Uses of tsk_pinned will require acquiring
	 * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value.
	 */
	lockdep_assert_held_read(&bp_cpuinfo_sem);

	/*
	 * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global
	 * histogram. We need to take care of 4 cases:
	 *
	 *  1. This breakpoint targets all CPUs (cpu < 0), and there may only
	 *     exist other task breakpoints targeting all CPUs. In this case we
	 *     can simply update the global slots histogram.
	 *
	 *  2. This breakpoint targets a specific CPU (cpu >= 0), but there may
	 *     only exist other task breakpoints targeting all CPUs.
	 *
	 *     a. On enable: remove the existing breakpoints from the global
	 *        slots histogram and use the per-CPU histogram.
	 *
	 *     b. On disable: re-insert the existing breakpoints into the global
	 *        slots histogram and remove from per-CPU histogram.
	 *
	 *  3. Some other existing task breakpoints target specific CPUs. Only
	 *     update the per-CPU slots histogram.
	 */

	if (!enable) {
		/*
		 * Remove before updating histograms so we can determine if this
		 * was the last task breakpoint for a specific CPU.
		 */
		int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

		if (ret)
			return ret;
	}
	/*
	 * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint.
	 */
	next_tsk_pinned = task_bp_pinned(-1, bp, type);

	if (next_tsk_pinned >= 0) {
		if (bp->cpu < 0) { /* Case 1: fast path */
			if (!enable)
				next_tsk_pinned += hw_breakpoint_weight(bp);
			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight);
		} else if (enable) { /* Case 2.a: slow path */
			/* Add existing to per-CPU histograms. */
			for_each_possible_cpu(cpu) {
				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
						       0, next_tsk_pinned);
			}
			/* Add this first CPU-pinned task breakpoint. */
			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
					       next_tsk_pinned, weight);
			/* Rebalance global task pinned histogram. */
			bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned,
					       -next_tsk_pinned);
		} else { /* Case 2.b: slow path */
			/* Remove this last CPU-pinned task breakpoint. */
			bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned,
					       next_tsk_pinned + hw_breakpoint_weight(bp), weight);
			/* Remove all from per-CPU histograms. */
			for_each_possible_cpu(cpu) {
				bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
						       next_tsk_pinned, -next_tsk_pinned);
			}
			/* Rebalance global task pinned histogram. */
			bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned);
		}
	} else { /* Case 3: slow path */
		const struct cpumask *cpumask = cpumask_of_bp(bp);

		for_each_cpu(cpu, cpumask) {
			next_tsk_pinned = task_bp_pinned(cpu, bp, type);
			if (!enable)
				next_tsk_pinned += hw_breakpoint_weight(bp);
			bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned,
					       next_tsk_pinned, weight);
		}
	}

	/*
	 * Readers want a stable snapshot of the per-task breakpoint list.
	 */
	assert_bp_constraints_lock_held(bp);

	if (enable)
		return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params);

	return 0;
}
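
/*
 * Worked example for case 2.a above (hypothetical numbers): a task already
 * has two CPU-independent breakpoints of weight 1 each, so tsk_pinned_all
 * accounts for it at count[1], and it now enables a third breakpoint pinned
 * to CPU 2 with weight 1. Then next_tsk_pinned == 2, so every CPU's
 * tsk_pinned histogram gains an entry at count[1] (the task now occupies 2
 * slots on every CPU), CPU 2 additionally moves that entry from count[1] to
 * count[2] (3 slots there), and the task's entry is removed from
 * tsk_pinned_all since its usage is no longer CPU-independent.
 */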

/*
 * Constraints to check before allowing this new breakpoint counter.
 *
 * Note: Flexible breakpoints are currently unimplemented, but outlined in the
 * below algorithm for completeness. The implementation treats flexible as
 * pinned, because there is no guarantee that we currently always schedule
 * flexible events before a pinned event on the same CPU.
 *
 *  == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per-task
 *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
 *          (for this cpu) doesn't cover every register.
 *
 *   - If attached to every cpu, check:
 *
 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per-cpu
 *          breakpoints for every cpu and we keep the max one. Same for the
 *          per-task breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the info->flexible, if any, must
 *          keep at least one register (or they will never be fed).
 *
 *   - If attached to every cpu, check:
 *
 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
{
	enum bp_type_idx type;
	int max_pinned_slots;
	int weight;

	/* We couldn't initialize breakpoint constraints on boot */
	if (!constraints_initialized)
		return -ENOMEM;

	/* Basic checks */
	if (bp_type == HW_BREAKPOINT_EMPTY ||
	    bp_type == HW_BREAKPOINT_INVALID)
		return -EINVAL;

	type = find_slot_idx(bp_type);
	weight = hw_breakpoint_weight(bp);

	/* Check if this new breakpoint can be satisfied across all CPUs. */
	max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
		return -ENOSPC;

	return toggle_bp_slot(bp, true, type, weight);
}

int reserve_bp_slot(struct perf_event *bp)
{
	struct mutex *mtx = bp_constraints_lock(bp);
	int ret = __reserve_bp_slot(bp, bp->attr.bp_type);

	bp_constraints_unlock(mtx);
	return ret;
}

static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
{
	enum bp_type_idx type;
	int weight;

	type = find_slot_idx(bp_type);
	weight = hw_breakpoint_weight(bp);
	WARN_ON(toggle_bp_slot(bp, false, type, weight));
}

void release_bp_slot(struct perf_event *bp)
{
	struct mutex *mtx = bp_constraints_lock(bp);

	__release_bp_slot(bp, bp->attr.bp_type);
	bp_constraints_unlock(mtx);
}

static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
	int err;

	__release_bp_slot(bp, old_type);

	err = __reserve_bp_slot(bp, new_type);
	if (err) {
		/*
		 * Reserve the old_type slot back in case
		 * there's no space for the new type.
		 *
		 * This must succeed, because we just released
		 * the old_type slot in the __release_bp_slot
		 * call above. If not, something is broken.
		 */
		WARN_ON(__reserve_bp_slot(bp, old_type));
	}

	return err;
}

static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
	struct mutex *mtx = bp_constraints_lock(bp);
	int ret = __modify_bp_slot(bp, old_type, new_type);

	bp_constraints_unlock(mtx);
	return ret;
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without taking a
 * lock, using the dbg_* variants of the reserve and release breakpoint slot
 * functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
	int ret;

	if (bp_constraints_is_locked(bp))
		return -1;

	/* Locks aren't held; disable lockdep assert checking. */
	lockdep_off();
	ret = __reserve_bp_slot(bp, bp->attr.bp_type);
	lockdep_on();

	return ret;
}

int dbg_release_bp_slot(struct perf_event *bp)
{
	if (bp_constraints_is_locked(bp))
		return -1;

	/* Locks aren't held; disable lockdep assert checking. */
	lockdep_off();
	__release_bp_slot(bp, bp->attr.bp_type);
	lockdep_on();

	return 0;
}

static int hw_breakpoint_parse(struct perf_event *bp,
			       const struct perf_event_attr *attr,
			       struct arch_hw_breakpoint *hw)
{
	int err;

	err = hw_breakpoint_arch_parse(bp, attr, hw);
	if (err)
		return err;

	if (arch_check_bp_in_kernelspace(hw)) {
		if (attr->exclude_kernel)
			return -EINVAL;
		/*
		 * Don't let unprivileged users set a breakpoint in the trap
		 * path to avoid trap recursion attacks.
		 */
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
	}

	return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
	struct arch_hw_breakpoint hw = { };
	int err;

	err = reserve_bp_slot(bp);
	if (err)
		return err;

	err = hw_breakpoint_parse(bp, &bp->attr, &hw);
	if (err) {
		release_bp_slot(bp);
		return err;
	}

	bp->hw.info = hw;

	return 0;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data that could be used in the triggered callback
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context,
			    struct task_struct *tsk)
{
	return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
						context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
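
/*
 * Usage sketch (hypothetical; names such as example_wp_handler and
 * example_addr are made up for illustration): registering a per-task write
 * watchpoint via register_user_hw_breakpoint() above.
 *
 *	static void example_wp_handler(struct perf_event *bp,
 *				       struct perf_sample_data *data,
 *				       struct pt_regs *regs)
 *	{
 *		pr_info("write to 0x%llx by %s\n", bp->attr.bp_addr, current->comm);
 *	}
 *
 *	struct perf_event *example_register(struct task_struct *tsk,
 *					    unsigned long example_addr)
 *	{
 *		struct perf_event_attr attr;
 *
 *		hw_breakpoint_init(&attr);
 *		attr.bp_addr = example_addr;
 *		attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *		attr.bp_type = HW_BREAKPOINT_W;
 *
 *		return register_user_hw_breakpoint(&attr, example_wp_handler, NULL, tsk);
 *	}
 *
 * The caller checks the result with IS_ERR() and eventually tears the event
 * down with unregister_hw_breakpoint().
 */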

static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
				    struct perf_event_attr *from)
{
	to->bp_addr = from->bp_addr;
	to->bp_type = from->bp_type;
	to->bp_len = from->bp_len;
	to->disabled = from->disabled;
}

int
modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
				bool check)
{
	struct arch_hw_breakpoint hw = { };
	int err;

	err = hw_breakpoint_parse(bp, attr, &hw);
	if (err)
		return err;

	if (check) {
		struct perf_event_attr old_attr;

		old_attr = bp->attr;
		hw_breakpoint_copy_attr(&old_attr, attr);
		if (memcmp(&old_attr, attr, sizeof(*attr)))
			return -EINVAL;
	}

	if (bp->attr.bp_type != attr->bp_type) {
		err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
		if (err)
			return err;
	}

	hw_breakpoint_copy_attr(&bp->attr, attr);
	bp->hw.info = hw;

	return 0;
}

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
	int err;

	/*
	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
	 * will not be possible to raise IPIs that invoke __perf_event_disable.
	 * So call the function directly after making sure we are targeting the
	 * current task.
	 */
	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
		perf_event_disable_local(bp);
	else
		perf_event_disable(bp);

	err = modify_user_hw_breakpoint_check(bp, attr, false);

	if (!bp->attr.disabled)
		perf_event_enable(bp);

	return err;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
	if (!bp)
		return;
	perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data that could be used in the triggered callback
 *
 * @return a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
			    perf_overflow_handler_t triggered,
			    void *context)
{
	struct perf_event * __percpu *cpu_events, *bp;
	long err = 0;
	int cpu;

	cpu_events = alloc_percpu(typeof(*cpu_events));
	if (!cpu_events)
		return (void __percpu __force *)ERR_PTR(-ENOMEM);

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		bp = perf_event_create_kernel_counter(attr, cpu, NULL,
						      triggered, context);
		if (IS_ERR(bp)) {
			err = PTR_ERR(bp);
			break;
		}

		per_cpu(*cpu_events, cpu) = bp;
	}
	cpus_read_unlock();

	if (likely(!err))
		return cpu_events;

	unregister_wide_hw_breakpoint(cpu_events);
	return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
	int cpu;

	for_each_possible_cpu(cpu)
		unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));

	free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
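
/*
 * Usage sketch for the wide (per-CPU kernel) variant; see also
 * samples/hw_breakpoint/data_breakpoint.c in the kernel tree for a complete
 * module. The symbol example_watched_data and the handler example_wide_handler
 * are hypothetical.
 *
 *	struct perf_event * __percpu *events;
 *	struct perf_event_attr attr;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = (unsigned long)&example_watched_data;
 *	attr.bp_len  = HW_BREAKPOINT_LEN_8;
 *	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 *
 *	events = register_wide_hw_breakpoint(&attr, example_wide_handler, NULL);
 *	if (IS_ERR((void __force *)events))
 *		return PTR_ERR((void __force *)events);
 *	...
 *	unregister_wide_hw_breakpoint(events);
 */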

/**
 * hw_breakpoint_is_used - check if breakpoints are currently used
 *
 * Returns: true if breakpoints are used, false otherwise.
 */
bool hw_breakpoint_is_used(void)
{
	int cpu;

	if (!constraints_initialized)
		return false;

	for_each_possible_cpu(cpu) {
		for (int type = 0; type < TYPE_MAX; ++type) {
			struct bp_cpuinfo *info = get_bp_info(cpu, type);

			if (info->cpu_pinned)
				return true;

			for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
				if (atomic_read(&info->tsk_pinned.count[slot]))
					return true;
			}
		}
	}

	for (int type = 0; type < TYPE_MAX; ++type) {
		for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
			/*
			 * Warn, because if there are CPU-pinned counters, we
			 * should never get here; bp_cpuinfo::cpu_pinned should
			 * be consistent with the global cpu_pinned histogram.
			 */
			if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
				return true;

			if (atomic_read(&tsk_pinned_all[type].count[slot]))
				return true;
		}
	}

	return false;
}

static struct notifier_block hw_breakpoint_exceptions_nb = {
	.notifier_call = hw_breakpoint_exceptions_notify,
	/* we need to be notified first */
	.priority = 0x7fffffff
};

static void bp_perf_event_destroy(struct perf_event *event)
{
	release_bp_slot(event);
}

static int hw_breakpoint_event_init(struct perf_event *bp)
{
	int err;

	if (bp->attr.type != PERF_TYPE_BREAKPOINT)
		return -ENOENT;

	/*
	 * no branch sampling for breakpoint events
	 */
	if (has_branch_stack(bp))
		return -EOPNOTSUPP;

	err = register_perf_hw_breakpoint(bp);
	if (err)
		return err;

	bp->destroy = bp_perf_event_destroy;

	return 0;
}

static int hw_breakpoint_add(struct perf_event *bp, int flags)
{
	if (!(flags & PERF_EF_START))
		bp->hw.state = PERF_HES_STOPPED;

	if (is_sampling_event(bp)) {
		bp->hw.last_period = bp->hw.sample_period;
		perf_swevent_set_period(bp);
	}

	return arch_install_hw_breakpoint(bp);
}

static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
	arch_uninstall_hw_breakpoint(bp);
}

static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
	bp->hw.state = 0;
}

static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
	bp->hw.state = PERF_HES_STOPPED;
}

static struct pmu perf_breakpoint = {
	.task_ctx_nr	= perf_sw_context, /* could eventually get its own */

	.event_init	= hw_breakpoint_event_init,
	.add		= hw_breakpoint_add,
	.del		= hw_breakpoint_del,
	.start		= hw_breakpoint_start,
	.stop		= hw_breakpoint_stop,
	.read		= hw_breakpoint_pmu_read,
};

int __init init_hw_breakpoint(void)
{
	int ret;

	ret = rhltable_init(&task_bps_ht, &task_bps_ht_params);
	if (ret)
		return ret;

	ret = init_breakpoint_slots();
	if (ret)
		return ret;

	constraints_initialized = true;

	perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

	return register_die_notifier(&hw_breakpoint_exceptions_nb);
}