1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Kernel Probes (KProbes)
4 *
5 * Copyright (C) IBM Corporation, 2002, 2004
6 *
7 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
8 * Probes initial implementation (includes suggestions from
9 * Rusty Russell).
10 * 2004-Aug Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
11 * hlists and exceptions notifier as suggested by Andi Kleen.
12 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
13 * interface to access function arguments.
14 * 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
15 * exceptions notifier to be first on the priority list.
16 * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
17 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
18 * <prasanna@in.ibm.com> added function-return probes.
19 */
20
21 #define pr_fmt(fmt) "kprobes: " fmt
22
23 #include <linux/kprobes.h>
24 #include <linux/hash.h>
25 #include <linux/init.h>
26 #include <linux/slab.h>
27 #include <linux/stddef.h>
28 #include <linux/export.h>
29 #include <linux/kallsyms.h>
30 #include <linux/freezer.h>
31 #include <linux/seq_file.h>
32 #include <linux/debugfs.h>
33 #include <linux/sysctl.h>
34 #include <linux/kdebug.h>
35 #include <linux/kthread.h>
36 #include <linux/memory.h>
37 #include <linux/ftrace.h>
38 #include <linux/cpu.h>
39 #include <linux/jump_label.h>
40 #include <linux/static_call.h>
41 #include <linux/perf_event.h>
42 #include <linux/execmem.h>
43 #include <linux/cleanup.h>
44 #include <linux/wait.h>
45
46 #include <asm/sections.h>
47 #include <asm/cacheflush.h>
48 #include <asm/errno.h>
49 #include <linux/uaccess.h>
50
51 #define KPROBE_HASH_BITS 6
52 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
53
54 #if !defined(CONFIG_OPTPROBES) || !defined(CONFIG_SYSCTL)
55 #define kprobe_sysctls_init() do { } while (0)
56 #endif
57
58 static int kprobes_initialized;
59 /* kprobe_table can be accessed by
60 * - Normal hlist traversal and RCU add/del while 'kprobe_mutex' is held,
61 * or
62 * - RCU hlist traversal with preemption disabled (breakpoint handlers).
63 */
64 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
65
66 /* NOTE: change this value only with 'kprobe_mutex' held */
67 static bool kprobes_all_disarmed;
68
69 /* This protects 'kprobe_table' and 'optimizing_list' */
70 static DEFINE_MUTEX(kprobe_mutex);
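/*
 * Per-CPU pointer to the kprobe whose handler is currently running on this
 * CPU; set and cleared by set_kprobe_instance()/reset_kprobe_instance()
 * around each handler invocation.
 */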
71 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
72
73 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
74 unsigned int __unused)
75 {
76 return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
77 }
78
79 /*
80 * Blacklist -- list of 'struct kprobe_blacklist_entry' to store info where
81 * kprobes can not probe.
82 */
83 static LIST_HEAD(kprobe_blacklist);
84
85 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
86 /*
87 * 'kprobe::ainsn.insn' points to the copy of the instruction to be
88 * single-stepped. x86_64, POWER4 and above have no-exec support and
89 * stepping on the instruction on a vmalloced/kmalloced/data page
90 * is a recipe for disaster
91 */
92 struct kprobe_insn_page {
93 struct list_head list;
94 kprobe_opcode_t *insns; /* Page of instruction slots */
95 struct kprobe_insn_cache *cache;
96 int nused;
97 int ngarbage;
98 char slot_used[];
99 };
100
101 static int slots_per_page(struct kprobe_insn_cache *c)
102 {
103 return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
104 }
105
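/*
 * Lifecycle of an instruction slot: SLOT_CLEAN (free) -> SLOT_USED (handed
 * out by __get_insn_slot()) -> SLOT_DIRTY (freed with 'dirty' set, awaiting
 * garbage collection) -> SLOT_CLEAN again after collect_garbage_slots().
 */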
106 enum kprobe_slot_state {
107 SLOT_CLEAN = 0,
108 SLOT_DIRTY = 1,
109 SLOT_USED = 2,
110 };
111
112 void __weak *alloc_insn_page(void)
113 {
114 /*
115 * Use execmem_alloc() so this page is within +/- 2GB of where the
116 * kernel image and loaded module images reside. This is required
117 * for most of the architectures.
118 * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
119 */
120 return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
121 }
122
123 static void free_insn_page(void *page)
124 {
125 execmem_free(page);
126 }
127
128 struct kprobe_insn_cache kprobe_insn_slots = {
129 .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
130 .alloc = alloc_insn_page,
131 .free = free_insn_page,
132 .sym = KPROBE_INSN_PAGE_SYM,
133 .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
134 .insn_size = MAX_INSN_SIZE,
135 .nr_garbage = 0,
136 };
137 static int collect_garbage_slots(struct kprobe_insn_cache *c);
138
139 /**
140 * __get_insn_slot - Find a slot on an executable page for an instruction.
141 * @c: Pointer to kprobe instruction cache
142 *
143 * Description: Locates an available slot on the existing executable pages,
144 * or allocates a new executable page if there's no room on the existing ones.
145 * Return: Pointer to instruction slot on success, NULL on failure.
146 */
147 kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
148 {
149 struct kprobe_insn_page *kip;
150
151 /* Since the slot array is not protected by rcu, we need a mutex */
152 guard(mutex)(&c->mutex);
153 do {
154 guard(rcu)();
155 list_for_each_entry_rcu(kip, &c->pages, list) {
156 if (kip->nused < slots_per_page(c)) {
157 int i;
158
159 for (i = 0; i < slots_per_page(c); i++) {
160 if (kip->slot_used[i] == SLOT_CLEAN) {
161 kip->slot_used[i] = SLOT_USED;
162 kip->nused++;
163 return kip->insns + (i * c->insn_size);
164 }
165 }
166 /* kip->nused is broken. Fix it. */
167 kip->nused = slots_per_page(c);
168 WARN_ON(1);
169 }
170 }
171 /* If there are any garbage slots, collect them and try again. */
172 } while (c->nr_garbage && collect_garbage_slots(c) == 0);
173
174 /* All out of space. Need to allocate a new page. */
175 kip = kmalloc_flex(*kip, slot_used, slots_per_page(c), GFP_KERNEL);
176 if (!kip)
177 return NULL;
178
179 kip->insns = c->alloc();
180 if (!kip->insns) {
181 kfree(kip);
182 return NULL;
183 }
184 INIT_LIST_HEAD(&kip->list);
185 memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
186 kip->slot_used[0] = SLOT_USED;
187 kip->nused = 1;
188 kip->ngarbage = 0;
189 kip->cache = c;
190 list_add_rcu(&kip->list, &c->pages);
191
192 /* Record the perf ksymbol register event after adding the page */
193 perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
194 PAGE_SIZE, false, c->sym);
195
196 return kip->insns;
197 }
198
199 /* Return true if the page has no used slots left after collection, otherwise false. */
200 static bool collect_one_slot(struct kprobe_insn_page *kip, int idx)
201 {
202 kip->slot_used[idx] = SLOT_CLEAN;
203 kip->nused--;
204 if (kip->nused != 0)
205 return false;
206
207 /*
208 * Page is no longer in use. Free it unless
209 * it's the last one. We keep the last one
210 * so as not to have to set it up again the
211 * next time somebody inserts a probe.
212 */
213 if (!list_is_singular(&kip->list)) {
214 /*
215 * Record perf ksymbol unregister event before removing
216 * the page.
217 */
218 perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
219 (unsigned long)kip->insns, PAGE_SIZE, true,
220 kip->cache->sym);
221 list_del_rcu(&kip->list);
222 synchronize_rcu();
223 kip->cache->free(kip->insns);
224 kfree(kip);
225 }
226 return true;
227 }
228
229 static int collect_garbage_slots(struct kprobe_insn_cache *c)
230 {
231 struct kprobe_insn_page *kip, *next;
232
233 /* Ensure no-one is still executing in the garbage slots */
234 synchronize_rcu();
235
236 list_for_each_entry_safe(kip, next, &c->pages, list) {
237 int i;
238
239 if (kip->ngarbage == 0)
240 continue;
241 kip->ngarbage = 0; /* we will collect all garbages */
242 for (i = 0; i < slots_per_page(c); i++) {
243 if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
244 break;
245 }
246 }
247 c->nr_garbage = 0;
248 return 0;
249 }
250
251 static long __find_insn_page(struct kprobe_insn_cache *c,
252 kprobe_opcode_t *slot, struct kprobe_insn_page **pkip)
253 {
254 struct kprobe_insn_page *kip = NULL;
255 long idx;
256
257 guard(rcu)();
258 list_for_each_entry_rcu(kip, &c->pages, list) {
259 idx = ((long)slot - (long)kip->insns) /
260 (c->insn_size * sizeof(kprobe_opcode_t));
261 if (idx >= 0 && idx < slots_per_page(c)) {
262 *pkip = kip;
263 return idx;
264 }
265 }
266 /* Could not find this slot. */
267 WARN_ON(1);
268 *pkip = NULL;
269 return -1;
270 }
271
272 void __free_insn_slot(struct kprobe_insn_cache *c,
273 kprobe_opcode_t *slot, int dirty)
274 {
275 struct kprobe_insn_page *kip = NULL;
276 long idx;
277
278 guard(mutex)(&c->mutex);
279 idx = __find_insn_page(c, slot, &kip);
280 /* Mark and sweep: this may sleep */
281 if (kip) {
282 /* Check double free */
283 WARN_ON(kip->slot_used[idx] != SLOT_USED);
284 if (dirty) {
285 kip->slot_used[idx] = SLOT_DIRTY;
286 kip->ngarbage++;
287 if (++c->nr_garbage > slots_per_page(c))
288 collect_garbage_slots(c);
289 } else {
290 collect_one_slot(kip, idx);
291 }
292 }
293 }
294
295 /*
296 * Check whether the given address is on a page of kprobe instruction slots.
297 * This is used for checking whether an address found on a stack
298 * is in a text area or not.
299 */
300 bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
301 {
302 struct kprobe_insn_page *kip;
303 bool ret = false;
304
305 rcu_read_lock();
306 list_for_each_entry_rcu(kip, &c->pages, list) {
307 if (addr >= (unsigned long)kip->insns &&
308 addr < (unsigned long)kip->insns + PAGE_SIZE) {
309 ret = true;
310 break;
311 }
312 }
313 rcu_read_unlock();
314
315 return ret;
316 }
317
318 int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
319 unsigned long *value, char *type, char *sym)
320 {
321 struct kprobe_insn_page *kip;
322 int ret = -ERANGE;
323
324 rcu_read_lock();
325 list_for_each_entry_rcu(kip, &c->pages, list) {
326 if ((*symnum)--)
327 continue;
328 strscpy(sym, c->sym, KSYM_NAME_LEN);
329 *type = 't';
330 *value = (unsigned long)kip->insns;
331 ret = 0;
332 break;
333 }
334 rcu_read_unlock();
335
336 return ret;
337 }
338
339 #ifdef CONFIG_OPTPROBES
340 void __weak *alloc_optinsn_page(void)
341 {
342 return alloc_insn_page();
343 }
344
345 void __weak free_optinsn_page(void *page)
346 {
347 free_insn_page(page);
348 }
349
350 /* For optimized_kprobe buffer */
351 struct kprobe_insn_cache kprobe_optinsn_slots = {
352 .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
353 .alloc = alloc_optinsn_page,
354 .free = free_optinsn_page,
355 .sym = KPROBE_OPTINSN_PAGE_SYM,
356 .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
357 /* .insn_size is initialized later */
358 .nr_garbage = 0,
359 };
360 #endif /* CONFIG_OPTPROBES */
361 #endif /* __ARCH_WANT_KPROBES_INSN_SLOT */
362
363 /* We have preemption disabled, so it is safe to use the __ versions */
364 static inline void set_kprobe_instance(struct kprobe *kp)
365 {
366 __this_cpu_write(kprobe_instance, kp);
367 }
368
369 static inline void reset_kprobe_instance(void)
370 {
371 __this_cpu_write(kprobe_instance, NULL);
372 }
373
374 /*
375 * This routine is called either:
376 * - under the 'kprobe_mutex' - during kprobe_[un]register().
377 * OR
378 * - with preemption disabled - from architecture specific code.
379 */
380 struct kprobe *get_kprobe(void *addr)
381 {
382 struct hlist_head *head;
383 struct kprobe *p;
384
385 head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
386 hlist_for_each_entry_rcu(p, head, hlist,
387 lockdep_is_held(&kprobe_mutex)) {
388 if (p->addr == addr)
389 return p;
390 }
391
392 return NULL;
393 }
394 NOKPROBE_SYMBOL(get_kprobe);
395
396 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
397
398 /* Return true if 'p' is an aggregator */
399 static inline bool kprobe_aggrprobe(struct kprobe *p)
400 {
401 return p->pre_handler == aggr_pre_handler;
402 }
403
404 /* Return true if 'p' is unused */
405 static inline bool kprobe_unused(struct kprobe *p)
406 {
407 return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
408 list_empty(&p->list);
409 }
410
411 /* Keep all fields in the kprobe consistent. */
412 static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
413 {
414 memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
415 memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
416 }
417
418 #ifdef CONFIG_OPTPROBES
419 /* NOTE: This is protected by 'kprobe_mutex'. */
420 static bool kprobes_allow_optimization;
421
422 /*
423 * Call all 'kprobe::pre_handler' on the list, but ignore their return values.
424 * This must be called from arch-dep optimized caller.
425 */
426 void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
427 {
428 struct kprobe *kp;
429
430 list_for_each_entry_rcu(kp, &p->list, list) {
431 if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
432 set_kprobe_instance(kp);
433 kp->pre_handler(kp, regs);
434 }
435 reset_kprobe_instance();
436 }
437 }
438 NOKPROBE_SYMBOL(opt_pre_handler);
439
440 /* Free optimized instructions and optimized_kprobe */
441 static void free_aggr_kprobe(struct kprobe *p)
442 {
443 struct optimized_kprobe *op;
444
445 op = container_of(p, struct optimized_kprobe, kp);
446 arch_remove_optimized_kprobe(op);
447 arch_remove_kprobe(p);
448 kfree(op);
449 }
450
451 /* Return true if the kprobe is ready for optimization. */
452 static inline int kprobe_optready(struct kprobe *p)
453 {
454 struct optimized_kprobe *op;
455
456 if (kprobe_aggrprobe(p)) {
457 op = container_of(p, struct optimized_kprobe, kp);
458 return arch_prepared_optinsn(&op->optinsn);
459 }
460
461 return 0;
462 }
463
464 /* Return true if the kprobe is disarmed. Note: p must be on hash list */
465 bool kprobe_disarmed(struct kprobe *p)
466 {
467 struct optimized_kprobe *op;
468
469 /* If the kprobe is not an aggr/opt probe, just return whether it is disabled */
470 if (!kprobe_aggrprobe(p))
471 return kprobe_disabled(p);
472
473 op = container_of(p, struct optimized_kprobe, kp);
474
475 return kprobe_disabled(p) && list_empty(&op->list);
476 }
477
478 /* Return true if the probe is queued on (un)optimizing lists */
479 static bool kprobe_queued(struct kprobe *p)
480 {
481 struct optimized_kprobe *op;
482
483 if (kprobe_aggrprobe(p)) {
484 op = container_of(p, struct optimized_kprobe, kp);
485 if (!list_empty(&op->list))
486 return true;
487 }
488 return false;
489 }
490
491 /*
492 * Return an optimized kprobe whose optimizing code replaces
493 * instructions including 'addr' (exclude breakpoint).
494 */
495 static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr)
496 {
497 int i;
498 struct kprobe *p = NULL;
499 struct optimized_kprobe *op;
500
501 /* Don't check i == 0, since that is a breakpoint case. */
502 for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++)
503 p = get_kprobe(addr - i);
504
505 if (p && kprobe_optready(p)) {
506 op = container_of(p, struct optimized_kprobe, kp);
507 if (arch_within_optimized_kprobe(op, addr))
508 return p;
509 }
510
511 return NULL;
512 }
513
514 /* Optimization staging list, protected by 'kprobe_mutex' */
515 static LIST_HEAD(optimizing_list);
516 static LIST_HEAD(unoptimizing_list);
517 static LIST_HEAD(freeing_list);
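/*
 * 'optimizing_list' holds probes queued for optimization, 'unoptimizing_list'
 * holds probes queued to be reverted to breakpoints, and 'freeing_list' holds
 * aggregator probes waiting to be reclaimed by do_free_cleaned_kprobes().
 */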
518
519 static void optimize_kprobe(struct kprobe *p);
520 static struct task_struct *kprobe_optimizer_task;
521 static wait_queue_head_t kprobe_optimizer_wait;
522 static atomic_t optimizer_state;
523 enum {
524 OPTIMIZER_ST_IDLE = 0,
525 OPTIMIZER_ST_KICKED = 1,
526 OPTIMIZER_ST_FLUSHING = 2,
527 };
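/*
 * 'optimizer_state' tracks the optimizer kthread: IDLE until
 * kick_kprobe_optimizer() moves it to KICKED, and FLUSHING when
 * wait_for_kprobe_optimizer_locked() requests an immediate run.
 */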
528
529 static DECLARE_COMPLETION(optimizer_completion);
530
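/* Jiffies the optimizer thread waits after a normal kick, batching requests */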
531 #define OPTIMIZE_DELAY 5
532
533 /*
534 * Optimize (replace a breakpoint with a jump) kprobes listed on
535 * 'optimizing_list'.
536 */
537 static void do_optimize_kprobes(void)
538 {
539 lockdep_assert_held(&text_mutex);
540 /*
541 * The optimization/unoptimization code refers to 'online_cpus' via
542 * stop_machine(), while cpu-hotplug modifies 'online_cpus'. At the same
543 * time, 'text_mutex' is held both here and in the cpu-hotplug path.
544 * This combination can cause a deadlock (cpu-hotplug tries to lock
545 * 'text_mutex' but stop_machine() can not be done because
546 * 'online_cpus' has been changed).
547 * To avoid this deadlock, the caller must hold the cpu-hotplug lock,
548 * preventing cpu-hotplug from running outside of the 'text_mutex' locking.
549 */
550 lockdep_assert_cpus_held();
551
552 /* Optimization is never done while kprobes are all disarmed */
553 if (kprobes_all_disarmed || !kprobes_allow_optimization ||
554 list_empty(&optimizing_list))
555 return;
556
557 arch_optimize_kprobes(&optimizing_list);
558 }
559
560 /*
561 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
562 * if needed) kprobes listed on 'unoptimizing_list'.
563 */
564 static void do_unoptimize_kprobes(void)
565 {
566 struct optimized_kprobe *op, *tmp;
567
568 lockdep_assert_held(&text_mutex);
569 /* See comment in do_optimize_kprobes() */
570 lockdep_assert_cpus_held();
571
572 if (!list_empty(&unoptimizing_list))
573 arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
574
575 /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
576 list_for_each_entry_safe(op, tmp, &freeing_list, list) {
577 /* Switching from detour code to origin */
578 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
579 /* Disarm probes if marked disabled and not gone */
580 if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
581 arch_disarm_kprobe(&op->kp);
582 if (kprobe_unused(&op->kp)) {
583 /*
584 * Remove unused probes from hash list. After waiting
585 * for synchronization, these probes are reclaimed.
586 * (reclaiming is done by do_free_cleaned_kprobes().)
587 */
588 hlist_del_rcu(&op->kp.hlist);
589 } else
590 list_del_init(&op->list);
591 }
592 }
593
594 /* Reclaim all kprobes on the 'freeing_list' */
595 static void do_free_cleaned_kprobes(void)
596 {
597 struct optimized_kprobe *op, *tmp;
598
599 list_for_each_entry_safe(op, tmp, &freeing_list, list) {
600 list_del_init(&op->list);
601 if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) {
602 /*
603 * This must not happen, but if there is a kprobe
604 * still in use, keep it on kprobes hash list.
605 */
606 continue;
607 }
608
609 /*
610 * The aggregator was holding back another probe while it sat on the
611 * unoptimizing/freeing lists. Now that the aggregator has been fully
612 * reverted we can safely retry the optimization of that sibling.
613 */
614
615 struct kprobe *_p = get_optimized_kprobe(op->kp.addr);
616 if (unlikely(_p))
617 optimize_kprobe(_p);
618
619 free_aggr_kprobe(&op->kp);
620 }
621 }
622
623 static void kick_kprobe_optimizer(void);
624
625 /* Kprobe jump optimizer */
626 static void kprobe_optimizer(void)
627 {
628 guard(mutex)(&kprobe_mutex);
629
630 scoped_guard(cpus_read_lock) {
631 guard(mutex)(&text_mutex);
632
633 /*
634 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
635 * kprobes before waiting for the quiescence period.
636 */
637 do_unoptimize_kprobes();
638
639 /*
640 * Step 2: Wait for a quiescence period to ensure that all potentially
641 * preempted tasks have scheduled normally. Because an optprobe
642 * may modify multiple instructions, there is a chance that a task was
643 * preempted on the Nth instruction. In that case, the task could return
644 * into the 2nd-Nth byte of the jump instruction. This wait avoids that.
645 * Note that on a non-preemptive kernel, this is transparently converted
646 * to synchronize_sched() to wait for all interrupts to have completed.
647 */
648 synchronize_rcu_tasks();
649
650 /* Step 3: Optimize kprobes after the quiescence period */
651 do_optimize_kprobes();
652
653 /* Step 4: Free cleaned kprobes after the quiescence period */
654 do_free_cleaned_kprobes();
655 }
656
657 /* Step 5: Complete a pending flush request, if any, then kick the optimizer again if more work is queued. */
658 if (completion_done(&optimizer_completion))
659 complete(&optimizer_completion);
660
661 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
662 kick_kprobe_optimizer(); /* normal kick */
663 }
664
665 static int kprobe_optimizer_thread(void *data)
666 {
667 while (!kthread_should_stop()) {
668 /* To avoid hung_task, wait in interruptible state. */
669 wait_event_interruptible(kprobe_optimizer_wait,
670 atomic_read(&optimizer_state) != OPTIMIZER_ST_IDLE ||
671 kthread_should_stop());
672
673 if (kthread_should_stop())
674 break;
675
676 /*
677 * If it was a normal kick, wait for OPTIMIZE_DELAY.
678 * This wait can be interrupted by a flush request.
679 */
680 if (atomic_read(&optimizer_state) == OPTIMIZER_ST_KICKED)
681 wait_event_interruptible_timeout(
682 kprobe_optimizer_wait,
683 atomic_read(&optimizer_state) == OPTIMIZER_ST_FLUSHING ||
684 kthread_should_stop(),
685 OPTIMIZE_DELAY);
686
687 if (kthread_should_stop())
688 break;
689
690 atomic_set(&optimizer_state, OPTIMIZER_ST_IDLE);
691
692 kprobe_optimizer();
693 }
694 return 0;
695 }
696
697 /* Start optimizer after OPTIMIZE_DELAY passed */
698 static void kick_kprobe_optimizer(void)
699 {
700 lockdep_assert_held(&kprobe_mutex);
701 if (atomic_cmpxchg(&optimizer_state,
702 OPTIMIZER_ST_IDLE, OPTIMIZER_ST_KICKED) == OPTIMIZER_ST_IDLE)
703 wake_up(&kprobe_optimizer_wait);
704 }
705
706 static void wait_for_kprobe_optimizer_locked(void)
707 {
708 lockdep_assert_held(&kprobe_mutex);
709
710 while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
711 init_completion(&optimizer_completion);
712 /*
713 * Set state to OPTIMIZER_ST_FLUSHING and wake up the thread if it's
714 * idle. If it's already kicked, it will see the state change.
715 */
716 if (atomic_xchg_acquire(&optimizer_state,
717 OPTIMIZER_ST_FLUSHING) != OPTIMIZER_ST_FLUSHING)
718 wake_up(&kprobe_optimizer_wait);
719
720 mutex_unlock(&kprobe_mutex);
721 wait_for_completion(&optimizer_completion);
722 mutex_lock(&kprobe_mutex);
723 }
724 }
725
726 /* Wait for completing optimization and unoptimization */
727 void wait_for_kprobe_optimizer(void)
728 {
729 guard(mutex)(&kprobe_mutex);
730
731 wait_for_kprobe_optimizer_locked();
732 }
733
734 bool optprobe_queued_unopt(struct optimized_kprobe *op)
735 {
736 struct optimized_kprobe *_op;
737
738 list_for_each_entry(_op, &unoptimizing_list, list) {
739 if (op == _op)
740 return true;
741 }
742
743 return false;
744 }
745
746 /* Optimize kprobe if p is ready to be optimized */
747 static void optimize_kprobe(struct kprobe *p)
748 {
749 struct optimized_kprobe *op;
750
751 /* Check if the kprobe is disabled or not ready for optimization. */
752 if (!kprobe_optready(p) || !kprobes_allow_optimization ||
753 (kprobe_disabled(p) || kprobes_all_disarmed))
754 return;
755
756 /* kprobes with 'post_handler' can not be optimized */
757 if (p->post_handler)
758 return;
759
760 op = container_of(p, struct optimized_kprobe, kp);
761
762 /* Check that there are no other kprobes at the optimized instructions */
763 if (arch_check_optimized_kprobe(op) < 0)
764 return;
765
766 /* Check if it is already optimized. */
767 if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
768 if (optprobe_queued_unopt(op)) {
769 /* This is under unoptimizing. Just dequeue the probe */
770 list_del_init(&op->list);
771 }
772 return;
773 }
774 op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
775
776 /*
777 * On the 'unoptimizing_list' and 'optimizing_list',
778 * 'op' must have OPTIMIZED flag
779 */
780 if (WARN_ON_ONCE(!list_empty(&op->list)))
781 return;
782
783 list_add(&op->list, &optimizing_list);
784 kick_kprobe_optimizer();
785 }
786
787 /* Short cut to direct unoptimizing */
788 static void force_unoptimize_kprobe(struct optimized_kprobe *op)
789 {
790 lockdep_assert_cpus_held();
791 arch_unoptimize_kprobe(op);
792 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
793 }
794
795 /* Unoptimize a kprobe if p is optimized */
796 static void unoptimize_kprobe(struct kprobe *p, bool force)
797 {
798 struct optimized_kprobe *op;
799
800 if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
801 return; /* This is not an optprobe nor optimized */
802
803 op = container_of(p, struct optimized_kprobe, kp);
804 if (!kprobe_optimized(p))
805 return;
806
807 if (!list_empty(&op->list)) {
808 if (optprobe_queued_unopt(op)) {
809 /* Queued in unoptimizing queue */
810 if (force) {
811 /*
812 * Forcibly unoptimize the kprobe here, and queue it
813 * in the freeing list for release afterwards.
814 */
815 force_unoptimize_kprobe(op);
816 list_move(&op->list, &freeing_list);
817 }
818 } else {
819 /* Dequeue from the optimizing queue */
820 list_del_init(&op->list);
821 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
822 }
823 return;
824 }
825
826 /* Optimized kprobe case */
827 if (force) {
828 /* Forcibly update the code: this is a special case */
829 force_unoptimize_kprobe(op);
830 } else {
831 list_add(&op->list, &unoptimizing_list);
832 kick_kprobe_optimizer();
833 }
834 }
835
836 /* Cancel unoptimizing for reusing */
837 static int reuse_unused_kprobe(struct kprobe *ap)
838 {
839 struct optimized_kprobe *op;
840
841 /*
842 * An unused kprobe MUST be on its way through delayed unoptimizing (meaning
843 * there is still a relative jump in place) and disabled.
844 */
845 op = container_of(ap, struct optimized_kprobe, kp);
846 WARN_ON_ONCE(list_empty(&op->list));
847 /* Enable the probe again */
848 ap->flags &= ~KPROBE_FLAG_DISABLED;
849 /* Optimize it again. (remove from 'op->list') */
850 if (!kprobe_optready(ap))
851 return -EINVAL;
852
853 optimize_kprobe(ap);
854 return 0;
855 }
856
857 /* Remove optimized instructions */
858 static void kill_optimized_kprobe(struct kprobe *p)
859 {
860 struct optimized_kprobe *op;
861
862 op = container_of(p, struct optimized_kprobe, kp);
863 if (!list_empty(&op->list))
864 /* Dequeue from the (un)optimization queue */
865 list_del_init(&op->list);
866 op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
867
868 if (kprobe_unused(p)) {
869 /*
870 * Unused kprobe is on unoptimizing or freeing list. We move it
871 * to freeing_list and let the kprobe_optimizer() remove it from
872 * the kprobe hash list and free it.
873 */
874 if (optprobe_queued_unopt(op))
875 list_move(&op->list, &freeing_list);
876 }
877
878 /* Don't touch the code, because it is already freed. */
879 arch_remove_optimized_kprobe(op);
880 }
881
882 static inline
883 void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
884 {
885 if (!kprobe_ftrace(p))
886 arch_prepare_optimized_kprobe(op, p);
887 }
888
889 /* Try to prepare optimized instructions */
890 static void prepare_optimized_kprobe(struct kprobe *p)
891 {
892 struct optimized_kprobe *op;
893
894 op = container_of(p, struct optimized_kprobe, kp);
895 __prepare_optimized_kprobe(op, p);
896 }
897
898 /* Allocate new optimized_kprobe and try to prepare optimized instructions. */
899 static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
900 {
901 struct optimized_kprobe *op;
902
903 op = kzalloc_obj(struct optimized_kprobe, GFP_KERNEL);
904 if (!op)
905 return NULL;
906
907 INIT_LIST_HEAD(&op->list);
908 op->kp.addr = p->addr;
909 __prepare_optimized_kprobe(op, p);
910
911 return &op->kp;
912 }
913
914 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
915
916 /*
917 * Prepare an optimized_kprobe and optimize it.
918 * NOTE: 'p' must be a normal registered kprobe.
919 */
920 static void try_to_optimize_kprobe(struct kprobe *p)
921 {
922 struct kprobe *ap;
923 struct optimized_kprobe *op;
924
925 /* Impossible to optimize ftrace-based kprobe. */
926 if (kprobe_ftrace(p))
927 return;
928
929 /* For preparing optimization, jump_label_text_reserved() is called. */
930 guard(cpus_read_lock)();
931 guard(jump_label_lock)();
932 guard(mutex)(&text_mutex);
933
934 ap = alloc_aggr_kprobe(p);
935 if (!ap)
936 return;
937
938 op = container_of(ap, struct optimized_kprobe, kp);
939 if (!arch_prepared_optinsn(&op->optinsn)) {
940 /* If failed to setup optimizing, fallback to kprobe. */
941 arch_remove_optimized_kprobe(op);
942 kfree(op);
943 return;
944 }
945
946 init_aggr_kprobe(ap, p);
947 optimize_kprobe(ap); /* This just kicks optimizer thread. */
948 }
949
950 static void optimize_all_kprobes(void)
951 {
952 struct hlist_head *head;
953 struct kprobe *p;
954 unsigned int i;
955
956 guard(mutex)(&kprobe_mutex);
957 /* If optimization is already allowed, just return. */
958 if (kprobes_allow_optimization)
959 return;
960
961 cpus_read_lock();
962 kprobes_allow_optimization = true;
963 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
964 head = &kprobe_table[i];
965 hlist_for_each_entry(p, head, hlist)
966 if (!kprobe_disabled(p))
967 optimize_kprobe(p);
968 }
969 cpus_read_unlock();
970 pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n");
971 }
972
973 #ifdef CONFIG_SYSCTL
974 static void unoptimize_all_kprobes(void)
975 {
976 struct hlist_head *head;
977 struct kprobe *p;
978 unsigned int i;
979
980 guard(mutex)(&kprobe_mutex);
981 /* If optimization is already prohibited, just return. */
982 if (!kprobes_allow_optimization)
983 return;
984
985 cpus_read_lock();
986 kprobes_allow_optimization = false;
987 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
988 head = &kprobe_table[i];
989 hlist_for_each_entry(p, head, hlist) {
990 if (!kprobe_disabled(p))
991 unoptimize_kprobe(p, false);
992 }
993 }
994 cpus_read_unlock();
995 /* Wait for unoptimizing completion. */
996 wait_for_kprobe_optimizer_locked();
997 pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
998 }
999
1000 static DEFINE_MUTEX(kprobe_sysctl_mutex);
1001 static int sysctl_kprobes_optimization;
1002 static int proc_kprobes_optimization_handler(const struct ctl_table *table,
1003 int write, void *buffer,
1004 size_t *length, loff_t *ppos)
1005 {
1006 int ret;
1007
1008 guard(mutex)(&kprobe_sysctl_mutex);
1009 sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
1010 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
1011
1012 if (sysctl_kprobes_optimization)
1013 optimize_all_kprobes();
1014 else
1015 unoptimize_all_kprobes();
1016
1017 return ret;
1018 }
1019
1020 static const struct ctl_table kprobe_sysctls[] = {
1021 {
1022 .procname = "kprobes-optimization",
1023 .data = &sysctl_kprobes_optimization,
1024 .maxlen = sizeof(int),
1025 .mode = 0644,
1026 .proc_handler = proc_kprobes_optimization_handler,
1027 .extra1 = SYSCTL_ZERO,
1028 .extra2 = SYSCTL_ONE,
1029 },
1030 };
1031
1032 static void __init kprobe_sysctls_init(void)
1033 {
1034 register_sysctl_init("debug", kprobe_sysctls);
1035 }
1036 #endif /* CONFIG_SYSCTL */
1037
1038 /* Put a breakpoint for a probe. */
1039 static void __arm_kprobe(struct kprobe *p)
1040 {
1041 struct kprobe *_p;
1042
1043 lockdep_assert_held(&text_mutex);
1044
1045 /* Find the overlapping optimized kprobes. */
1046 _p = get_optimized_kprobe(p->addr);
1047 if (unlikely(_p))
1048 /* Fallback to unoptimized kprobe */
1049 unoptimize_kprobe(_p, true);
1050
1051 arch_arm_kprobe(p);
1052 optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */
1053 }
1054
1055 /* Remove the breakpoint of a probe. */
1056 static void __disarm_kprobe(struct kprobe *p, bool reopt)
1057 {
1058 struct kprobe *_p;
1059
1060 lockdep_assert_held(&text_mutex);
1061
1062 /* Try to unoptimize */
1063 unoptimize_kprobe(p, kprobes_all_disarmed);
1064
1065 if (!kprobe_queued(p)) {
1066 arch_disarm_kprobe(p);
1067 /* If another kprobe was blocked, re-optimize it. */
1068 _p = get_optimized_kprobe(p->addr);
1069 if (unlikely(_p) && reopt)
1070 optimize_kprobe(_p);
1071 }
1072 }
1073
1074 static void __init init_optprobe(void)
1075 {
1076 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
1077 /* Init 'kprobe_optinsn_slots' for allocation */
1078 kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
1079 #endif
1080
1081 init_waitqueue_head(&kprobe_optimizer_wait);
1082 atomic_set(&optimizer_state, OPTIMIZER_ST_IDLE);
1083 kprobe_optimizer_task = kthread_run(kprobe_optimizer_thread, NULL,
1084 "kprobe-optimizer");
1085 }
1086 #else /* !CONFIG_OPTPROBES */
1087
1088 #define init_optprobe() do {} while (0)
1089 #define optimize_kprobe(p) do {} while (0)
1090 #define unoptimize_kprobe(p, f) do {} while (0)
1091 #define kill_optimized_kprobe(p) do {} while (0)
1092 #define prepare_optimized_kprobe(p) do {} while (0)
1093 #define try_to_optimize_kprobe(p) do {} while (0)
1094 #define __arm_kprobe(p) arch_arm_kprobe(p)
1095 #define __disarm_kprobe(p, o) arch_disarm_kprobe(p)
1096 #define kprobe_disarmed(p) kprobe_disabled(p)
1097 #define wait_for_kprobe_optimizer_locked() \
1098 lockdep_assert_held(&kprobe_mutex)
1099
1100 static int reuse_unused_kprobe(struct kprobe *ap)
1101 {
1102 /*
1103 * If optimized kprobes are NOT supported, the aggr kprobe is
1104 * released at the same time that the last aggregated kprobe is
1105 * unregistered.
1106 * Thus there should be no chance to reuse an unused kprobe.
1107 */
1108 WARN_ON_ONCE(1);
1109 return -EINVAL;
1110 }
1111
1112 static void free_aggr_kprobe(struct kprobe *p)
1113 {
1114 arch_remove_kprobe(p);
1115 kfree(p);
1116 }
1117
1118 static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
1119 {
1120 return kzalloc_obj(struct kprobe, GFP_KERNEL);
1121 }
1122 #endif /* CONFIG_OPTPROBES */
1123
1124 #ifdef CONFIG_KPROBES_ON_FTRACE
1125 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
1126 .func = kprobe_ftrace_handler,
1127 .flags = FTRACE_OPS_FL_SAVE_REGS,
1128 };
1129
1130 static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
1131 .func = kprobe_ftrace_handler,
1132 .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
1133 };
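/*
 * Probes with a post_handler are attached via 'kprobe_ipmodify_ops'
 * (FTRACE_OPS_FL_IPMODIFY); the rest share 'kprobe_ftrace_ops'.
 * arm_kprobe_ftrace()/disarm_kprobe_ftrace() select the ops and its
 * usage counter based on 'p->post_handler'.
 */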
1134
1135 static int kprobe_ipmodify_enabled;
1136 static int kprobe_ftrace_enabled;
1137 bool kprobe_ftrace_disabled;
1138
1139 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1140 int *cnt)
1141 {
1142 int ret;
1143
1144 lockdep_assert_held(&kprobe_mutex);
1145
1146 ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
1147 if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret))
1148 return ret;
1149
1150 if (*cnt == 0) {
1151 ret = register_ftrace_function(ops);
1152 if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret)) {
1153 /*
1154 * At this point, since ops is not registered, we should be safe from
1155 * registering an empty filter.
1156 */
1157 ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1158 return ret;
1159 }
1160 }
1161
1162 (*cnt)++;
1163 return ret;
1164 }
1165
1166 static int arm_kprobe_ftrace(struct kprobe *p)
1167 {
1168 bool ipmodify = (p->post_handler != NULL);
1169
1170 return __arm_kprobe_ftrace(p,
1171 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1172 ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1173 }
1174
1175 static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1176 int *cnt)
1177 {
1178 int ret;
1179
1180 lockdep_assert_held(&kprobe_mutex);
1181
1182 if (*cnt == 1) {
1183 ret = unregister_ftrace_function(ops);
1184 if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret))
1185 return ret;
1186 }
1187
1188 (*cnt)--;
1189
1190 ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1191 WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n",
1192 p->addr, ret);
1193 return ret;
1194 }
1195
1196 static int disarm_kprobe_ftrace(struct kprobe *p)
1197 {
1198 bool ipmodify = (p->post_handler != NULL);
1199
1200 return __disarm_kprobe_ftrace(p,
1201 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1202 ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1203 }
1204
1205 void kprobe_ftrace_kill(void)
1206 {
1207 kprobe_ftrace_disabled = true;
1208 }
1209 #else /* !CONFIG_KPROBES_ON_FTRACE */
1210 static inline int arm_kprobe_ftrace(struct kprobe *p)
1211 {
1212 return -ENODEV;
1213 }
1214
1215 static inline int disarm_kprobe_ftrace(struct kprobe *p)
1216 {
1217 return -ENODEV;
1218 }
1219 #endif
1220
1221 static int prepare_kprobe(struct kprobe *p)
1222 {
1223 /* Must ensure p->addr is really on ftrace */
1224 if (kprobe_ftrace(p))
1225 return arch_prepare_kprobe_ftrace(p);
1226
1227 return arch_prepare_kprobe(p);
1228 }
1229
1230 static int arm_kprobe(struct kprobe *kp)
1231 {
1232 if (unlikely(kprobe_ftrace(kp)))
1233 return arm_kprobe_ftrace(kp);
1234
1235 guard(cpus_read_lock)();
1236 guard(mutex)(&text_mutex);
1237 __arm_kprobe(kp);
1238 return 0;
1239 }
1240
1241 static int disarm_kprobe(struct kprobe *kp, bool reopt)
1242 {
1243 if (unlikely(kprobe_ftrace(kp)))
1244 return disarm_kprobe_ftrace(kp);
1245
1246 guard(cpus_read_lock)();
1247 guard(mutex)(&text_mutex);
1248 __disarm_kprobe(kp, reopt);
1249 return 0;
1250 }
1251
1252 /*
1253 * Aggregate handlers for multiple kprobes support - these handlers
1254 * take care of invoking the individual kprobe handlers on p->list
1255 */
1256 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
1257 {
1258 struct kprobe *kp;
1259
1260 list_for_each_entry_rcu(kp, &p->list, list) {
1261 if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
1262 set_kprobe_instance(kp);
1263 if (kp->pre_handler(kp, regs))
1264 return 1;
1265 }
1266 reset_kprobe_instance();
1267 }
1268 return 0;
1269 }
1270 NOKPROBE_SYMBOL(aggr_pre_handler);
1271
1272 static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
1273 unsigned long flags)
1274 {
1275 struct kprobe *kp;
1276
1277 list_for_each_entry_rcu(kp, &p->list, list) {
1278 if (kp->post_handler && likely(!kprobe_disabled(kp))) {
1279 set_kprobe_instance(kp);
1280 kp->post_handler(kp, regs, flags);
1281 reset_kprobe_instance();
1282 }
1283 }
1284 }
1285 NOKPROBE_SYMBOL(aggr_post_handler);
1286
1287 /* Walks the list and increments 'nmissed' if 'p' has child probes. */
1288 void kprobes_inc_nmissed_count(struct kprobe *p)
1289 {
1290 struct kprobe *kp;
1291
1292 if (!kprobe_aggrprobe(p)) {
1293 p->nmissed++;
1294 } else {
1295 list_for_each_entry_rcu(kp, &p->list, list)
1296 kp->nmissed++;
1297 }
1298 }
1299 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
1300
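/*
 * 'kprobe_busy' is a dummy kprobe; kprobe_busy_begin()/end() install it as
 * 'current_kprobe' so that kprobes hitting on this CPU while the caller
 * touches kprobe internals are treated as reentrant rather than recursing.
 */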
1301 static struct kprobe kprobe_busy = {
1302 .addr = (void *) get_kprobe,
1303 };
1304
1305 void kprobe_busy_begin(void)
1306 {
1307 struct kprobe_ctlblk *kcb;
1308
1309 preempt_disable();
1310 __this_cpu_write(current_kprobe, &kprobe_busy);
1311 kcb = get_kprobe_ctlblk();
1312 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1313 }
1314
1315 void kprobe_busy_end(void)
1316 {
1317 __this_cpu_write(current_kprobe, NULL);
1318 preempt_enable();
1319 }
1320
1321 /* Add the new probe to 'ap->list'. */
1322 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
1323 {
1324 if (p->post_handler)
1325 unoptimize_kprobe(ap, true); /* Fall back to normal kprobe */
1326
1327 list_add_rcu(&p->list, &ap->list);
1328 if (p->post_handler && !ap->post_handler)
1329 ap->post_handler = aggr_post_handler;
1330
1331 return 0;
1332 }
1333
1334 /*
1335 * Fill in the required fields of the aggregator kprobe. Replace the
1336 * earlier kprobe in the hlist with the aggregator kprobe.
1337 */
1338 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
1339 {
1340 /* Copy the insn slot of 'p' to 'ap'. */
1341 copy_kprobe(p, ap);
1342 flush_insn_slot(ap);
1343 ap->addr = p->addr;
1344 ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
1345 ap->pre_handler = aggr_pre_handler;
1346 /* We don't care about a kprobe which has gone. */
1347 if (p->post_handler && !kprobe_gone(p))
1348 ap->post_handler = aggr_post_handler;
1349
1350 INIT_LIST_HEAD(&ap->list);
1351 INIT_HLIST_NODE(&ap->hlist);
1352
1353 list_add_rcu(&p->list, &ap->list);
1354 hlist_replace_rcu(&p->hlist, &ap->hlist);
1355 }
1356
1357 /*
1358 * This registers the second or subsequent kprobe at the same address.
1359 */
1360 static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
1361 {
1362 int ret = 0;
1363 struct kprobe *ap = orig_p;
1364
1365 scoped_guard(cpus_read_lock) {
1366 /* For preparing optimization, jump_label_text_reserved() is called */
1367 guard(jump_label_lock)();
1368 guard(mutex)(&text_mutex);
1369
1370 if (!kprobe_aggrprobe(orig_p)) {
1371 /* If 'orig_p' is not an 'aggr_kprobe', create new one. */
1372 ap = alloc_aggr_kprobe(orig_p);
1373 if (!ap)
1374 return -ENOMEM;
1375 init_aggr_kprobe(ap, orig_p);
1376 } else if (kprobe_unused(ap)) {
1377 /* This probe is going to die. Rescue it */
1378 ret = reuse_unused_kprobe(ap);
1379 if (ret)
1380 return ret;
1381 }
1382
1383 if (kprobe_gone(ap)) {
1384 /*
1385 * Attempting to insert new probe at the same location that
1386 * had a probe in the module vaddr area which already
1387 * freed. So, the instruction slot has already been
1388 * released. We need a new slot for the new probe.
1389 */
1390 ret = arch_prepare_kprobe(ap);
1391 if (ret)
1392 /*
1393 * Even if fail to allocate new slot, don't need to
1394 * free the 'ap'. It will be used next time, or
1395 * freed by unregister_kprobe().
1396 */
1397 return ret;
1398
1399 /* Prepare optimized instructions if possible. */
1400 prepare_optimized_kprobe(ap);
1401
1402 /*
1403 * Clear gone flag to prevent allocating new slot again, and
1404 * set disabled flag because it is not armed yet.
1405 */
1406 ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
1407 | KPROBE_FLAG_DISABLED;
1408 }
1409
1410 /* Copy the insn slot of 'p' to 'ap'. */
1411 copy_kprobe(ap, p);
1412 ret = add_new_kprobe(ap, p);
1413 }
1414
1415 if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
1416 ap->flags &= ~KPROBE_FLAG_DISABLED;
1417 if (!kprobes_all_disarmed) {
1418 /* Arm the breakpoint again. */
1419 ret = arm_kprobe(ap);
1420 if (ret) {
1421 ap->flags |= KPROBE_FLAG_DISABLED;
1422 list_del_rcu(&p->list);
1423 synchronize_rcu();
1424 }
1425 }
1426 }
1427 return ret;
1428 }
1429
1430 bool __weak arch_within_kprobe_blacklist(unsigned long addr)
1431 {
1432 /* The '__kprobes' functions and entry code must not be probed. */
1433 return addr >= (unsigned long)__kprobes_text_start &&
1434 addr < (unsigned long)__kprobes_text_end;
1435 }
1436
1437 static bool __within_kprobe_blacklist(unsigned long addr)
1438 {
1439 struct kprobe_blacklist_entry *ent;
1440
1441 if (arch_within_kprobe_blacklist(addr))
1442 return true;
1443 /*
1444 * If 'kprobe_blacklist' is defined, check the address and
1445 * reject any probe registration in the prohibited area.
1446 */
1447 list_for_each_entry(ent, &kprobe_blacklist, list) {
1448 if (addr >= ent->start_addr && addr < ent->end_addr)
1449 return true;
1450 }
1451 return false;
1452 }
1453
1454 bool within_kprobe_blacklist(unsigned long addr)
1455 {
1456 char symname[KSYM_NAME_LEN], *p;
1457
1458 if (__within_kprobe_blacklist(addr))
1459 return true;
1460
1461 /* Check if the address is on a suffixed-symbol */
1462 if (!lookup_symbol_name(addr, symname)) {
1463 p = strchr(symname, '.');
1464 if (!p)
1465 return false;
1466 *p = '\0';
1467 addr = (unsigned long)kprobe_lookup_name(symname, 0);
1468 if (addr)
1469 return __within_kprobe_blacklist(addr);
1470 }
1471 return false;
1472 }
1473
1474 /*
1475 * arch_adjust_kprobe_addr - adjust the address
1476 * @addr: symbol base address
1477 * @offset: offset within the symbol
1478 * @on_func_entry: was this @addr+@offset on the function entry
1479 *
1480 * Typically returns @addr + @offset, except for special cases where the
1481 * function might be prefixed by a CFI landing pad, in that case any offset
1482 * inside the landing pad is mapped to the first 'real' instruction of the
1483 * symbol.
1484 *
1485 * Specifically, for things like IBT/BTI, skip the resp. ENDBR/BTI.C
1486 * instruction at +0.
1487 */
1488 kprobe_opcode_t *__weak arch_adjust_kprobe_addr(unsigned long addr,
1489 unsigned long offset,
1490 bool *on_func_entry)
1491 {
1492 *on_func_entry = !offset;
1493 return (kprobe_opcode_t *)(addr + offset);
1494 }
1495
1496 /*
1497 * If 'symbol_name' is specified, look it up and add the 'offset'
1498 * to it. This way, we can specify a relative address to a symbol.
1499 * This returns an encoded error if it fails to look up the symbol or the
1500 * combination of parameters is invalid.
1501 */
1502 static kprobe_opcode_t *
1503 _kprobe_addr(kprobe_opcode_t *addr, const char *symbol_name,
1504 unsigned long offset, bool *on_func_entry)
1505 {
1506 if ((symbol_name && addr) || (!symbol_name && !addr))
1507 return ERR_PTR(-EINVAL);
1508
1509 if (symbol_name) {
1510 /*
1511 * Input: @sym + @offset
1512 * Output: @addr + @offset
1513 *
1514 * NOTE: kprobe_lookup_name() does *NOT* fold the offset
1515 * argument into its output!
1516 */
1517 addr = kprobe_lookup_name(symbol_name, offset);
1518 if (!addr)
1519 return ERR_PTR(-ENOENT);
1520 }
1521
1522 /*
1523 * So here we have @addr + @offset, displace it into a new
1524 * @addr' + @offset' where @addr' is the symbol start address.
1525 */
1526 addr = (void *)addr + offset;
1527 if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset))
1528 return ERR_PTR(-ENOENT);
1529 addr = (void *)addr - offset;
1530
1531 /*
1532 * Then ask the architecture to re-combine them, taking care of
1533 * magical function entry details while telling us if this was indeed
1534 * at the start of the function.
1535 */
1536 addr = arch_adjust_kprobe_addr((unsigned long)addr, offset, on_func_entry);
1537 if (!addr)
1538 return ERR_PTR(-EINVAL);
1539
1540 return addr;
1541 }
1542
1543 static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
1544 {
1545 bool on_func_entry;
1546
1547 return _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
1548 }
1549
1550 /*
1551 * Check that 'p' is valid and return the aggregator kprobe
1552 * at the same address.
1553 */
1554 static struct kprobe *__get_valid_kprobe(struct kprobe *p)
1555 {
1556 struct kprobe *ap, *list_p;
1557
1558 lockdep_assert_held(&kprobe_mutex);
1559
1560 ap = get_kprobe(p->addr);
1561 if (unlikely(!ap))
1562 return NULL;
1563
1564 if (p == ap)
1565 return ap;
1566
1567 list_for_each_entry(list_p, &ap->list, list)
1568 if (list_p == p)
1569 /* kprobe p is a valid probe */
1570 return ap;
1571
1572 return NULL;
1573 }
1574
1575 /*
1576 * Warn and return error if the kprobe is being re-registered since
1577 * there must be a software bug.
1578 */
1579 static inline int warn_kprobe_rereg(struct kprobe *p)
1580 {
1581 guard(mutex)(&kprobe_mutex);
1582
1583 if (WARN_ON_ONCE(__get_valid_kprobe(p)))
1584 return -EINVAL;
1585
1586 return 0;
1587 }
1588
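/*
 * If 'p->addr' is an ftrace location, mark the probe as ftrace-based;
 * without CONFIG_KPROBES_ON_FTRACE such an address can not be probed.
 */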
1589 static int check_ftrace_location(struct kprobe *p)
1590 {
1591 unsigned long addr = (unsigned long)p->addr;
1592
1593 if (ftrace_location(addr) == addr) {
1594 #ifdef CONFIG_KPROBES_ON_FTRACE
1595 p->flags |= KPROBE_FLAG_FTRACE;
1596 #else
1597 return -EINVAL;
1598 #endif
1599 }
1600 return 0;
1601 }
1602
1603 static bool is_cfi_preamble_symbol(unsigned long addr)
1604 {
1605 char symbuf[KSYM_NAME_LEN];
1606
1607 if (lookup_symbol_name(addr, symbuf))
1608 return false;
1609
1610 return str_has_prefix(symbuf, "__cfi_") ||
1611 str_has_prefix(symbuf, "__pfx_");
1612 }
1613
1614 static int check_kprobe_address_safe(struct kprobe *p,
1615 struct module **probed_mod)
1616 {
1617 int ret;
1618
1619 ret = check_ftrace_location(p);
1620 if (ret)
1621 return ret;
1622
1623 guard(jump_label_lock)();
1624
1625 /* Ensure the address is in a text area, and find the containing module if one exists. */
1626 *probed_mod = NULL;
1627 if (!core_kernel_text((unsigned long) p->addr)) {
1628 guard(rcu)();
1629 *probed_mod = __module_text_address((unsigned long) p->addr);
1630 if (!(*probed_mod))
1631 return -EINVAL;
1632
1633 /*
1634 * We must hold a refcount of the probed module while updating
1635 * its code to prohibit unexpected unloading.
1636 */
1637 if (unlikely(!try_module_get(*probed_mod)))
1638 return -ENOENT;
1639 }
1640 /* Ensure it is not in a reserved area. */
1641 if (in_gate_area_no_mm((unsigned long) p->addr) ||
1642 within_kprobe_blacklist((unsigned long) p->addr) ||
1643 jump_label_text_reserved(p->addr, p->addr) ||
1644 static_call_text_reserved(p->addr, p->addr) ||
1645 find_bug((unsigned long)p->addr) ||
1646 is_cfi_preamble_symbol((unsigned long)p->addr)) {
1647 module_put(*probed_mod);
1648 return -EINVAL;
1649 }
1650
1651 /* Get module refcount and reject __init functions for loaded modules. */
1652 if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
1653 /*
1654 * If the module has freed its '.init.text', we can't insert
1655 * kprobes there.
1656 */
1657 if (within_module_init((unsigned long)p->addr, *probed_mod) &&
1658 !module_is_coming(*probed_mod)) {
1659 module_put(*probed_mod);
1660 return -ENOENT;
1661 }
1662 }
1663
1664 return 0;
1665 }
1666
1667 static int __register_kprobe(struct kprobe *p)
1668 {
1669 int ret;
1670 struct kprobe *old_p;
1671
1672 guard(mutex)(&kprobe_mutex);
1673
1674 old_p = get_kprobe(p->addr);
1675 if (old_p)
1676 /* Since this may unoptimize 'old_p', locking 'text_mutex'. */
1677 return register_aggr_kprobe(old_p, p);
1678
1679 scoped_guard(cpus_read_lock) {
1680 /* Prevent text modification */
1681 guard(mutex)(&text_mutex);
1682 ret = prepare_kprobe(p);
1683 if (ret)
1684 return ret;
1685 }
1686
1687 INIT_HLIST_NODE(&p->hlist);
1688 hlist_add_head_rcu(&p->hlist,
1689 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
1690
1691 if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
1692 ret = arm_kprobe(p);
1693 if (ret) {
1694 hlist_del_rcu(&p->hlist);
1695 synchronize_rcu();
1696 }
1697 }
1698
1699 /* Try to optimize kprobe */
1700 try_to_optimize_kprobe(p);
1701 return 0;
1702 }
1703
1704 int register_kprobe(struct kprobe *p)
1705 {
1706 int ret;
1707 struct module *probed_mod;
1708 kprobe_opcode_t *addr;
1709 bool on_func_entry;
1710
1711 /* Canonicalize probe address from symbol */
1712 addr = _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
1713 if (IS_ERR(addr))
1714 return PTR_ERR(addr);
1715 p->addr = addr;
1716
1717 ret = warn_kprobe_rereg(p);
1718 if (ret)
1719 return ret;
1720
1721 /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
1722 p->flags &= KPROBE_FLAG_DISABLED;
1723 if (on_func_entry)
1724 p->flags |= KPROBE_FLAG_ON_FUNC_ENTRY;
1725 p->nmissed = 0;
1726 INIT_LIST_HEAD(&p->list);
1727
1728 ret = check_kprobe_address_safe(p, &probed_mod);
1729 if (ret)
1730 return ret;
1731
1732 ret = __register_kprobe(p);
1733
1734 if (probed_mod)
1735 module_put(probed_mod);
1736
1737 return ret;
1738 }
1739 EXPORT_SYMBOL_GPL(register_kprobe);
1740
1741 /* Check if all probes on the 'ap' are disabled. */
1742 static bool aggr_kprobe_disabled(struct kprobe *ap)
1743 {
1744 struct kprobe *kp;
1745
1746 lockdep_assert_held(&kprobe_mutex);
1747
1748 list_for_each_entry(kp, &ap->list, list)
1749 if (!kprobe_disabled(kp))
1750 /*
1751 * Since there is an active probe on the list,
1752 * we can't disable this 'ap'.
1753 */
1754 return false;
1755
1756 return true;
1757 }
1758
1759 static struct kprobe *__disable_kprobe(struct kprobe *p)
1760 {
1761 struct kprobe *orig_p;
1762 int ret;
1763
1764 lockdep_assert_held(&kprobe_mutex);
1765
1766 /* Get an original kprobe for return */
1767 orig_p = __get_valid_kprobe(p);
1768 if (unlikely(orig_p == NULL))
1769 return ERR_PTR(-EINVAL);
1770
1771 if (kprobe_disabled(p))
1772 return orig_p;
1773
1774 /* Disable probe if it is a child probe */
1775 if (p != orig_p)
1776 p->flags |= KPROBE_FLAG_DISABLED;
1777
1778 /* Try to disarm and disable this/parent probe */
1779 if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1780 /*
1781 * Don't be lazy here. Even if 'kprobes_all_disarmed'
1782 * is false, 'orig_p' might not have been armed yet.
1783 * Note arm_all_kprobes() __tries__ to arm all kprobes
1784 * on the best effort basis.
1785 */
1786 if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
1787 ret = disarm_kprobe(orig_p, true);
1788 if (ret) {
1789 p->flags &= ~KPROBE_FLAG_DISABLED;
1790 return ERR_PTR(ret);
1791 }
1792 }
1793 orig_p->flags |= KPROBE_FLAG_DISABLED;
1794 }
1795
1796 return orig_p;
1797 }
1798
1799 /*
1800 * Unregister a kprobe without a scheduler synchronization.
1801 */
1802 static int __unregister_kprobe_top(struct kprobe *p)
1803 {
1804 struct kprobe *ap, *list_p;
1805
1806 /* Disable kprobe. This will disarm it if needed. */
1807 ap = __disable_kprobe(p);
1808 if (IS_ERR(ap))
1809 return PTR_ERR(ap);
1810
1811 WARN_ON(ap != p && !kprobe_aggrprobe(ap));
1812
1813 /*
1814 * If the probe is an independent (and non-optimized) kprobe
1815 * (not an aggrprobe), the last kprobe on the aggrprobe, or the
1816 * kprobe is already disarmed, just remove it from the hash list.
1817 */
1818 if (ap == p ||
1819 (list_is_singular(&ap->list) && kprobe_disarmed(ap))) {
1820 /*
1821 * !disarmed could happen if the probe is under delayed
1822 * unoptimizing.
1823 */
1824 hlist_del_rcu(&ap->hlist);
1825 return 0;
1826 }
1827
1828 /* If disabling probe has special handlers, update aggrprobe */
1829 if (p->post_handler && !kprobe_gone(p)) {
1830 list_for_each_entry(list_p, &ap->list, list) {
1831 if ((list_p != p) && (list_p->post_handler))
1832 break;
1833 }
1834 /* No other probe has post_handler */
1835 if (list_entry_is_head(list_p, &ap->list, list)) {
1836 /*
1837 * For the kprobe-on-ftrace case, we keep the
1838 * post_handler setting to identify this aggrprobe
1839 * armed with kprobe_ipmodify_ops.
1840 */
1841 if (!kprobe_ftrace(ap))
1842 ap->post_handler = NULL;
1843 }
1844 }
1845
1846 /*
1847 * Remove from the aggrprobe: this path will do nothing in
1848 * __unregister_kprobe_bottom().
1849 */
1850 list_del_rcu(&p->list);
1851 if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
1852 /*
1853 * Try to optimize this probe again, because post
1854 * handler may have been changed.
1855 */
1856 optimize_kprobe(ap);
1857 return 0;
1858
1859 }
1860
1861 static void __unregister_kprobe_bottom(struct kprobe *p)
1862 {
1863 struct kprobe *ap;
1864
1865 if (list_empty(&p->list))
1866 /* This is an independent kprobe */
1867 arch_remove_kprobe(p);
1868 else if (list_is_singular(&p->list)) {
1869 /* This is the last child of an aggrprobe */
1870 ap = list_entry(p->list.next, struct kprobe, list);
1871 list_del(&p->list);
1872 free_aggr_kprobe(ap);
1873 }
1874 /* Otherwise, do nothing. */
1875 }
1876
1877 int register_kprobes(struct kprobe **kps, int num)
1878 {
1879 int i, ret = 0;
1880
1881 if (num <= 0)
1882 return -EINVAL;
1883 for (i = 0; i < num; i++) {
1884 ret = register_kprobe(kps[i]);
1885 if (ret < 0) {
1886 if (i > 0)
1887 unregister_kprobes(kps, i);
1888 break;
1889 }
1890 }
1891 return ret;
1892 }
1893 EXPORT_SYMBOL_GPL(register_kprobes);
1894
1895 void unregister_kprobe(struct kprobe *p)
1896 {
1897 unregister_kprobes(&p, 1);
1898 }
1899 EXPORT_SYMBOL_GPL(unregister_kprobe);
1900
1901 void unregister_kprobes(struct kprobe **kps, int num)
1902 {
1903 int i;
1904
1905 if (num <= 0)
1906 return;
1907 scoped_guard(mutex, &kprobe_mutex) {
1908 for (i = 0; i < num; i++)
1909 if (__unregister_kprobe_top(kps[i]) < 0)
1910 kps[i]->addr = NULL;
1911 }
1912 synchronize_rcu();
1913 for (i = 0; i < num; i++)
1914 if (kps[i]->addr)
1915 __unregister_kprobe_bottom(kps[i]);
1916 }
1917 EXPORT_SYMBOL_GPL(unregister_kprobes);
1918
1919 int __weak kprobe_exceptions_notify(struct notifier_block *self,
1920 unsigned long val, void *data)
1921 {
1922 return NOTIFY_DONE;
1923 }
1924 NOKPROBE_SYMBOL(kprobe_exceptions_notify);
1925
1926 static struct notifier_block kprobe_exceptions_nb = {
1927 .notifier_call = kprobe_exceptions_notify,
1928 .priority = 0x7fffffff /* we need to be notified first */
1929 };
1930
1931 #ifdef CONFIG_KRETPROBES
1932
1933 #if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
1934
1935 /* callbacks for objpool of kretprobe instances */
1936 static int kretprobe_init_inst(void *nod, void *context)
1937 {
1938 struct kretprobe_instance *ri = nod;
1939
1940 ri->rph = context;
1941 return 0;
1942 }
1943 static int kretprobe_fini_pool(struct objpool_head *head, void *context)
1944 {
1945 kfree(context);
1946 return 0;
1947 }
1948
1949 static void free_rp_inst_rcu(struct rcu_head *head)
1950 {
1951 struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
1952 struct kretprobe_holder *rph = ri->rph;
1953
1954 objpool_drop(ri, &rph->pool);
1955 }
1956 NOKPROBE_SYMBOL(free_rp_inst_rcu);
1957
1958 static void recycle_rp_inst(struct kretprobe_instance *ri)
1959 {
1960 struct kretprobe *rp = get_kretprobe(ri);
1961
1962 if (likely(rp))
1963 objpool_push(ri, &rp->rph->pool);
1964 else
1965 call_rcu(&ri->rcu, free_rp_inst_rcu);
1966 }
1967 NOKPROBE_SYMBOL(recycle_rp_inst);
1968
1969 /*
1970 * This function is called from delayed_put_task_struct() when a task is
1971 * dead and cleaned up to recycle any kretprobe instances associated with
1972 * this task. These left over instances represent probed functions that
1973 * have been called but will never return.
1974 */
1975 void kprobe_flush_task(struct task_struct *tk)
1976 {
1977 struct kretprobe_instance *ri;
1978 struct llist_node *node;
1979
1980 /* Early boot, not yet initialized. */
1981 if (unlikely(!kprobes_initialized))
1982 return;
1983
1984 kprobe_busy_begin();
1985
1986 node = __llist_del_all(&tk->kretprobe_instances);
1987 while (node) {
1988 ri = container_of(node, struct kretprobe_instance, llist);
1989 node = node->next;
1990
1991 recycle_rp_inst(ri);
1992 }
1993
1994 kprobe_busy_end();
1995 }
1996 NOKPROBE_SYMBOL(kprobe_flush_task);
1997
1998 static inline void free_rp_inst(struct kretprobe *rp)
1999 {
2000 struct kretprobe_holder *rph = rp->rph;
2001
2002 if (!rph)
2003 return;
2004 rp->rph = NULL;
2005 objpool_fini(&rph->pool);
2006 }
2007
2008 /* This assumes 'tsk' is the current task or a task that is not running. */
2009 static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
2010 struct llist_node **cur)
2011 {
2012 struct kretprobe_instance *ri = NULL;
2013 struct llist_node *node = *cur;
2014
2015 if (!node)
2016 node = tsk->kretprobe_instances.first;
2017 else
2018 node = node->next;
2019
2020 while (node) {
2021 ri = container_of(node, struct kretprobe_instance, llist);
2022 if (ri->ret_addr != kretprobe_trampoline_addr()) {
2023 *cur = node;
2024 return ri->ret_addr;
2025 }
2026 node = node->next;
2027 }
2028 return NULL;
2029 }
2030 NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);
2031
2032 /**
2033 * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe
2034 * @tsk: Target task
2035 * @fp: A frame pointer
2036 * @cur: a storage of the loop cursor llist_node pointer for next call
2037 *
2038 * Find the correct return address modified by a kretprobe on @tsk, as an
2039 * unsigned long. If the return address is found, this returns that address
2040 * value; otherwise it returns 0.
2041 * @tsk must be 'current' or a task which is not running. @fp is a hint
2042 * used to find the correct return address - it is compared with the
2043 * kretprobe_instance::fp field. @cur is a loop cursor for searching the
2044 * kretprobe return addresses on @tsk. '*@cur' should be NULL on the
2045 * first call, but '@cur' itself must NOT be NULL.
2046 */
2047 unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
2048 struct llist_node **cur)
2049 {
2050 struct kretprobe_instance *ri;
2051 kprobe_opcode_t *ret;
2052
2053 if (WARN_ON_ONCE(!cur))
2054 return 0;
2055
2056 do {
2057 ret = __kretprobe_find_ret_addr(tsk, cur);
2058 if (!ret)
2059 break;
2060 ri = container_of(*cur, struct kretprobe_instance, llist);
2061 } while (ri->fp != fp);
2062
2063 return (unsigned long)ret;
2064 }
2065 NOKPROBE_SYMBOL(kretprobe_find_ret_addr);
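
/*
 * Illustrative sketch (not part of this file): how a stack unwinder might
 * use the cursor protocol documented above to map trampoline entries back
 * to real return addresses. 'frame_fp' is a hypothetical frame pointer
 * taken from the frame being unwound.
 *
 *	struct llist_node *cur = NULL;
 *	unsigned long real_ret;
 *
 *	real_ret = kretprobe_find_ret_addr(task, frame_fp, &cur);
 *	if (real_ret) {
 *		// Use 'real_ret' instead of the trampoline address, and
 *		// pass the same 'cur' back in for the next hit on 'task'.
 *	}
 */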
2066
2067 void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
2068 kprobe_opcode_t *correct_ret_addr)
2069 {
2070 /*
2071 * Do nothing by default. Please fill this to update the fake return
2072 * address on the stack with the correct one on each arch if possible.
2073 */
2074 }
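
/*
 * Illustrative sketch (an assumption, not an actual architecture
 * implementation): an arch whose trampoline keeps the fake return address
 * in a known stack slot could override the weak function above roughly as
 * follows. Using 'regs->sp' as the slot location is a hypothetical
 * convention chosen only for this example.
 *
 *	void arch_kretprobe_fixup_return(struct pt_regs *regs,
 *					 kprobe_opcode_t *correct_ret_addr)
 *	{
 *		*(unsigned long *)regs->sp = (unsigned long)correct_ret_addr;
 *	}
 */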
2075
2076 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
2077 void *frame_pointer)
2078 {
2079 struct kretprobe_instance *ri = NULL;
2080 struct llist_node *first, *node = NULL;
2081 kprobe_opcode_t *correct_ret_addr;
2082 struct kretprobe *rp;
2083
2084 /* Find correct address and all nodes for this frame. */
2085 correct_ret_addr = __kretprobe_find_ret_addr(current, &node);
2086 if (!correct_ret_addr) {
2087 pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n");
2088 BUG_ON(1);
2089 }
2090
2091 /*
2092 * Set the return address as the instruction pointer, because if the
2093 * user handler calls stack_trace_save_regs() with this 'regs',
2094 * the stack trace will start from the instruction pointer.
2095 */
2096 instruction_pointer_set(regs, (unsigned long)correct_ret_addr);
2097
2098 /* Run the user handler of the nodes. */
2099 first = current->kretprobe_instances.first;
2100 while (first) {
2101 ri = container_of(first, struct kretprobe_instance, llist);
2102
2103 if (WARN_ON_ONCE(ri->fp != frame_pointer))
2104 break;
2105
2106 rp = get_kretprobe(ri);
2107 if (rp && rp->handler) {
2108 struct kprobe *prev = kprobe_running();
2109
2110 __this_cpu_write(current_kprobe, &rp->kp);
2111 ri->ret_addr = correct_ret_addr;
2112 rp->handler(ri, regs);
2113 __this_cpu_write(current_kprobe, prev);
2114 }
2115 if (first == node)
2116 break;
2117
2118 first = first->next;
2119 }
2120
2121 arch_kretprobe_fixup_return(regs, correct_ret_addr);
2122
2123 /* Unlink all nodes for this frame. */
2124 first = current->kretprobe_instances.first;
2125 current->kretprobe_instances.first = node->next;
2126 node->next = NULL;
2127
2128 /* Recycle free instances. */
2129 while (first) {
2130 ri = container_of(first, struct kretprobe_instance, llist);
2131 first = first->next;
2132
2133 recycle_rp_inst(ri);
2134 }
2135
2136 return (unsigned long)correct_ret_addr;
2137 }
2138 NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
2139
2140 /*
2141 * This kprobe pre_handler is registered with every kretprobe. When the
2142 * probe hits, it sets up the return probe.
2143 */
2144 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2145 {
2146 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
2147 struct kretprobe_holder *rph = rp->rph;
2148 struct kretprobe_instance *ri;
2149
2150 ri = objpool_pop(&rph->pool);
2151 if (!ri) {
2152 rp->nmissed++;
2153 return 0;
2154 }
2155
2156 if (rp->entry_handler && rp->entry_handler(ri, regs)) {
2157 objpool_push(ri, &rph->pool);
2158 return 0;
2159 }
2160
2161 arch_prepare_kretprobe(ri, regs);
2162
2163 __llist_add(&ri->llist, &current->kretprobe_instances);
2164
2165 return 0;
2166 }
2167 NOKPROBE_SYMBOL(pre_handler_kretprobe);
2168 #else /* CONFIG_KRETPROBE_ON_RETHOOK */
2169 /*
2170 * This kprobe pre_handler is registered with every kretprobe. When the
2171 * probe hits, it sets up the return probe.
2172 */
2173 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2174 {
2175 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
2176 struct kretprobe_instance *ri;
2177 struct rethook_node *rhn;
2178
2179 rhn = rethook_try_get(rp->rh);
2180 if (!rhn) {
2181 rp->nmissed++;
2182 return 0;
2183 }
2184
2185 ri = container_of(rhn, struct kretprobe_instance, node);
2186
2187 if (rp->entry_handler && rp->entry_handler(ri, regs))
2188 rethook_recycle(rhn);
2189 else
2190 rethook_hook(rhn, regs, kprobe_ftrace(p));
2191
2192 return 0;
2193 }
2194 NOKPROBE_SYMBOL(pre_handler_kretprobe);
2195
2196 static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
2197 unsigned long ret_addr,
2198 struct pt_regs *regs)
2199 {
2200 struct kretprobe *rp = (struct kretprobe *)data;
2201 struct kretprobe_instance *ri;
2202 struct kprobe_ctlblk *kcb;
2203
2204 /* The data must NOT be NULL; if it is, the rethook data structure is broken. */
2205 if (WARN_ON_ONCE(!data) || !rp->handler)
2206 return;
2207
2208 __this_cpu_write(current_kprobe, &rp->kp);
2209 kcb = get_kprobe_ctlblk();
2210 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
2211
2212 ri = container_of(rh, struct kretprobe_instance, node);
2213 rp->handler(ri, regs);
2214
2215 __this_cpu_write(current_kprobe, NULL);
2216 }
2217 NOKPROBE_SYMBOL(kretprobe_rethook_handler);
2218
2219 #endif /* !CONFIG_KRETPROBE_ON_RETHOOK */
2220
2221 /**
2222 * kprobe_on_func_entry() -- check whether given address is function entry
2223 * @addr: Target address
2224 * @sym: Target symbol name
2225 * @offset: The offset from the symbol or the address
2226 *
2227 * This checks whether the given @addr+@offset or @sym+@offset is on a
2228 * function entry address or not.
2229 * This returns 0 if it is the function entry, or -EINVAL if it is not.
2230 * It returns -ENOENT if the symbol or address lookup fails.
2231 * The caller must pass either @addr or @sym (the other must be NULL),
2232 * otherwise this returns -EINVAL.
2233 */
2234 int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
2235 {
2236 bool on_func_entry;
2237 kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset, &on_func_entry);
2238
2239 if (IS_ERR(kp_addr))
2240 return PTR_ERR(kp_addr);
2241
2242 if (!on_func_entry)
2243 return -EINVAL;
2244
2245 return 0;
2246 }
2247
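/*
 * Illustrative sketch (not part of this file): a caller can use
 * kprobe_on_func_entry() to reject an offset probe that is not on a
 * function entry, just as register_kretprobe() does below. 'vfs_read'
 * and the 0x10 offset are example values only.
 *
 *	if (kprobe_on_func_entry(NULL, "vfs_read", 0x10))
 *		return -EINVAL;	// not a function entry point
 */
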
2248 int register_kretprobe(struct kretprobe *rp)
2249 {
2250 int ret;
2251 int i;
2252 void *addr;
2253
2254 ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset);
2255 if (ret)
2256 return ret;
2257
2258 /* If only 'rp->kp.addr' is specified, check reregistering kprobes */
2259 if (rp->kp.addr && warn_kprobe_rereg(&rp->kp))
2260 return -EINVAL;
2261
2262 if (kretprobe_blacklist_size) {
2263 addr = kprobe_addr(&rp->kp);
2264 if (IS_ERR(addr))
2265 return PTR_ERR(addr);
2266
2267 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2268 if (kretprobe_blacklist[i].addr == addr)
2269 return -EINVAL;
2270 }
2271 }
2272
2273 if (rp->data_size > KRETPROBE_MAX_DATA_SIZE)
2274 return -E2BIG;
2275
2276 rp->kp.pre_handler = pre_handler_kretprobe;
2277 rp->kp.post_handler = NULL;
2278
2279 /* Pre-allocate memory for max kretprobe instances */
2280 if (rp->maxactive <= 0)
2281 rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
2282
2283 #ifdef CONFIG_KRETPROBE_ON_RETHOOK
2284 rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler,
2285 sizeof(struct kretprobe_instance) +
2286 rp->data_size, rp->maxactive);
2287 if (IS_ERR(rp->rh))
2288 return PTR_ERR(rp->rh);
2289
2290 rp->nmissed = 0;
2291 /* Establish function entry probe point */
2292 ret = register_kprobe(&rp->kp);
2293 if (ret != 0) {
2294 rethook_free(rp->rh);
2295 rp->rh = NULL;
2296 }
2297 #else /* !CONFIG_KRETPROBE_ON_RETHOOK */
2298 rp->rph = kzalloc_obj(struct kretprobe_holder, GFP_KERNEL);
2299 if (!rp->rph)
2300 return -ENOMEM;
2301
2302 if (objpool_init(&rp->rph->pool, rp->maxactive, rp->data_size +
2303 sizeof(struct kretprobe_instance), GFP_KERNEL,
2304 rp->rph, kretprobe_init_inst, kretprobe_fini_pool)) {
2305 kfree(rp->rph);
2306 rp->rph = NULL;
2307 return -ENOMEM;
2308 }
2309 rcu_assign_pointer(rp->rph->rp, rp);
2310 rp->nmissed = 0;
2311 /* Establish function entry probe point */
2312 ret = register_kprobe(&rp->kp);
2313 if (ret != 0)
2314 free_rp_inst(rp);
2315 #endif
2316 return ret;
2317 }
2318 EXPORT_SYMBOL_GPL(register_kretprobe);
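
/*
 * Illustrative usage sketch (assumed to run from a module's init path;
 * 'kernel_clone' and the handler below are example choices):
 *
 *	static int ret_handler(struct kretprobe_instance *ri,
 *			       struct pt_regs *regs)
 *	{
 *		pr_info("returned %lx\n", regs_return_value(regs));
 *		return 0;
 *	}
 *
 *	static struct kretprobe my_kretprobe = {
 *		.kp.symbol_name	= "kernel_clone",
 *		.handler	= ret_handler,
 *		.maxactive	= 0,	// 0: use the default computed above
 *	};
 *
 *	ret = register_kretprobe(&my_kretprobe);
 */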
2319
2320 int register_kretprobes(struct kretprobe **rps, int num)
2321 {
2322 int ret = 0, i;
2323
2324 if (num <= 0)
2325 return -EINVAL;
2326 for (i = 0; i < num; i++) {
2327 ret = register_kretprobe(rps[i]);
2328 if (ret < 0) {
2329 if (i > 0)
2330 unregister_kretprobes(rps, i);
2331 break;
2332 }
2333 }
2334 return ret;
2335 }
2336 EXPORT_SYMBOL_GPL(register_kretprobes);
2337
2338 void unregister_kretprobe(struct kretprobe *rp)
2339 {
2340 unregister_kretprobes(&rp, 1);
2341 }
2342 EXPORT_SYMBOL_GPL(unregister_kretprobe);
2343
2344 void unregister_kretprobes(struct kretprobe **rps, int num)
2345 {
2346 int i;
2347
2348 if (num <= 0)
2349 return;
2350 for (i = 0; i < num; i++) {
2351 guard(mutex)(&kprobe_mutex);
2352
2353 if (__unregister_kprobe_top(&rps[i]->kp) < 0)
2354 rps[i]->kp.addr = NULL;
2355 #ifdef CONFIG_KRETPROBE_ON_RETHOOK
2356 rethook_free(rps[i]->rh);
2357 #else
2358 rcu_assign_pointer(rps[i]->rph->rp, NULL);
2359 #endif
2360 }
2361
2362 synchronize_rcu();
2363 for (i = 0; i < num; i++) {
2364 if (rps[i]->kp.addr) {
2365 __unregister_kprobe_bottom(&rps[i]->kp);
2366 #ifndef CONFIG_KRETPROBE_ON_RETHOOK
2367 free_rp_inst(rps[i]);
2368 #endif
2369 }
2370 }
2371 }
2372 EXPORT_SYMBOL_GPL(unregister_kretprobes);
2373
2374 #else /* CONFIG_KRETPROBES */
2375 int register_kretprobe(struct kretprobe *rp)
2376 {
2377 return -EOPNOTSUPP;
2378 }
2379 EXPORT_SYMBOL_GPL(register_kretprobe);
2380
2381 int register_kretprobes(struct kretprobe **rps, int num)
2382 {
2383 return -EOPNOTSUPP;
2384 }
2385 EXPORT_SYMBOL_GPL(register_kretprobes);
2386
2387 void unregister_kretprobe(struct kretprobe *rp)
2388 {
2389 }
2390 EXPORT_SYMBOL_GPL(unregister_kretprobe);
2391
2392 void unregister_kretprobes(struct kretprobe **rps, int num)
2393 {
2394 }
2395 EXPORT_SYMBOL_GPL(unregister_kretprobes);
2396
2397 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2398 {
2399 return 0;
2400 }
2401 NOKPROBE_SYMBOL(pre_handler_kretprobe);
2402
2403 #endif /* CONFIG_KRETPROBES */
2404
2405 /* Set the kprobe gone and remove its instruction buffer. */
2406 static void kill_kprobe(struct kprobe *p)
2407 {
2408 struct kprobe *kp;
2409
2410 lockdep_assert_held(&kprobe_mutex);
2411
2412 /*
2413 * The module is going away. We should disarm the kprobe which
2414 * is using ftrace, because the ftrace framework is still available
2415 * at the 'MODULE_STATE_GOING' notification.
2416 */
2417 if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
2418 disarm_kprobe_ftrace(p);
2419
2420 p->flags |= KPROBE_FLAG_GONE;
2421 if (kprobe_aggrprobe(p)) {
2422 /*
2423 * If this is an aggr_kprobe, we have to list all the
2424 * chained probes and mark them GONE.
2425 */
2426 list_for_each_entry(kp, &p->list, list)
2427 kp->flags |= KPROBE_FLAG_GONE;
2428 p->post_handler = NULL;
2429 kill_optimized_kprobe(p);
2430 }
2431 /*
2432 * Here, we can remove insn_slot safely, because no thread calls
2433 * the original probed function (which will be freed soon) any more.
2434 */
2435 arch_remove_kprobe(p);
2436 }
2437
2438 /* Disable one kprobe */
2439 int disable_kprobe(struct kprobe *kp)
2440 {
2441 struct kprobe *p;
2442
2443 guard(mutex)(&kprobe_mutex);
2444
2445 /* Disable this kprobe */
2446 p = __disable_kprobe(kp);
2447
2448 return IS_ERR(p) ? PTR_ERR(p) : 0;
2449 }
2450 EXPORT_SYMBOL_GPL(disable_kprobe);
2451
2452 /* Enable one kprobe */
2453 int enable_kprobe(struct kprobe *kp)
2454 {
2455 int ret = 0;
2456 struct kprobe *p;
2457
2458 guard(mutex)(&kprobe_mutex);
2459
2460 /* Check whether specified probe is valid. */
2461 p = __get_valid_kprobe(kp);
2462 if (unlikely(p == NULL))
2463 return -EINVAL;
2464
2465 if (kprobe_gone(kp))
2466 /* This kprobe has gone; we can't enable it. */
2467 return -EINVAL;
2468
2469 if (p != kp)
2470 kp->flags &= ~KPROBE_FLAG_DISABLED;
2471
2472 if (!kprobes_all_disarmed && kprobe_disabled(p)) {
2473 p->flags &= ~KPROBE_FLAG_DISABLED;
2474 ret = arm_kprobe(p);
2475 if (ret) {
2476 p->flags |= KPROBE_FLAG_DISABLED;
2477 if (p != kp)
2478 kp->flags |= KPROBE_FLAG_DISABLED;
2479 }
2480 }
2481 return ret;
2482 }
2483 EXPORT_SYMBOL_GPL(enable_kprobe);
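
/*
 * Illustrative sketch (not part of this file): a registered probe
 * 'my_kprobe' (hypothetical) can stay registered while being muted and
 * later re-armed via the pair above:
 *
 *	disable_kprobe(&my_kprobe);		// handlers stop running
 *	...
 *	ret = enable_kprobe(&my_kprobe);	// may fail if re-arming fails
 */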
2484
2485 /* Callers must NOT call this in the usual path; it is only for critical cases. */
2486 void dump_kprobe(struct kprobe *kp)
2487 {
2488 pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n",
2489 kp->symbol_name, kp->offset, kp->addr);
2490 }
2491 NOKPROBE_SYMBOL(dump_kprobe);
2492
2493 int kprobe_add_ksym_blacklist(unsigned long entry)
2494 {
2495 struct kprobe_blacklist_entry *ent;
2496 unsigned long offset = 0, size = 0;
2497
2498 if (!kernel_text_address(entry) ||
2499 !kallsyms_lookup_size_offset(entry, &size, &offset))
2500 return -EINVAL;
2501
2502 ent = kmalloc_obj(*ent, GFP_KERNEL);
2503 if (!ent)
2504 return -ENOMEM;
2505 ent->start_addr = entry;
2506 ent->end_addr = entry + size;
2507 INIT_LIST_HEAD(&ent->list);
2508 list_add_tail(&ent->list, &kprobe_blacklist);
2509
2510 return (int)size;
2511 }
2512
2513 /* Add all symbols in given area into kprobe blacklist */
2514 int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
2515 {
2516 unsigned long entry;
2517 int ret = 0;
2518
2519 for (entry = start; entry < end; entry += ret) {
2520 ret = kprobe_add_ksym_blacklist(entry);
2521 if (ret < 0)
2522 return ret;
2523 if (ret == 0) /* In case of alias symbol */
2524 ret = 1;
2525 }
2526 return 0;
2527 }
2528
2529 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
2530 char *type, char *sym)
2531 {
2532 return -ERANGE;
2533 }
2534
2535 int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
2536 char *sym)
2537 {
2538 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
2539 if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
2540 return 0;
2541 #ifdef CONFIG_OPTPROBES
2542 if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
2543 return 0;
2544 #endif
2545 #endif
2546 if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
2547 return 0;
2548 return -ERANGE;
2549 }
2550
2551 int __init __weak arch_populate_kprobe_blacklist(void)
2552 {
2553 return 0;
2554 }
2555
2556 /*
2557 * Lookup and populate the kprobe_blacklist.
2558 *
2559 * Unlike the kretprobe blacklist, we'll need to determine
2560 * the range of addresses that belong to these functions,
2561 * since a kprobe need not necessarily be at the beginning
2562 * of a function.
2563 */
2564 static int __init populate_kprobe_blacklist(unsigned long *start,
2565 unsigned long *end)
2566 {
2567 unsigned long entry;
2568 unsigned long *iter;
2569 int ret;
2570
2571 for (iter = start; iter < end; iter++) {
2572 entry = (unsigned long)dereference_symbol_descriptor((void *)*iter);
2573 ret = kprobe_add_ksym_blacklist(entry);
2574 if (ret == -EINVAL)
2575 continue;
2576 if (ret < 0)
2577 return ret;
2578 }
2579
2580 /* Symbols in '__kprobes_text' are blacklisted */
2581 ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
2582 (unsigned long)__kprobes_text_end);
2583 if (ret)
2584 return ret;
2585
2586 /* Symbols in 'noinstr' section are blacklisted */
2587 ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
2588 (unsigned long)__noinstr_text_end);
2589
2590 return ret ? : arch_populate_kprobe_blacklist();
2591 }
2592
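/*
 * Illustrative sketch: the usual ways code ends up in the ranges handled
 * above. NOKPROBE_SYMBOL() records an entry in the '_kprobe_blacklist'
 * section walked by populate_kprobe_blacklist(), and the __kprobes
 * annotation places a function in the '__kprobes_text' area.
 * 'my_fragile_helper' is a hypothetical function name.
 *
 *	static int my_fragile_helper(void)
 *	{
 *		...
 *	}
 *	NOKPROBE_SYMBOL(my_fragile_helper);
 */
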
2593 #ifdef CONFIG_MODULES
2594 /* Remove all symbols in given area from kprobe blacklist */
2595 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
2596 {
2597 struct kprobe_blacklist_entry *ent, *n;
2598
2599 list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
2600 if (ent->start_addr < start || ent->start_addr >= end)
2601 continue;
2602 list_del(&ent->list);
2603 kfree(ent);
2604 }
2605 }
2606
2607 static void kprobe_remove_ksym_blacklist(unsigned long entry)
2608 {
2609 kprobe_remove_area_blacklist(entry, entry + 1);
2610 }
2611
2612 static void add_module_kprobe_blacklist(struct module *mod)
2613 {
2614 unsigned long start, end;
2615 int i;
2616
2617 if (mod->kprobe_blacklist) {
2618 for (i = 0; i < mod->num_kprobe_blacklist; i++)
2619 kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
2620 }
2621
2622 start = (unsigned long)mod->kprobes_text_start;
2623 if (start) {
2624 end = start + mod->kprobes_text_size;
2625 kprobe_add_area_blacklist(start, end);
2626 }
2627
2628 start = (unsigned long)mod->noinstr_text_start;
2629 if (start) {
2630 end = start + mod->noinstr_text_size;
2631 kprobe_add_area_blacklist(start, end);
2632 }
2633 }
2634
2635 static void remove_module_kprobe_blacklist(struct module *mod)
2636 {
2637 unsigned long start, end;
2638 int i;
2639
2640 if (mod->kprobe_blacklist) {
2641 for (i = 0; i < mod->num_kprobe_blacklist; i++)
2642 kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
2643 }
2644
2645 start = (unsigned long)mod->kprobes_text_start;
2646 if (start) {
2647 end = start + mod->kprobes_text_size;
2648 kprobe_remove_area_blacklist(start, end);
2649 }
2650
2651 start = (unsigned long)mod->noinstr_text_start;
2652 if (start) {
2653 end = start + mod->noinstr_text_size;
2654 kprobe_remove_area_blacklist(start, end);
2655 }
2656 }
2657
2658 /* Module notifier call back, checking kprobes on the module */
2659 static int kprobes_module_callback(struct notifier_block *nb,
2660 unsigned long val, void *data)
2661 {
2662 struct module *mod = data;
2663 struct hlist_head *head;
2664 struct kprobe *p;
2665 unsigned int i;
2666 int checkcore = (val == MODULE_STATE_GOING);
2667
2668 guard(mutex)(&kprobe_mutex);
2669
2670 if (val == MODULE_STATE_COMING)
2671 add_module_kprobe_blacklist(mod);
2672
2673 if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
2674 return NOTIFY_DONE;
2675
2676 /*
2677 * When 'MODULE_STATE_GOING' is notified, both the module's '.text' and
2678 * '.init.text' sections will be freed. When 'MODULE_STATE_LIVE' is
2679 * notified, only the '.init.text' section will be freed. We need to
2680 * disable kprobes which have been inserted in those sections.
2681 */
2682 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2683 head = &kprobe_table[i];
2684 hlist_for_each_entry(p, head, hlist)
2685 if (within_module_init((unsigned long)p->addr, mod) ||
2686 (checkcore &&
2687 within_module_core((unsigned long)p->addr, mod))) {
2688 /*
2689 * The vaddr this probe is installed at will soon
2690 * be vfreed but not synced to disk. Hence,
2691 * disarming the breakpoint isn't needed.
2692 *
2693 * Note, this will also move any optimized probes
2694 * that are pending to be removed from their
2695 * corresponding lists to the 'freeing_list' and
2696 * will not be touched by the delayed
2697 * kprobe_optimizer() work handler.
2698 */
2699 kill_kprobe(p);
2700 }
2701 }
2702 if (val == MODULE_STATE_GOING)
2703 remove_module_kprobe_blacklist(mod);
2704 return NOTIFY_DONE;
2705 }
2706
2707 static struct notifier_block kprobe_module_nb = {
2708 .notifier_call = kprobes_module_callback,
2709 .priority = 0
2710 };
2711
2712 static int kprobe_register_module_notifier(void)
2713 {
2714 return register_module_notifier(&kprobe_module_nb);
2715 }
2716 #else
2717 static int kprobe_register_module_notifier(void)
2718 {
2719 return 0;
2720 }
2721 #endif /* CONFIG_MODULES */
2722
2723 void kprobe_free_init_mem(void)
2724 {
2725 void *start = (void *)(&__init_begin);
2726 void *end = (void *)(&__init_end);
2727 struct hlist_head *head;
2728 struct kprobe *p;
2729 int i;
2730
2731 guard(mutex)(&kprobe_mutex);
2732
2733 /* Kill all kprobes on initmem because the target code has been freed. */
2734 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2735 head = &kprobe_table[i];
2736 hlist_for_each_entry(p, head, hlist) {
2737 if (start <= (void *)p->addr && (void *)p->addr < end)
2738 kill_kprobe(p);
2739 }
2740 }
2741 }
2742
2743 static int __init init_kprobes(void)
2744 {
2745 int i, err;
2746
2747 /* FIXME allocate the probe table, currently defined statically */
2748 /* initialize all list heads */
2749 for (i = 0; i < KPROBE_TABLE_SIZE; i++)
2750 INIT_HLIST_HEAD(&kprobe_table[i]);
2751
2752 err = populate_kprobe_blacklist(__start_kprobe_blacklist,
2753 __stop_kprobe_blacklist);
2754 if (err)
2755 pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);
2756
2757 if (kretprobe_blacklist_size) {
2758 /* lookup the function address from its name */
2759 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2760 kretprobe_blacklist[i].addr =
2761 kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
2762 if (!kretprobe_blacklist[i].addr)
2763 pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n",
2764 kretprobe_blacklist[i].name);
2765 }
2766 }
2767
2768 /* By default, kprobes are armed */
2769 kprobes_all_disarmed = false;
2770
2771 /* Initialize the optimization infrastructure */
2772 init_optprobe();
2773
2774 err = arch_init_kprobes();
2775 if (!err)
2776 err = register_die_notifier(&kprobe_exceptions_nb);
2777 if (!err)
2778 err = kprobe_register_module_notifier();
2779
2780 kprobes_initialized = (err == 0);
2781 kprobe_sysctls_init();
2782 return err;
2783 }
2784 early_initcall(init_kprobes);
2785
2786 #if defined(CONFIG_OPTPROBES)
2787 static int __init init_optprobes(void)
2788 {
2789 /*
2790 * Enable kprobe optimization - this kicks the optimizer, which
2791 * depends on synchronize_rcu_tasks() and ksoftirqd, and those are
2792 * not available during early initcall. So delay the optimization.
2793 */
2794 optimize_all_kprobes();
2795
2796 return 0;
2797 }
2798 subsys_initcall(init_optprobes);
2799 #endif
2800
2801 #ifdef CONFIG_DEBUG_FS
2802 static void report_probe(struct seq_file *pi, struct kprobe *p,
2803 const char *sym, int offset, char *modname, struct kprobe *pp)
2804 {
2805 char *kprobe_type;
2806 void *addr = p->addr;
2807
2808 if (p->pre_handler == pre_handler_kretprobe)
2809 kprobe_type = "r";
2810 else
2811 kprobe_type = "k";
2812
2813 if (!kallsyms_show_value(pi->file->f_cred))
2814 addr = NULL;
2815
2816 if (sym)
2817 seq_printf(pi, "%px %s %s+0x%x %s ",
2818 addr, kprobe_type, sym, offset,
2819 (modname ? modname : " "));
2820 else /* try to use %pS */
2821 seq_printf(pi, "%px %s %pS ",
2822 addr, kprobe_type, p->addr);
2823
2824 if (!pp)
2825 pp = p;
2826 seq_printf(pi, "%s%s%s%s\n",
2827 (kprobe_gone(p) ? "[GONE]" : ""),
2828 ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""),
2829 (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
2830 (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
2831 }
2832
2833 static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
2834 {
2835 return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
2836 }
2837
2838 static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
2839 {
2840 (*pos)++;
2841 if (*pos >= KPROBE_TABLE_SIZE)
2842 return NULL;
2843 return pos;
2844 }
2845
2846 static void kprobe_seq_stop(struct seq_file *f, void *v)
2847 {
2848 /* Nothing to do */
2849 }
2850
2851 static int show_kprobe_addr(struct seq_file *pi, void *v)
2852 {
2853 struct hlist_head *head;
2854 struct kprobe *p, *kp;
2855 const char *sym;
2856 unsigned int i = *(loff_t *) v;
2857 unsigned long offset = 0;
2858 char *modname, namebuf[KSYM_NAME_LEN];
2859
2860 head = &kprobe_table[i];
2861 preempt_disable();
2862 hlist_for_each_entry_rcu(p, head, hlist) {
2863 sym = kallsyms_lookup((unsigned long)p->addr, NULL,
2864 &offset, &modname, namebuf);
2865 if (kprobe_aggrprobe(p)) {
2866 list_for_each_entry_rcu(kp, &p->list, list)
2867 report_probe(pi, kp, sym, offset, modname, p);
2868 } else
2869 report_probe(pi, p, sym, offset, modname, NULL);
2870 }
2871 preempt_enable();
2872 return 0;
2873 }
2874
2875 static const struct seq_operations kprobes_sops = {
2876 .start = kprobe_seq_start,
2877 .next = kprobe_seq_next,
2878 .stop = kprobe_seq_stop,
2879 .show = show_kprobe_addr
2880 };
2881
2882 DEFINE_SEQ_ATTRIBUTE(kprobes);
2883
2884 /* kprobes/blacklist -- shows which functions can not be probed */
2885 static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
2886 {
2887 mutex_lock(&kprobe_mutex);
2888 return seq_list_start(&kprobe_blacklist, *pos);
2889 }
2890
2891 static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
2892 {
2893 return seq_list_next(v, &kprobe_blacklist, pos);
2894 }
2895
2896 static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
2897 {
2898 struct kprobe_blacklist_entry *ent =
2899 list_entry(v, struct kprobe_blacklist_entry, list);
2900
2901 /*
2902 * If '/proc/kallsyms' is not showing kernel addresses, we won't
2903 * show them here either.
2904 */
2905 if (!kallsyms_show_value(m->file->f_cred))
2906 seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL,
2907 (void *)ent->start_addr);
2908 else
2909 seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
2910 (void *)ent->end_addr, (void *)ent->start_addr);
2911 return 0;
2912 }
2913
2914 static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
2915 {
2916 mutex_unlock(&kprobe_mutex);
2917 }
2918
2919 static const struct seq_operations kprobe_blacklist_sops = {
2920 .start = kprobe_blacklist_seq_start,
2921 .next = kprobe_blacklist_seq_next,
2922 .stop = kprobe_blacklist_seq_stop,
2923 .show = kprobe_blacklist_seq_show,
2924 };
2925 DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist);
2926
2927 static int arm_all_kprobes(void)
2928 {
2929 struct hlist_head *head;
2930 struct kprobe *p;
2931 unsigned int i, total = 0, errors = 0;
2932 int err, ret = 0;
2933
2934 guard(mutex)(&kprobe_mutex);
2935
2936 /* If kprobes are armed, just return */
2937 if (!kprobes_all_disarmed)
2938 return 0;
2939
2940 /*
2941 * optimize_kprobe() called by arm_kprobe() checks
2942 * kprobes_all_disarmed, so set kprobes_all_disarmed before
2943 * arm_kprobe.
2944 */
2945 kprobes_all_disarmed = false;
2946 /* Arming kprobes doesn't optimize kprobe itself */
2947 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2948 head = &kprobe_table[i];
2949 /* Arm all kprobes on a best-effort basis */
2950 hlist_for_each_entry(p, head, hlist) {
2951 if (!kprobe_disabled(p)) {
2952 err = arm_kprobe(p);
2953 if (err) {
2954 errors++;
2955 ret = err;
2956 }
2957 total++;
2958 }
2959 }
2960 }
2961
2962 if (errors)
2963 pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n",
2964 errors, total);
2965 else
2966 pr_info("Kprobes globally enabled\n");
2967
2968 return ret;
2969 }
2970
2971 static int disarm_all_kprobes(void)
2972 {
2973 struct hlist_head *head;
2974 struct kprobe *p;
2975 unsigned int i, total = 0, errors = 0;
2976 int err, ret = 0;
2977
2978 guard(mutex)(&kprobe_mutex);
2979
2980 /* If kprobes are already disarmed, just return */
2981 if (kprobes_all_disarmed)
2982 return 0;
2983
2984 kprobes_all_disarmed = true;
2985
2986 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2987 head = &kprobe_table[i];
2988 /* Disarm all kprobes on a best-effort basis */
2989 hlist_for_each_entry(p, head, hlist) {
2990 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
2991 err = disarm_kprobe(p, false);
2992 if (err) {
2993 errors++;
2994 ret = err;
2995 }
2996 total++;
2997 }
2998 }
2999 }
3000
3001 if (errors)
3002 pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n",
3003 errors, total);
3004 else
3005 pr_info("Kprobes globally disabled\n");
3006
3007 /* Wait for disarming all kprobes by optimizer */
3008 wait_for_kprobe_optimizer_locked();
3009 return ret;
3010 }
3011
3012 /*
3013 * XXX: The debugfs bool file interface doesn't allow for callbacks
3014 * when the bool state is switched. We can reuse that facility when
3015 * available.
3016 */
3017 static ssize_t read_enabled_file_bool(struct file *file,
3018 char __user *user_buf, size_t count, loff_t *ppos)
3019 {
3020 char buf[3];
3021
3022 if (!kprobes_all_disarmed)
3023 buf[0] = '1';
3024 else
3025 buf[0] = '0';
3026 buf[1] = '\n';
3027 buf[2] = 0x00;
3028 return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
3029 }
3030
3031 static ssize_t write_enabled_file_bool(struct file *file,
3032 const char __user *user_buf, size_t count, loff_t *ppos)
3033 {
3034 bool enable;
3035 int ret;
3036
3037 ret = kstrtobool_from_user(user_buf, count, &enable);
3038 if (ret)
3039 return ret;
3040
3041 ret = enable ? arm_all_kprobes() : disarm_all_kprobes();
3042 if (ret)
3043 return ret;
3044
3045 return count;
3046 }
3047
3048 static const struct file_operations fops_kp = {
3049 .read = read_enabled_file_bool,
3050 .write = write_enabled_file_bool,
3051 .llseek = default_llseek,
3052 };
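
/*
 * Resulting debugfs interface (illustrative; paths assume debugfs is
 * mounted at /sys/kernel/debug, and the files are created in
 * debugfs_kprobe_init() below):
 *
 *	# cat /sys/kernel/debug/kprobes/enabled
 *	1
 *	# echo 0 > /sys/kernel/debug/kprobes/enabled	(disarm all kprobes)
 */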
3053
3054 static int __init debugfs_kprobe_init(void)
3055 {
3056 struct dentry *dir;
3057
3058 dir = debugfs_create_dir("kprobes", NULL);
3059
3060 debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops);
3061
3062 debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp);
3063
3064 debugfs_create_file("blacklist", 0400, dir, NULL,
3065 &kprobe_blacklist_fops);
3066
3067 return 0;
3068 }
3069
3070 late_initcall(debugfs_kprobe_init);
3071 #endif /* CONFIG_DEBUG_FS */
3072