1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Kernel Probes (KProbes)
4 *
5 * Copyright (C) IBM Corporation, 2002, 2004
6 *
7 * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
8 * Probes initial implementation (includes suggestions from
9 * Rusty Russell).
10 * 2004-Aug Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
11 * hlists and exceptions notifier as suggested by Andi Kleen.
12 * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
13 * interface to access function arguments.
14 * 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
15 * exceptions notifier to be first on the priority list.
16 * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
17 * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
18 * <prasanna@in.ibm.com> added function-return probes.
19 */
20
21 #define pr_fmt(fmt) "kprobes: " fmt
22
23 #include <linux/kprobes.h>
24 #include <linux/hash.h>
25 #include <linux/init.h>
26 #include <linux/slab.h>
27 #include <linux/stddef.h>
28 #include <linux/export.h>
29 #include <linux/kallsyms.h>
30 #include <linux/freezer.h>
31 #include <linux/seq_file.h>
32 #include <linux/debugfs.h>
33 #include <linux/sysctl.h>
34 #include <linux/kdebug.h>
35 #include <linux/kthread.h>
36 #include <linux/memory.h>
37 #include <linux/ftrace.h>
38 #include <linux/cpu.h>
39 #include <linux/jump_label.h>
40 #include <linux/static_call.h>
41 #include <linux/perf_event.h>
42 #include <linux/execmem.h>
43 #include <linux/cleanup.h>
44 #include <linux/wait.h>
45
46 #include <asm/sections.h>
47 #include <asm/cacheflush.h>
48 #include <asm/errno.h>
49 #include <linux/uaccess.h>
50
#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)

/* Stub out sysctl registration when optprobes or sysctl support is absent. */
#if !defined(CONFIG_OPTPROBES) || !defined(CONFIG_SYSCTL)
#define kprobe_sysctls_init() do { } while (0)
#endif

/* Set once early kprobes initialization has completed. */
static int kprobes_initialized;
/* kprobe_table can be accessed by
 * - Normal hlist traversal and RCU add/del under 'kprobe_mutex' is held.
 * Or
 * - RCU hlist traversal under disabling preempt (breakpoint handlers)
 */
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];

/* NOTE: change this value only with 'kprobe_mutex' held */
static bool kprobes_all_disarmed;

/* This protects 'kprobe_table' and 'optimizing_list' */
static DEFINE_MUTEX(kprobe_mutex);
/* The kprobe currently being handled on this CPU, for recursion detection. */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
72
kprobe_lookup_name(const char * name,unsigned int __unused)73 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
74 unsigned int __unused)
75 {
76 return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
77 }
78
/*
 * Blacklist -- list of 'struct kprobe_blacklist_entry' to store info where
 * kprobes can not probe.
 */
static LIST_HEAD(kprobe_blacklist);

#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
 * 'kprobe::ainsn.insn' points to the copy of the instruction to be
 * single-stepped. x86_64, POWER4 and above have no-exec support and
 * stepping on the instruction on a vmalloced/kmalloced/data page
 * is a recipe for disaster
 */
struct kprobe_insn_page {
	struct list_head list;		/* Linked on kprobe_insn_cache::pages */
	kprobe_opcode_t *insns;		/* Page of instruction slots */
	struct kprobe_insn_cache *cache;	/* Owning cache */
	int nused;		/* Slots allocated (includes DIRTY, not yet GC'ed) */
	int ngarbage;		/* SLOT_DIRTY slots awaiting collection */
	char slot_used[];	/* Per-slot state, see enum kprobe_slot_state */
};
100
/* Number of instruction slots that fit on one page of cache 'c'. */
static int slots_per_page(struct kprobe_insn_cache *c)
{
	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
}

/* Slot life cycle: CLEAN (free) -> USED (handed out) -> DIRTY (freed, pending GC). */
enum kprobe_slot_state {
	SLOT_CLEAN = 0,
	SLOT_DIRTY = 1,
	SLOT_USED = 2,
};
111
void __weak *alloc_insn_page(void)
{
	/*
	 * Use execmem_alloc() so this page is within +/- 2GB of where the
	 * kernel image and loaded module images reside. This is required
	 * for most of the architectures.
	 * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
	 */
	return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
}

/* Release an executable page obtained from alloc_insn_page(). */
static void free_insn_page(void *page)
{
	execmem_free(page);
}

/* Cache of executable pages holding single-step instruction copies. */
struct kprobe_insn_cache kprobe_insn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
	.alloc = alloc_insn_page,
	.free = free_insn_page,
	.sym = KPROBE_INSN_PAGE_SYM,
	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
	.insn_size = MAX_INSN_SIZE,
	.nr_garbage = 0,
};
static int collect_garbage_slots(struct kprobe_insn_cache *c);
138
/**
 * __get_insn_slot - Find a slot on an executable page for an instruction.
 * @c: Pointer to kprobe instruction cache
 *
 * Description: Locates available slot on existing executable pages,
 * allocates an executable page if there's no room on existing ones.
 * Return: Pointer to instruction slot on success, NULL on failure.
 */
kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
{
	struct kprobe_insn_page *kip;

	/* Since the slot array is not protected by rcu, we need a mutex */
	guard(mutex)(&c->mutex);
	do {
		guard(rcu)();
		list_for_each_entry_rcu(kip, &c->pages, list) {
			if (kip->nused < slots_per_page(c)) {
				int i;

				for (i = 0; i < slots_per_page(c); i++) {
					if (kip->slot_used[i] == SLOT_CLEAN) {
						kip->slot_used[i] = SLOT_USED;
						kip->nused++;
						return kip->insns + (i * c->insn_size);
					}
				}
				/* kip->nused is broken. Fix it. */
				kip->nused = slots_per_page(c);
				WARN_ON(1);
			}
		}
		/* If there are any garbage slots, collect it and try again. */
	} while (c->nr_garbage && collect_garbage_slots(c) == 0);

	/* All out of space. Need to allocate a new page. */
	kip = kmalloc_flex(*kip, slot_used, slots_per_page(c));
	if (!kip)
		return NULL;

	/* Executable memory comes from the cache's arch-aware allocator. */
	kip->insns = c->alloc();
	if (!kip->insns) {
		kfree(kip);
		return NULL;
	}
	INIT_LIST_HEAD(&kip->list);
	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
	/* Hand the first slot of the fresh page straight to the caller. */
	kip->slot_used[0] = SLOT_USED;
	kip->nused = 1;
	kip->ngarbage = 0;
	kip->cache = c;
	list_add_rcu(&kip->list, &c->pages);

	/* Record the perf ksymbol register event after adding the page */
	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
			   PAGE_SIZE, false, c->sym);

	return kip->insns;
}
198
/* Return true if all garbages are collected, otherwise false. */
static bool collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
	kip->slot_used[idx] = SLOT_CLEAN;
	kip->nused--;
	if (kip->nused != 0)
		return false;

	/*
	 * Page is no longer in use. Free it unless
	 * it's the last one. We keep the last one
	 * so as not to have to set it up again the
	 * next time somebody inserts a probe.
	 */
	if (!list_is_singular(&kip->list)) {
		/*
		 * Record perf ksymbol unregister event before removing
		 * the page.
		 */
		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
				   (unsigned long)kip->insns, PAGE_SIZE, true,
				   kip->cache->sym);
		list_del_rcu(&kip->list);
		/* Let RCU walkers of the page list drain before freeing. */
		synchronize_rcu();
		kip->cache->free(kip->insns);
		kfree(kip);
	}
	return true;
}
228
/* Sweep every page of cache 'c' and reclaim all SLOT_DIRTY slots. */
static int collect_garbage_slots(struct kprobe_insn_cache *c)
{
	struct kprobe_insn_page *kip, *next;

	/* Ensure no-one is interrupted on the garbages */
	synchronize_rcu();

	list_for_each_entry_safe(kip, next, &c->pages, list) {
		int i;

		if (kip->ngarbage == 0)
			continue;
		kip->ngarbage = 0; /* we will collect all garbages */
		for (i = 0; i < slots_per_page(c); i++) {
			/*
			 * collect_one_slot() returns true when the whole page
			 * was released; stop scanning its slots in that case.
			 */
			if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
				break;
		}
	}
	c->nr_garbage = 0;
	return 0;
}
250
/*
 * Map 'slot' back to its owning page and slot index within cache 'c'.
 * Returns the slot index and stores the page in *pkip, or -1 (with
 * *pkip = NULL) when the slot belongs to no page of the cache.
 */
static long __find_insn_page(struct kprobe_insn_cache *c,
	kprobe_opcode_t *slot, struct kprobe_insn_page **pkip)
{
	struct kprobe_insn_page *kip = NULL;
	long idx;

	guard(rcu)();
	list_for_each_entry_rcu(kip, &c->pages, list) {
		idx = ((long)slot - (long)kip->insns) /
			(c->insn_size * sizeof(kprobe_opcode_t));
		if (idx >= 0 && idx < slots_per_page(c)) {
			*pkip = kip;
			return idx;
		}
	}
	/* Could not find this slot. */
	WARN_ON(1);
	*pkip = NULL;
	return -1;
}
271
/*
 * Return an instruction slot to cache 'c'.  If 'dirty', the slot may still
 * be in use by a preempted task, so it is only marked for later garbage
 * collection; otherwise it is reclaimed immediately.
 */
void __free_insn_slot(struct kprobe_insn_cache *c,
		      kprobe_opcode_t *slot, int dirty)
{
	struct kprobe_insn_page *kip = NULL;
	long idx;

	guard(mutex)(&c->mutex);
	idx = __find_insn_page(c, slot, &kip);
	/* Mark and sweep: this may sleep */
	if (kip) {
		/* Check double free */
		WARN_ON(kip->slot_used[idx] != SLOT_USED);
		if (dirty) {
			kip->slot_used[idx] = SLOT_DIRTY;
			kip->ngarbage++;
			/* Collect once a page's worth of garbage accumulated. */
			if (++c->nr_garbage > slots_per_page(c))
				collect_garbage_slots(c);
		} else {
			collect_one_slot(kip, idx);
		}
	}
}
294
295 /*
296 * Check given address is on the page of kprobe instruction slots.
297 * This will be used for checking whether the address on a stack
298 * is on a text area or not.
299 */
__is_insn_slot_addr(struct kprobe_insn_cache * c,unsigned long addr)300 bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
301 {
302 struct kprobe_insn_page *kip;
303 bool ret = false;
304
305 rcu_read_lock();
306 list_for_each_entry_rcu(kip, &c->pages, list) {
307 if (addr >= (unsigned long)kip->insns &&
308 addr < (unsigned long)kip->insns + PAGE_SIZE) {
309 ret = true;
310 break;
311 }
312 }
313 rcu_read_unlock();
314
315 return ret;
316 }
317
kprobe_cache_get_kallsym(struct kprobe_insn_cache * c,unsigned int * symnum,unsigned long * value,char * type,char * sym)318 int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
319 unsigned long *value, char *type, char *sym)
320 {
321 struct kprobe_insn_page *kip;
322 int ret = -ERANGE;
323
324 rcu_read_lock();
325 list_for_each_entry_rcu(kip, &c->pages, list) {
326 if ((*symnum)--)
327 continue;
328 strscpy(sym, c->sym, KSYM_NAME_LEN);
329 *type = 't';
330 *value = (unsigned long)kip->insns;
331 ret = 0;
332 break;
333 }
334 rcu_read_unlock();
335
336 return ret;
337 }
338
#ifdef CONFIG_OPTPROBES
/* Default allocator for optprobe detour buffers; arch code may override. */
void __weak *alloc_optinsn_page(void)
{
	return alloc_insn_page();
}

/* Default release for optprobe detour buffers; arch code may override. */
void __weak free_optinsn_page(void *page)
{
	free_insn_page(page);
}

/* For optimized_kprobe buffer */
struct kprobe_insn_cache kprobe_optinsn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
	.alloc = alloc_optinsn_page,
	.free = free_optinsn_page,
	.sym = KPROBE_OPTINSN_PAGE_SYM,
	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
	/* .insn_size is initialized later (see init_optprobe()) */
	.nr_garbage = 0,
};
#endif /* CONFIG_OPTPROBES */
#endif /* __ARCH_WANT_KPROBES_INSN_SLOT */
362
/* We have preemption disabled.. so it is safe to use __ versions */
static inline void set_kprobe_instance(struct kprobe *kp)
{
	/* Record 'kp' as the probe being handled on this CPU. */
	__this_cpu_write(kprobe_instance, kp);
}

/* Clear this CPU's "probe in progress" marker. */
static inline void reset_kprobe_instance(void)
{
	__this_cpu_write(kprobe_instance, NULL);
}
373
374 /*
375 * This routine is called either:
376 * - under the 'kprobe_mutex' - during kprobe_[un]register().
377 * OR
378 * - with preemption disabled - from architecture specific code.
379 */
get_kprobe(void * addr)380 struct kprobe *get_kprobe(void *addr)
381 {
382 struct hlist_head *head;
383 struct kprobe *p;
384
385 head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
386 hlist_for_each_entry_rcu(p, head, hlist,
387 lockdep_is_held(&kprobe_mutex)) {
388 if (p->addr == addr)
389 return p;
390 }
391
392 return NULL;
393 }
394 NOKPROBE_SYMBOL(get_kprobe);
395
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);

/* Return true if 'p' is an aggregator */
static inline bool kprobe_aggrprobe(struct kprobe *p)
{
	/* Aggregators are identified by their distinctive pre_handler. */
	return p->pre_handler == aggr_pre_handler;
}

/* Return true if 'p' is unused: a disabled aggregator with an empty list. */
static inline bool kprobe_unused(struct kprobe *p)
{
	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
	       list_empty(&p->list);
}

/* Keep all fields in the kprobe consistent. */
static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
{
	/* Copy the saved opcode and the arch-specific instruction copy. */
	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
	memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
}
417
#ifdef CONFIG_OPTPROBES
/* NOTE: This is protected by 'kprobe_mutex'. */
static bool kprobes_allow_optimization;

/*
 * Call all 'kprobe::pre_handler' on the list, but ignores its return value.
 * This must be called from arch-dep optimized caller.
 */
void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
			/* Expose the probe being run to its handler. */
			set_kprobe_instance(kp);
			kp->pre_handler(kp, regs);
		}
		reset_kprobe_instance();
	}
}
NOKPROBE_SYMBOL(opt_pre_handler);
439
440 /* Free optimized instructions and optimized_kprobe */
free_aggr_kprobe(struct kprobe * p)441 static void free_aggr_kprobe(struct kprobe *p)
442 {
443 struct optimized_kprobe *op;
444
445 op = container_of(p, struct optimized_kprobe, kp);
446 arch_remove_optimized_kprobe(op);
447 arch_remove_kprobe(p);
448 kfree(op);
449 }
450
451 /* Return true if the kprobe is ready for optimization. */
kprobe_optready(struct kprobe * p)452 static inline int kprobe_optready(struct kprobe *p)
453 {
454 struct optimized_kprobe *op;
455
456 if (kprobe_aggrprobe(p)) {
457 op = container_of(p, struct optimized_kprobe, kp);
458 return arch_prepared_optinsn(&op->optinsn);
459 }
460
461 return 0;
462 }
463
464 /* Return true if the kprobe is disarmed. Note: p must be on hash list */
kprobe_disarmed(struct kprobe * p)465 bool kprobe_disarmed(struct kprobe *p)
466 {
467 struct optimized_kprobe *op;
468
469 /* If kprobe is not aggr/opt probe, just return kprobe is disabled */
470 if (!kprobe_aggrprobe(p))
471 return kprobe_disabled(p);
472
473 op = container_of(p, struct optimized_kprobe, kp);
474
475 return kprobe_disabled(p) && list_empty(&op->list);
476 }
477
478 /* Return true if the probe is queued on (un)optimizing lists */
kprobe_queued(struct kprobe * p)479 static bool kprobe_queued(struct kprobe *p)
480 {
481 struct optimized_kprobe *op;
482
483 if (kprobe_aggrprobe(p)) {
484 op = container_of(p, struct optimized_kprobe, kp);
485 if (!list_empty(&op->list))
486 return true;
487 }
488 return false;
489 }
490
/*
 * Return an optimized kprobe whose optimizing code replaces
 * instructions including 'addr' (exclude breakpoint).
 */
static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr)
{
	int i;
	struct kprobe *p = NULL;
	struct optimized_kprobe *op;

	/* Don't check i == 0, since that is a breakpoint case. */
	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++)
		p = get_kprobe(addr - i);

	if (p && kprobe_optready(p)) {
		op = container_of(p, struct optimized_kprobe, kp);
		/* 'addr' must fall inside the instructions the jump replaced. */
		if (arch_within_optimized_kprobe(op, addr))
			return p;
	}

	return NULL;
}
513
/* Optimization staging list, protected by 'kprobe_mutex' */
static LIST_HEAD(optimizing_list);
static LIST_HEAD(unoptimizing_list);
static LIST_HEAD(freeing_list);

static void optimize_kprobe(struct kprobe *p);
/* Kthread performing the delayed (un)optimization passes. */
static struct task_struct *kprobe_optimizer_task;
/* Waitqueue the optimizer kthread sleeps on until it is kicked. */
static wait_queue_head_t kprobe_optimizer_wait;
/* Pending-request state for the optimizer kthread (values below). */
static atomic_t optimizer_state;
enum {
	OPTIMIZER_ST_IDLE = 0,		/* No request pending */
	OPTIMIZER_ST_KICKED = 1,	/* Normal kick: run after OPTIMIZE_DELAY */
	OPTIMIZER_ST_FLUSHING = 2,	/* Flush requested: run without delay */
};

/* Signalled by the optimizer when a flush request has been served. */
static DECLARE_COMPLETION(optimizer_completion);

/* Batching delay for a normal kick (timeout of the optimizer's wait). */
#define OPTIMIZE_DELAY 5
532
/*
 * Optimize (replace a breakpoint with a jump) kprobes listed on
 * 'optimizing_list'.  Caller holds 'text_mutex' and has CPUs pinned.
 */
static void do_optimize_kprobes(void)
{
	lockdep_assert_held(&text_mutex);
	/*
	 * The optimization/unoptimization refers 'online_cpus' via
	 * stop_machine() and cpu-hotplug modifies the 'online_cpus'.
	 * And same time, 'text_mutex' will be held in cpu-hotplug and here.
	 * This combination can cause a deadlock (cpu-hotplug tries to lock
	 * 'text_mutex' but stop_machine() can not be done because
	 * the 'online_cpus' has been changed)
	 * To avoid this deadlock, caller must have locked cpu-hotplug
	 * for preventing cpu-hotplug outside of 'text_mutex' locking.
	 */
	lockdep_assert_cpus_held();

	/* Optimization never be done when disarmed */
	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
	    list_empty(&optimizing_list))
		return;

	arch_optimize_kprobes(&optimizing_list);
}
559
/*
 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
 * if need) kprobes listed on 'unoptimizing_list'.  Caller holds
 * 'text_mutex' and has CPUs pinned (see do_optimize_kprobes()).
 */
static void do_unoptimize_kprobes(void)
{
	struct optimized_kprobe *op, *tmp;

	lockdep_assert_held(&text_mutex);
	/* See comment in do_optimize_kprobes() */
	lockdep_assert_cpus_held();

	if (!list_empty(&unoptimizing_list))
		arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);

	/* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
		/* Switching from detour code to origin */
		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
		/* Disarm probes if marked disabled and not gone */
		if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
			arch_disarm_kprobe(&op->kp);
		if (kprobe_unused(&op->kp)) {
			/*
			 * Remove unused probes from hash list. After waiting
			 * for synchronization, these probes are reclaimed.
			 * (reclaiming is done by do_free_cleaned_kprobes().)
			 */
			hlist_del_rcu(&op->kp.hlist);
		} else
			/* Still in use: keep it on the hash, drop from freeing. */
			list_del_init(&op->list);
	}
}
593
/* Reclaim all kprobes on the 'freeing_list' */
static void do_free_cleaned_kprobes(void)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
		list_del_init(&op->list);
		if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) {
			/*
			 * This must not happen, but if there is a kprobe
			 * still in use, keep it on kprobes hash list.
			 */
			continue;
		}

		/*
		 * The aggregator was holding back another probe while it sat on the
		 * unoptimizing/freeing lists. Now that the aggregator has been fully
		 * reverted we can safely retry the optimization of that sibling.
		 */

		struct kprobe *_p = get_optimized_kprobe(op->kp.addr);
		if (unlikely(_p))
			optimize_kprobe(_p);

		/* Finally release the aggregator itself. */
		free_aggr_kprobe(&op->kp);
	}
}
622
623 static void kick_kprobe_optimizer(void);
624
625 /* Kprobe jump optimizer */
kprobe_optimizer(void)626 static void kprobe_optimizer(void)
627 {
628 guard(mutex)(&kprobe_mutex);
629
630 scoped_guard(cpus_read_lock) {
631 guard(mutex)(&text_mutex);
632
633 /*
634 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
635 * kprobes before waiting for quiesence period.
636 */
637 do_unoptimize_kprobes();
638
639 /*
640 * Step 2: Wait for quiesence period to ensure all potentially
641 * preempted tasks to have normally scheduled. Because optprobe
642 * may modify multiple instructions, there is a chance that Nth
643 * instruction is preempted. In that case, such tasks can return
644 * to 2nd-Nth byte of jump instruction. This wait is for avoiding it.
645 * Note that on non-preemptive kernel, this is transparently converted
646 * to synchronoze_sched() to wait for all interrupts to have completed.
647 */
648 synchronize_rcu_tasks();
649
650 /* Step 3: Optimize kprobes after quiesence period */
651 do_optimize_kprobes();
652
653 /* Step 4: Free cleaned kprobes after quiesence period */
654 do_free_cleaned_kprobes();
655 }
656
657 /* Step 5: Kick optimizer again if needed. But if there is a flush requested, */
658 if (completion_done(&optimizer_completion))
659 complete(&optimizer_completion);
660
661 if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
662 kick_kprobe_optimizer(); /*normal kick*/
663 }
664
kprobe_optimizer_thread(void * data)665 static int kprobe_optimizer_thread(void *data)
666 {
667 while (!kthread_should_stop()) {
668 /* To avoid hung_task, wait in interruptible state. */
669 wait_event_interruptible(kprobe_optimizer_wait,
670 atomic_read(&optimizer_state) != OPTIMIZER_ST_IDLE ||
671 kthread_should_stop());
672
673 if (kthread_should_stop())
674 break;
675
676 /*
677 * If it was a normal kick, wait for OPTIMIZE_DELAY.
678 * This wait can be interrupted by a flush request.
679 */
680 if (atomic_read(&optimizer_state) == 1)
681 wait_event_interruptible_timeout(
682 kprobe_optimizer_wait,
683 atomic_read(&optimizer_state) == OPTIMIZER_ST_FLUSHING ||
684 kthread_should_stop(),
685 OPTIMIZE_DELAY);
686
687 if (kthread_should_stop())
688 break;
689
690 atomic_set(&optimizer_state, OPTIMIZER_ST_IDLE);
691
692 kprobe_optimizer();
693 }
694 return 0;
695 }
696
/* Start optimizer after OPTIMIZE_DELAY passed */
static void kick_kprobe_optimizer(void)
{
	lockdep_assert_held(&kprobe_mutex);
	/*
	 * Only the IDLE -> KICKED transition needs a wakeup; if the thread is
	 * already kicked or flushing there is nothing more to do.
	 */
	if (atomic_cmpxchg(&optimizer_state,
			   OPTIMIZER_ST_IDLE, OPTIMIZER_ST_KICKED) == OPTIMIZER_ST_IDLE)
		wake_up(&kprobe_optimizer_wait);
}
705
/*
 * Flush pending (un)optimization work: drive the optimizer thread until
 * both staging lists are empty.  'kprobe_mutex' is dropped while sleeping
 * so the optimizer can take it.
 */
static void wait_for_kprobe_optimizer_locked(void)
{
	lockdep_assert_held(&kprobe_mutex);

	while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
		init_completion(&optimizer_completion);
		/*
		 * Set state to OPTIMIZER_ST_FLUSHING and wake up the thread if it's
		 * idle. If it's already kicked, it will see the state change.
		 */
		if (atomic_xchg_acquire(&optimizer_state,
					OPTIMIZER_ST_FLUSHING) != OPTIMIZER_ST_FLUSHING)
			wake_up(&kprobe_optimizer_wait);

		/* Drop the mutex so kprobe_optimizer() can acquire it. */
		mutex_unlock(&kprobe_mutex);
		wait_for_completion(&optimizer_completion);
		mutex_lock(&kprobe_mutex);
	}
}
725
726 /* Wait for completing optimization and unoptimization */
wait_for_kprobe_optimizer(void)727 void wait_for_kprobe_optimizer(void)
728 {
729 guard(mutex)(&kprobe_mutex);
730
731 wait_for_kprobe_optimizer_locked();
732 }
733
optprobe_queued_unopt(struct optimized_kprobe * op)734 bool optprobe_queued_unopt(struct optimized_kprobe *op)
735 {
736 struct optimized_kprobe *_op;
737
738 list_for_each_entry(_op, &unoptimizing_list, list) {
739 if (op == _op)
740 return true;
741 }
742
743 return false;
744 }
745
/* Optimize kprobe if p is ready to be optimized */
static void optimize_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	/* Check if the kprobe is disabled or not ready for optimization. */
	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
	    (kprobe_disabled(p) || kprobes_all_disarmed))
		return;

	/* kprobes with 'post_handler' can not be optimized */
	if (p->post_handler)
		return;

	op = container_of(p, struct optimized_kprobe, kp);

	/* Check there is no other kprobes at the optimized instructions */
	if (arch_check_optimized_kprobe(op) < 0)
		return;

	/* Check if it is already optimized. */
	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
		if (optprobe_queued_unopt(op)) {
			/* This is under unoptimizing. Just dequeue the probe */
			list_del_init(&op->list);
		}
		return;
	}
	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;

	/*
	 * On the 'unoptimizing_list' and 'optimizing_list',
	 * 'op' must have OPTIMIZED flag
	 */
	if (WARN_ON_ONCE(!list_empty(&op->list)))
		return;

	/* Queue for the optimizer thread; the actual patching happens there. */
	list_add(&op->list, &optimizing_list);
	kick_kprobe_optimizer();
}
786
/* Short cut to direct unoptimizing: patch the text back synchronously. */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
	lockdep_assert_cpus_held();
	arch_unoptimize_kprobe(op);
	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
}
794
/*
 * Unoptimize a kprobe if p is optimized.  With 'force', the jump is
 * reverted immediately instead of being queued for the optimizer thread.
 */
static void unoptimize_kprobe(struct kprobe *p, bool force)
{
	struct optimized_kprobe *op;

	if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
		return; /* This is not an optprobe nor optimized */

	op = container_of(p, struct optimized_kprobe, kp);
	if (!kprobe_optimized(p))
		return;

	if (!list_empty(&op->list)) {
		if (optprobe_queued_unopt(op)) {
			/* Queued in unoptimizing queue */
			if (force) {
				/*
				 * Forcibly unoptimize the kprobe here, and queue it
				 * in the freeing list for release afterwards.
				 */
				force_unoptimize_kprobe(op);
				list_move(&op->list, &freeing_list);
			}
		} else {
			/* Dequeue from the optimizing queue */
			list_del_init(&op->list);
			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
		}
		return;
	}

	/* Optimized kprobe case */
	if (force) {
		/* Forcibly update the code: this is a special case */
		force_unoptimize_kprobe(op);
	} else {
		/* Defer the revert to the optimizer thread. */
		list_add(&op->list, &unoptimizing_list);
		kick_kprobe_optimizer();
	}
}
835
/* Cancel unoptimizing for reusing */
static int reuse_unused_kprobe(struct kprobe *ap)
{
	struct optimized_kprobe *op;

	/*
	 * Unused kprobe MUST be on the way of delayed unoptimizing (means
	 * there is still a relative jump) and disabled.
	 */
	op = container_of(ap, struct optimized_kprobe, kp);
	WARN_ON_ONCE(list_empty(&op->list));
	/* Enable the probe again */
	ap->flags &= ~KPROBE_FLAG_DISABLED;
	/* Optimize it again. (remove from 'op->list') */
	if (!kprobe_optready(ap))
		return -EINVAL;	/* detour buffer no longer prepared */

	optimize_kprobe(ap);
	return 0;
}
856
/* Remove optimized instructions (probe is going away, e.g. module unload). */
static void kill_optimized_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	if (!list_empty(&op->list))
		/* Dequeue from the (un)optimization queue */
		list_del_init(&op->list);
	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;

	if (kprobe_unused(p)) {
		/*
		 * Unused kprobe is on unoptimizing or freeing list. We move it
		 * to freeing_list and let the kprobe_optimizer() remove it from
		 * the kprobe hash list and free it.
		 */
		if (optprobe_queued_unopt(op))
			list_move(&op->list, &freeing_list);
	}

	/* Don't touch the code, because it is already freed. */
	arch_remove_optimized_kprobe(op);
}
881
/* Build the detour buffer for 'p' unless it is an ftrace-based probe. */
static inline
void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	if (kprobe_ftrace(p))
		return;

	arch_prepare_optimized_kprobe(op, p);
}
888
889 /* Try to prepare optimized instructions */
prepare_optimized_kprobe(struct kprobe * p)890 static void prepare_optimized_kprobe(struct kprobe *p)
891 {
892 struct optimized_kprobe *op;
893
894 op = container_of(p, struct optimized_kprobe, kp);
895 __prepare_optimized_kprobe(op, p);
896 }
897
/* Allocate new optimized_kprobe and try to prepare optimized instructions. */
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = kzalloc_obj(struct optimized_kprobe);
	if (!op)
		return NULL;

	INIT_LIST_HEAD(&op->list);
	op->kp.addr = p->addr;
	/* Prepare the detour buffer (skipped for ftrace-based probes). */
	__prepare_optimized_kprobe(op, p);

	return &op->kp;
}
913
static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);

/*
 * Prepare an optimized_kprobe and optimize it.
 * NOTE: 'p' must be a normal registered kprobe.
 */
static void try_to_optimize_kprobe(struct kprobe *p)
{
	struct kprobe *ap;
	struct optimized_kprobe *op;

	/* Impossible to optimize ftrace-based kprobe. */
	if (kprobe_ftrace(p))
		return;

	/* For preparing optimization, jump_label_text_reserved() is called. */
	guard(cpus_read_lock)();
	guard(jump_label_lock)();
	guard(mutex)(&text_mutex);

	ap = alloc_aggr_kprobe(p);
	if (!ap)
		return;

	op = container_of(ap, struct optimized_kprobe, kp);
	if (!arch_prepared_optinsn(&op->optinsn)) {
		/* If failed to setup optimizing, fallback to kprobe. */
		arch_remove_optimized_kprobe(op);
		kfree(op);
		return;
	}

	init_aggr_kprobe(ap, p);
	optimize_kprobe(ap); /* This just kicks optimizer thread. */
}
949
/* Enable jump optimization globally and optimize every eligible kprobe. */
static void optimize_all_kprobes(void)
{
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i;

	guard(mutex)(&kprobe_mutex);
	/* If optimization is already allowed, just return. */
	if (kprobes_allow_optimization)
		return;

	cpus_read_lock();
	kprobes_allow_optimization = true;
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry(p, head, hlist)
			if (!kprobe_disabled(p))
				optimize_kprobe(p);
	}
	cpus_read_unlock();
	pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n");
}
972
#ifdef CONFIG_SYSCTL
/* Disable jump optimization globally and revert every optimized kprobe. */
static void unoptimize_all_kprobes(void)
{
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i;

	guard(mutex)(&kprobe_mutex);
	/* If optimization is already prohibited, just return. */
	if (!kprobes_allow_optimization)
		return;

	cpus_read_lock();
	kprobes_allow_optimization = false;
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry(p, head, hlist) {
			if (!kprobe_disabled(p))
				unoptimize_kprobe(p, false);
		}
	}
	cpus_read_unlock();
	/* Wait for unoptimizing completion. */
	wait_for_kprobe_optimizer_locked();
	pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
}
999
static DEFINE_MUTEX(kprobe_sysctl_mutex);
/* Mirrors 'kprobes_allow_optimization' for the sysctl interface. */
static int sysctl_kprobes_optimization;
/* sysctl handler for debug/kprobes-optimization: toggle jump optimization. */
static int proc_kprobes_optimization_handler(const struct ctl_table *table,
					     int write, void *buffer,
					     size_t *length, loff_t *ppos)
{
	int ret;

	guard(mutex)(&kprobe_sysctl_mutex);
	/* Refresh the sysctl mirror before letting proc update it. */
	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);

	/* Apply the current value; both helpers are no-ops when unchanged. */
	if (sysctl_kprobes_optimization)
		optimize_all_kprobes();
	else
		unoptimize_all_kprobes();

	return ret;
}
1019
/* Sysctl table exposing debug/kprobes-optimization as a boolean (0/1). */
static const struct ctl_table kprobe_sysctls[] = {
	{
		.procname = "kprobes-optimization",
		.data = &sysctl_kprobes_optimization,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_kprobes_optimization_handler,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
};

/* Register the kprobes sysctls under /proc/sys/debug at boot. */
static void __init kprobe_sysctls_init(void)
{
	register_sysctl_init("debug", kprobe_sysctls);
}
#endif /* CONFIG_SYSCTL */
1037
/* Put a breakpoint for a probe.  Caller must hold 'text_mutex'. */
static void __arm_kprobe(struct kprobe *p)
{
	struct kprobe *_p;

	lockdep_assert_held(&text_mutex);

	/* Find the overlapping optimized kprobes. */
	_p = get_optimized_kprobe(p->addr);
	if (unlikely(_p))
		/* Fallback to unoptimized kprobe */
		unoptimize_kprobe(_p, true);

	arch_arm_kprobe(p);
	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
}
1054
/*
 * Remove the breakpoint of a probe.  Caller must hold 'text_mutex'.
 * @reopt: if true, re-optimize any other kprobe this one was blocking.
 */
static void __disarm_kprobe(struct kprobe *p, bool reopt)
{
	struct kprobe *_p;

	lockdep_assert_held(&text_mutex);

	/* Try to unoptimize */
	unoptimize_kprobe(p, kprobes_all_disarmed);

	if (!kprobe_queued(p)) {
		arch_disarm_kprobe(p);
		/* If another kprobe was blocked, re-optimize it. */
		_p = get_optimized_kprobe(p->addr);
		if (unlikely(_p) && reopt)
			optimize_kprobe(_p);
	}
}
1073
/* Boot-time setup for the optprobe machinery: insn slots, state and
 * the background optimizer thread.
 */
static void __init init_optprobe(void)
{
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
	/* Init 'kprobe_optinsn_slots' for allocation */
	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
#endif

	init_waitqueue_head(&kprobe_optimizer_wait);
	atomic_set(&optimizer_state, OPTIMIZER_ST_IDLE);
	kprobe_optimizer_task = kthread_run(kprobe_optimizer_thread, NULL,
					    "kprobe-optimizer");
}
1086 #else /* !CONFIG_OPTPROBES */
1087
/*
 * CONFIG_OPTPROBES=n stubs: all optimization hooks collapse to no-ops
 * and (dis)arming falls straight through to the arch breakpoint code.
 */
#define init_optprobe() do {} while (0)
#define optimize_kprobe(p) do {} while (0)
#define unoptimize_kprobe(p, f) do {} while (0)
#define kill_optimized_kprobe(p) do {} while (0)
#define prepare_optimized_kprobe(p) do {} while (0)
#define try_to_optimize_kprobe(p) do {} while (0)
#define __arm_kprobe(p) arch_arm_kprobe(p)
#define __disarm_kprobe(p, o) arch_disarm_kprobe(p)
#define kprobe_disarmed(p) kprobe_disabled(p)
#define wait_for_kprobe_optimizer_locked() \
	lockdep_assert_held(&kprobe_mutex)
1099
/* Without optprobes an unused aggr kprobe can never be rescued. */
static int reuse_unused_kprobe(struct kprobe *ap)
{
	/*
	 * If the optimized kprobe is NOT supported, the aggr kprobe is
	 * released at the same time that the last aggregated kprobe is
	 * unregistered.
	 * Thus there should be no chance to reuse unused kprobe.
	 */
	WARN_ON_ONCE(1);
	return -EINVAL;
}
1111
/* Release an aggregator kprobe and its arch-specific resources. */
static void free_aggr_kprobe(struct kprobe *p)
{
	arch_remove_kprobe(p);
	kfree(p);
}
1117
/* Allocate a zeroed aggregator kprobe (non-optprobe variant). */
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	return kzalloc_obj(struct kprobe);
}
1122 #endif /* CONFIG_OPTPROBES */
1123
1124 #ifdef CONFIG_KPROBES_ON_FTRACE
/* ftrace_ops used for probes that do not modify the instruction pointer. */
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS,
};

/* ftrace_ops for probes with a post_handler: needs IPMODIFY. */
static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

/* Reference counts of probes armed through each ops, under kprobe_mutex. */
static int kprobe_ipmodify_enabled;
static int kprobe_ftrace_enabled;
bool kprobe_ftrace_disabled;
1138
/*
 * Arm a kprobe at an ftrace location: add the address to @ops' filter
 * and register @ops on its first use (tracked by @cnt).
 */
static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
			       int *cnt)
{
	int ret;

	lockdep_assert_held(&kprobe_mutex);

	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
	if (ret < 0)
		return ret;

	if (*cnt == 0) {
		ret = register_ftrace_function(ops);
		if (ret < 0) {
			/*
			 * At this point, since ops is not registered, we should be safe from
			 * registering empty filter.
			 */
			ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
			return ret;
		}
	}

	(*cnt)++;
	return ret;
}
1165
arm_kprobe_ftrace(struct kprobe * p)1166 static int arm_kprobe_ftrace(struct kprobe *p)
1167 {
1168 bool ipmodify = (p->post_handler != NULL);
1169
1170 return __arm_kprobe_ftrace(p,
1171 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1172 ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1173 }
1174
/*
 * Disarm a kprobe at an ftrace location: unregister @ops when the last
 * user goes away (tracked by @cnt) and remove the address filter.
 */
static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
				  int *cnt)
{
	int ret;

	lockdep_assert_held(&kprobe_mutex);
	if (unlikely(kprobe_ftrace_disabled)) {
		/* Now ftrace is disabled forever, disarm is already done. */
		return 0;
	}

	if (*cnt == 1) {
		ret = unregister_ftrace_function(ops);
		if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret))
			return ret;
	}

	(*cnt)--;

	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
	WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n",
		  p->addr, ret);
	return ret;
}
1199
disarm_kprobe_ftrace(struct kprobe * p)1200 static int disarm_kprobe_ftrace(struct kprobe *p)
1201 {
1202 bool ipmodify = (p->post_handler != NULL);
1203
1204 return __disarm_kprobe_ftrace(p,
1205 ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1206 ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1207 }
1208
/* Permanently disable kprobe-on-ftrace; later disarms become no-ops. */
void kprobe_ftrace_kill(void)
{
	kprobe_ftrace_disabled = true;
}
1213 #else /* !CONFIG_KPROBES_ON_FTRACE */
/* CONFIG_KPROBES_ON_FTRACE=n: arming on ftrace locations is unsupported. */
static inline int arm_kprobe_ftrace(struct kprobe *p)
{
	return -ENODEV;
}
1218
/* CONFIG_KPROBES_ON_FTRACE=n: disarming on ftrace locations is unsupported. */
static inline int disarm_kprobe_ftrace(struct kprobe *p)
{
	return -ENODEV;
}
1223 #endif
1224
/*
 * Prepare the instruction slot for @p.  Must use the ftrace-specific
 * path when p->addr really is an ftrace location.
 */
static int prepare_kprobe(struct kprobe *p)
{
	return kprobe_ftrace(p) ? arch_prepare_kprobe_ftrace(p)
				: arch_prepare_kprobe(p);
}
1233
/*
 * Arm @kp, dispatching to the ftrace path when needed; otherwise take
 * cpus_read_lock and text_mutex (via guards) around __arm_kprobe().
 */
static int arm_kprobe(struct kprobe *kp)
{
	if (unlikely(kprobe_ftrace(kp)))
		return arm_kprobe_ftrace(kp);

	guard(cpus_read_lock)();
	guard(mutex)(&text_mutex);
	__arm_kprobe(kp);
	return 0;
}
1244
/*
 * Disarm @kp, dispatching to the ftrace path when needed; otherwise take
 * cpus_read_lock and text_mutex (via guards) around __disarm_kprobe().
 */
static int disarm_kprobe(struct kprobe *kp, bool reopt)
{
	if (unlikely(kprobe_ftrace(kp)))
		return disarm_kprobe_ftrace(kp);

	guard(cpus_read_lock)();
	guard(mutex)(&text_mutex);
	__disarm_kprobe(kp, reopt);
	return 0;
}
1255
/*
 * Aggregate handlers for multiple kprobes support - these handlers
 * take care of invoking the individual kprobe handlers on p->list.
 * Returns 1 (skip single-stepping) as soon as any child pre_handler
 * returns non-zero, else 0.
 */
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
			set_kprobe_instance(kp);
			if (kp->pre_handler(kp, regs))
				return 1;
		}
		reset_kprobe_instance();
	}
	return 0;
}
1274 NOKPROBE_SYMBOL(aggr_pre_handler);
1275
/* Invoke every enabled child probe's post_handler on this aggrprobe. */
static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
			      unsigned long flags)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
			set_kprobe_instance(kp);
			kp->post_handler(kp, regs, flags);
			reset_kprobe_instance();
		}
	}
}
1289 NOKPROBE_SYMBOL(aggr_post_handler);
1290
1291 /* Walks the list and increments 'nmissed' if 'p' has child probes. */
kprobes_inc_nmissed_count(struct kprobe * p)1292 void kprobes_inc_nmissed_count(struct kprobe *p)
1293 {
1294 struct kprobe *kp;
1295
1296 if (!kprobe_aggrprobe(p)) {
1297 p->nmissed++;
1298 } else {
1299 list_for_each_entry_rcu(kp, &p->list, list)
1300 kp->nmissed++;
1301 }
1302 }
1303 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
1304
/* Dummy kprobe marking a CPU "busy" so recursive probing is rejected. */
static struct kprobe kprobe_busy = {
	.addr = (void *) get_kprobe,
};
1308
/* Mark this CPU as running kprobe-related code (pairs with kprobe_busy_end).
 * Disables preemption so 'current_kprobe' stays valid for this CPU.
 */
void kprobe_busy_begin(void)
{
	struct kprobe_ctlblk *kcb;

	preempt_disable();
	__this_cpu_write(current_kprobe, &kprobe_busy);
	kcb = get_kprobe_ctlblk();
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
}
1318
/* End the busy section opened by kprobe_busy_begin(). */
void kprobe_busy_end(void)
{
	__this_cpu_write(current_kprobe, NULL);
	preempt_enable();
}
1324
/* Add the new probe to 'ap->list'.
 * A child with a post_handler forces the aggrprobe out of optimized mode
 * (an optimized probe cannot run post handlers).
 */
static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
{
	if (p->post_handler)
		unoptimize_kprobe(ap, true);	/* Fall back to normal kprobe */

	list_add_rcu(&p->list, &ap->list);
	if (p->post_handler && !ap->post_handler)
		ap->post_handler = aggr_post_handler;

	return 0;
}
1337
/*
 * Fill in the required fields of the aggregator kprobe. Replace the
 * earlier kprobe in the hlist with the aggregator kprobe.
 */
static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
	/* Copy the insn slot of 'p' to 'ap'. */
	copy_kprobe(p, ap);
	flush_insn_slot(ap);
	ap->addr = p->addr;
	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
	ap->pre_handler = aggr_pre_handler;
	/* We don't care the kprobe which has gone. */
	if (p->post_handler && !kprobe_gone(p))
		ap->post_handler = aggr_post_handler;

	INIT_LIST_HEAD(&ap->list);
	INIT_HLIST_NODE(&ap->hlist);

	/* 'p' becomes the first child; 'ap' takes p's place in the hash. */
	list_add_rcu(&p->list, &ap->list);
	hlist_replace_rcu(&p->hlist, &ap->hlist);
}
1360
/*
 * This registers the second or subsequent kprobe at the same address.
 * Converts 'orig_p' into (or reuses) an aggregator probe and appends 'p'
 * as a child; re-arms the aggregator when the new child is enabled.
 */
static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
{
	int ret = 0;
	struct kprobe *ap = orig_p;

	scoped_guard(cpus_read_lock) {
		/* For preparing optimization, jump_label_text_reserved() is called */
		guard(jump_label_lock)();
		guard(mutex)(&text_mutex);

		if (!kprobe_aggrprobe(orig_p)) {
			/* If 'orig_p' is not an 'aggr_kprobe', create new one. */
			ap = alloc_aggr_kprobe(orig_p);
			if (!ap)
				return -ENOMEM;
			init_aggr_kprobe(ap, orig_p);
		} else if (kprobe_unused(ap)) {
			/* This probe is going to die. Rescue it */
			ret = reuse_unused_kprobe(ap);
			if (ret)
				return ret;
		}

		if (kprobe_gone(ap)) {
			/*
			 * Attempting to insert new probe at the same location that
			 * had a probe in the module vaddr area which already
			 * freed. So, the instruction slot has already been
			 * released. We need a new slot for the new probe.
			 */
			ret = arch_prepare_kprobe(ap);
			if (ret)
				/*
				 * Even if fail to allocate new slot, don't need to
				 * free the 'ap'. It will be used next time, or
				 * freed by unregister_kprobe().
				 */
				return ret;

			/* Prepare optimized instructions if possible. */
			prepare_optimized_kprobe(ap);

			/*
			 * Clear gone flag to prevent allocating new slot again, and
			 * set disabled flag because it is not armed yet.
			 */
			ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
				    | KPROBE_FLAG_DISABLED;
		}

		/* Copy the insn slot of 'p' to 'ap'. */
		copy_kprobe(ap, p);
		ret = add_new_kprobe(ap, p);
	}

	if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
		ap->flags &= ~KPROBE_FLAG_DISABLED;
		if (!kprobes_all_disarmed) {
			/* Arm the breakpoint again. */
			ret = arm_kprobe(ap);
			if (ret) {
				/* Arming failed: back out the new child. */
				ap->flags |= KPROBE_FLAG_DISABLED;
				list_del_rcu(&p->list);
				synchronize_rcu();
			}
		}
	}
	return ret;
}
1433
arch_within_kprobe_blacklist(unsigned long addr)1434 bool __weak arch_within_kprobe_blacklist(unsigned long addr)
1435 {
1436 /* The '__kprobes' functions and entry code must not be probed. */
1437 return addr >= (unsigned long)__kprobes_text_start &&
1438 addr < (unsigned long)__kprobes_text_end;
1439 }
1440
/* Check @addr against the arch blacklist and the generic blacklist list. */
static bool __within_kprobe_blacklist(unsigned long addr)
{
	struct kprobe_blacklist_entry *ent;

	if (arch_within_kprobe_blacklist(addr))
		return true;
	/*
	 * If 'kprobe_blacklist' is defined, check the address and
	 * reject any probe registration in the prohibited area.
	 */
	list_for_each_entry(ent, &kprobe_blacklist, list) {
		if (addr >= ent->start_addr && addr < ent->end_addr)
			return true;
	}
	return false;
}
1457
/*
 * As __within_kprobe_blacklist(), but also resolves compiler-suffixed
 * symbols (e.g. 'func.isra.0') back to their base symbol first.
 */
bool within_kprobe_blacklist(unsigned long addr)
{
	char symname[KSYM_NAME_LEN], *p;

	if (__within_kprobe_blacklist(addr))
		return true;

	/* Check if the address is on a suffixed-symbol */
	if (!lookup_symbol_name(addr, symname)) {
		p = strchr(symname, '.');
		if (!p)
			return false;
		*p = '\0';
		addr = (unsigned long)kprobe_lookup_name(symname, 0);
		if (addr)
			return __within_kprobe_blacklist(addr);
	}
	return false;
}
1477
/*
 * arch_adjust_kprobe_addr - adjust the address
 * @addr: symbol base address
 * @offset: offset within the symbol
 * @on_func_entry: was this @addr+@offset on the function entry
 *
 * Typically returns @addr + @offset, except for special cases where the
 * function might be prefixed by a CFI landing pad, in that case any offset
 * inside the landing pad is mapped to the first 'real' instruction of the
 * symbol.
 *
 * Specifically, for things like IBT/BTI, skip the resp. ENDBR/BTI.C
 * instruction at +0.
 */
kprobe_opcode_t *__weak arch_adjust_kprobe_addr(unsigned long addr,
						unsigned long offset,
						bool *on_func_entry)
{
	/* Default: offset 0 is the function entry; no landing pad to skip. */
	*on_func_entry = !offset;
	return (kprobe_opcode_t *)(addr + offset);
}
1499
/*
 * If 'symbol_name' is specified, look it up and add the 'offset'
 * to it. This way, we can specify a relative address to a symbol.
 * This returns encoded errors if it fails to look up symbol or invalid
 * combination of parameters.
 */
static kprobe_opcode_t *
_kprobe_addr(kprobe_opcode_t *addr, const char *symbol_name,
	     unsigned long offset, bool *on_func_entry)
{
	/* Exactly one of 'symbol_name' and 'addr' must be supplied. */
	if ((symbol_name && addr) || (!symbol_name && !addr))
		return ERR_PTR(-EINVAL);

	if (symbol_name) {
		/*
		 * Input: @sym + @offset
		 * Output: @addr + @offset
		 *
		 * NOTE: kprobe_lookup_name() does *NOT* fold the offset
		 * argument into its output!
		 */
		addr = kprobe_lookup_name(symbol_name, offset);
		if (!addr)
			return ERR_PTR(-ENOENT);
	}

	/*
	 * So here we have @addr + @offset, displace it into a new
	 * @addr' + @offset' where @addr' is the symbol start address.
	 */
	addr = (void *)addr + offset;
	if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset))
		return ERR_PTR(-ENOENT);
	addr = (void *)addr - offset;

	/*
	 * Then ask the architecture to re-combine them, taking care of
	 * magical function entry details while telling us if this was indeed
	 * at the start of the function.
	 */
	addr = arch_adjust_kprobe_addr((unsigned long)addr, offset, on_func_entry);
	if (!addr)
		return ERR_PTR(-EINVAL);

	return addr;
}
1546
/* Resolve the probed address of @p, discarding the on-entry flag. */
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
{
	bool on_func_entry;

	return _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
}
1553
/*
 * Check the 'p' is valid and return the aggregator kprobe
 * at the same address.  Returns NULL if 'p' is not registered
 * (neither the hashed probe itself nor a child on its list).
 */
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
{
	struct kprobe *ap, *list_p;

	lockdep_assert_held(&kprobe_mutex);

	ap = get_kprobe(p->addr);
	if (unlikely(!ap))
		return NULL;

	if (p == ap)
		return ap;

	list_for_each_entry(list_p, &ap->list, list)
		if (list_p == p)
			/* kprobe p is a valid probe */
			return ap;

	return NULL;
}
1578
/*
 * Warn and return error if the kprobe is being re-registered since
 * there must be a software bug.
 */
static inline int warn_kprobe_rereg(struct kprobe *p)
{
	guard(mutex)(&kprobe_mutex);

	if (WARN_ON_ONCE(__get_valid_kprobe(p)))
		return -EINVAL;

	return 0;
}
1592
/*
 * If p->addr is an ftrace location, mark the probe KPROBE_FLAG_FTRACE,
 * or reject it entirely when kprobe-on-ftrace is not built in.
 */
static int check_ftrace_location(struct kprobe *p)
{
	unsigned long addr = (unsigned long)p->addr;

	if (ftrace_location(addr) == addr) {
#ifdef CONFIG_KPROBES_ON_FTRACE
		p->flags |= KPROBE_FLAG_FTRACE;
#else
		return -EINVAL;
#endif
	}
	return 0;
}
1606
is_cfi_preamble_symbol(unsigned long addr)1607 static bool is_cfi_preamble_symbol(unsigned long addr)
1608 {
1609 char symbuf[KSYM_NAME_LEN];
1610
1611 if (lookup_symbol_name(addr, symbuf))
1612 return false;
1613
1614 return str_has_prefix(symbuf, "__cfi_") ||
1615 str_has_prefix(symbuf, "__pfx_");
1616 }
1617
/*
 * Validate that p->addr may be probed: on ftrace terms, in real text,
 * outside all reserved/blacklisted areas.  On success, *probed_mod holds
 * a module reference (or NULL for core text) that the caller must put.
 */
static int check_kprobe_address_safe(struct kprobe *p,
				     struct module **probed_mod)
{
	int ret;

	ret = check_ftrace_location(p);
	if (ret)
		return ret;

	guard(jump_label_lock)();

	/* Ensure the address is in a text area, and find a module if exists. */
	*probed_mod = NULL;
	if (!core_kernel_text((unsigned long) p->addr)) {
		guard(rcu)();
		*probed_mod = __module_text_address((unsigned long) p->addr);
		if (!(*probed_mod))
			return -EINVAL;

		/*
		 * We must hold a refcount of the probed module while updating
		 * its code to prohibit unexpected unloading.
		 */
		if (unlikely(!try_module_get(*probed_mod)))
			return -ENOENT;
	}
	/* Ensure it is not in reserved area. */
	if (in_gate_area_no_mm((unsigned long) p->addr) ||
	    within_kprobe_blacklist((unsigned long) p->addr) ||
	    jump_label_text_reserved(p->addr, p->addr) ||
	    static_call_text_reserved(p->addr, p->addr) ||
	    find_bug((unsigned long)p->addr) ||
	    is_cfi_preamble_symbol((unsigned long)p->addr)) {
		module_put(*probed_mod);
		return -EINVAL;
	}

	/* Get module refcount and reject __init functions for loaded modules. */
	if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
		/*
		 * If the module freed '.init.text', we couldn't insert
		 * kprobes in there.
		 */
		if (within_module_init((unsigned long)p->addr, *probed_mod) &&
		    !module_is_coming(*probed_mod)) {
			module_put(*probed_mod);
			return -ENOENT;
		}
	}

	return 0;
}
1670
/*
 * Insert @p into the kprobe hash table and arm it, aggregating with an
 * existing probe at the same address if there is one.  Runs under
 * 'kprobe_mutex'.
 */
static int __register_kprobe(struct kprobe *p)
{
	int ret;
	struct kprobe *old_p;

	guard(mutex)(&kprobe_mutex);

	old_p = get_kprobe(p->addr);
	if (old_p)
		/* Since this may unoptimize 'old_p', locking 'text_mutex'. */
		return register_aggr_kprobe(old_p, p);

	scoped_guard(cpus_read_lock) {
		/* Prevent text modification */
		guard(mutex)(&text_mutex);
		ret = prepare_kprobe(p);
		if (ret)
			return ret;
	}

	INIT_HLIST_NODE(&p->hlist);
	hlist_add_head_rcu(&p->hlist,
			   &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);

	if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
		ret = arm_kprobe(p);
		if (ret) {
			/* Arming failed: remove from the hash again. */
			hlist_del_rcu(&p->hlist);
			synchronize_rcu();
		}
	}

	/* Try to optimize kprobe */
	try_to_optimize_kprobe(p);
	return 0;
}
1707
/*
 * Register a kprobe.  Resolves the symbol/offset to an address,
 * validates it, then inserts and arms the probe.  Returns 0 on success
 * or a negative errno.
 */
int register_kprobe(struct kprobe *p)
{
	int ret;
	struct module *probed_mod;
	kprobe_opcode_t *addr;
	bool on_func_entry;

	/* Canonicalize probe address from symbol */
	addr = _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
	if (IS_ERR(addr))
		return PTR_ERR(addr);
	p->addr = addr;

	ret = warn_kprobe_rereg(p);
	if (ret)
		return ret;

	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
	p->flags &= KPROBE_FLAG_DISABLED;
	if (on_func_entry)
		p->flags |= KPROBE_FLAG_ON_FUNC_ENTRY;
	p->nmissed = 0;
	INIT_LIST_HEAD(&p->list);

	/* On success this may hold a reference on 'probed_mod'. */
	ret = check_kprobe_address_safe(p, &probed_mod);
	if (ret)
		return ret;

	ret = __register_kprobe(p);

	if (probed_mod)
		module_put(probed_mod);

	return ret;
}
1744
1745 /* Check if all probes on the 'ap' are disabled. */
aggr_kprobe_disabled(struct kprobe * ap)1746 static bool aggr_kprobe_disabled(struct kprobe *ap)
1747 {
1748 struct kprobe *kp;
1749
1750 lockdep_assert_held(&kprobe_mutex);
1751
1752 list_for_each_entry(kp, &ap->list, list)
1753 if (!kprobe_disabled(kp))
1754 /*
1755 * Since there is an active probe on the list,
1756 * we can't disable this 'ap'.
1757 */
1758 return false;
1759
1760 return true;
1761 }
1762
/*
 * Disable @p (and disarm its parent aggrprobe when no child remains
 * active).  Returns the top-level probe at p->addr, or an ERR_PTR.
 * Caller must hold 'kprobe_mutex'.
 */
static struct kprobe *__disable_kprobe(struct kprobe *p)
{
	struct kprobe *orig_p;
	int ret;

	lockdep_assert_held(&kprobe_mutex);

	/* Get an original kprobe for return */
	orig_p = __get_valid_kprobe(p);
	if (unlikely(orig_p == NULL))
		return ERR_PTR(-EINVAL);

	if (kprobe_disabled(p))
		return orig_p;

	/* Disable probe if it is a child probe */
	if (p != orig_p)
		p->flags |= KPROBE_FLAG_DISABLED;

	/* Try to disarm and disable this/parent probe */
	if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
		/*
		 * Don't be lazy here.  Even if 'kprobes_all_disarmed'
		 * is false, 'orig_p' might not have been armed yet.
		 * Note arm_all_kprobes() __tries__ to arm all kprobes
		 * on the best effort basis.
		 */
		if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
			ret = disarm_kprobe(orig_p, true);
			if (ret) {
				/* Disarm failed: undo the child disable. */
				p->flags &= ~KPROBE_FLAG_DISABLED;
				return ERR_PTR(ret);
			}
		}
		orig_p->flags |= KPROBE_FLAG_DISABLED;
	}

	return orig_p;
}
1802
/*
 * Unregister a kprobe without a scheduler synchronization.
 * The caller must call synchronize_rcu() and then
 * __unregister_kprobe_bottom() to finish the teardown.
 */
static int __unregister_kprobe_top(struct kprobe *p)
{
	struct kprobe *ap, *list_p;

	/* Disable kprobe. This will disarm it if needed. */
	ap = __disable_kprobe(p);
	if (IS_ERR(ap))
		return PTR_ERR(ap);

	WARN_ON(ap != p && !kprobe_aggrprobe(ap));

	/*
	 * If the probe is an independent(and non-optimized) kprobe
	 * (not an aggrprobe), the last kprobe on the aggrprobe, or
	 * kprobe is already disarmed, just remove from the hash list.
	 */
	if (ap == p ||
	    (list_is_singular(&ap->list) && kprobe_disarmed(ap))) {
		/*
		 * !disarmed could happen if the probe is under delayed
		 * unoptimizing.
		 */
		hlist_del_rcu(&ap->hlist);
		return 0;
	}

	/* If disabling probe has special handlers, update aggrprobe */
	if (p->post_handler && !kprobe_gone(p)) {
		list_for_each_entry(list_p, &ap->list, list) {
			if ((list_p != p) && (list_p->post_handler))
				break;
		}
		/* No other probe has post_handler */
		if (list_entry_is_head(list_p, &ap->list, list)) {
			/*
			 * For the kprobe-on-ftrace case, we keep the
			 * post_handler setting to identify this aggrprobe
			 * armed with kprobe_ipmodify_ops.
			 */
			if (!kprobe_ftrace(ap))
				ap->post_handler = NULL;
		}
	}

	/*
	 * Remove from the aggrprobe: this path will do nothing in
	 * __unregister_kprobe_bottom().
	 */
	list_del_rcu(&p->list);
	if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
		/*
		 * Try to optimize this probe again, because post
		 * handler may have been changed.
		 */
		optimize_kprobe(ap);
	return 0;

}
1864
/*
 * Second half of unregistration: free per-probe resources after the
 * grace period started by the caller's synchronize_rcu().
 */
static void __unregister_kprobe_bottom(struct kprobe *p)
{
	struct kprobe *ap;

	if (list_empty(&p->list))
		/* This is an independent kprobe */
		arch_remove_kprobe(p);
	else if (list_is_singular(&p->list)) {
		/* This is the last child of an aggrprobe */
		ap = list_entry(p->list.next, struct kprobe, list);
		list_del(&p->list);
		free_aggr_kprobe(ap);
	}
	/* Otherwise, do nothing. */
}
1880
register_kprobes(struct kprobe ** kps,int num)1881 int register_kprobes(struct kprobe **kps, int num)
1882 {
1883 int i, ret = 0;
1884
1885 if (num <= 0)
1886 return -EINVAL;
1887 for (i = 0; i < num; i++) {
1888 ret = register_kprobe(kps[i]);
1889 if (ret < 0) {
1890 if (i > 0)
1891 unregister_kprobes(kps, i);
1892 break;
1893 }
1894 }
1895 return ret;
1896 }
1897 EXPORT_SYMBOL_GPL(register_kprobes);
1898
/* Unregister a single kprobe; convenience wrapper over the array form. */
void unregister_kprobe(struct kprobe *p)
{
	unregister_kprobes(&p, 1);
}
1904
/*
 * Unregister an array of kprobes: top halves under 'kprobe_mutex', one
 * RCU grace period, then the bottom halves.  A probe whose top half
 * failed has its ->addr cleared and is skipped in the bottom pass.
 */
void unregister_kprobes(struct kprobe **kps, int num)
{
	int i;

	if (num <= 0)
		return;
	scoped_guard(mutex, &kprobe_mutex) {
		for (i = 0; i < num; i++)
			if (__unregister_kprobe_top(kps[i]) < 0)
				kps[i]->addr = NULL;
	}
	synchronize_rcu();
	for (i = 0; i < num; i++)
		if (kps[i]->addr)
			__unregister_kprobe_bottom(kps[i]);
}
1921 EXPORT_SYMBOL_GPL(unregister_kprobes);
1922
/* Default (weak) exception notifier: arches override where needed. */
int __weak kprobe_exceptions_notify(struct notifier_block *self,
				    unsigned long val, void *data)
{
	return NOTIFY_DONE;
}
1928 NOKPROBE_SYMBOL(kprobe_exceptions_notify);
1929
/* Die-notifier hook for kprobe exceptions; highest priority. */
static struct notifier_block kprobe_exceptions_nb = {
	.notifier_call = kprobe_exceptions_notify,
	.priority = 0x7fffffff /* we need to be notified first */
};
1934
1935 #ifdef CONFIG_KRETPROBES
1936
1937 #if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
1938
/* callbacks for objpool of kretprobe instances */
static int kretprobe_init_inst(void *nod, void *context)
{
	struct kretprobe_instance *ri = nod;

	/* Each instance keeps a pointer back to its holder. */
	ri->rph = context;
	return 0;
}
/* objpool teardown callback: frees the kretprobe_holder context. */
static int kretprobe_fini_pool(struct objpool_head *head, void *context)
{
	kfree(context);
	return 0;
}
1952
/* RCU callback dropping an orphaned instance back into its pool. */
static void free_rp_inst_rcu(struct rcu_head *head)
{
	struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
	struct kretprobe_holder *rph = ri->rph;

	objpool_drop(ri, &rph->pool);
}
1960 NOKPROBE_SYMBOL(free_rp_inst_rcu);
1961
/*
 * Return a used instance to its kretprobe's pool; if the kretprobe is
 * already gone, drop the instance after an RCU grace period instead.
 */
static void recycle_rp_inst(struct kretprobe_instance *ri)
{
	struct kretprobe *rp = get_kretprobe(ri);

	if (likely(rp))
		objpool_push(ri, &rp->rph->pool);
	else
		call_rcu(&ri->rcu, free_rp_inst_rcu);
}
1971 NOKPROBE_SYMBOL(recycle_rp_inst);
1972
/*
 * This function is called from delayed_put_task_struct() when a task is
 * dead and cleaned up to recycle any kretprobe instances associated with
 * this task. These left over instances represent probed functions that
 * have been called but will never return.
 */
void kprobe_flush_task(struct task_struct *tk)
{
	struct kretprobe_instance *ri;
	struct llist_node *node;

	/* Early boot, not yet initialized. */
	if (unlikely(!kprobes_initialized))
		return;

	kprobe_busy_begin();

	/* Detach the whole per-task instance list and recycle each entry. */
	node = __llist_del_all(&tk->kretprobe_instances);
	while (node) {
		ri = container_of(node, struct kretprobe_instance, llist);
		node = node->next;

		recycle_rp_inst(ri);
	}

	kprobe_busy_end();
}
2000 NOKPROBE_SYMBOL(kprobe_flush_task);
2001
/* Tear down the instance pool of @rp (idempotent: 'rph' cleared first). */
static inline void free_rp_inst(struct kretprobe *rp)
{
	struct kretprobe_holder *rph = rp->rph;

	if (!rph)
		return;
	rp->rph = NULL;
	objpool_fini(&rph->pool);
}
2011
/*
 * This assumes the 'tsk' is the current task or is not running.
 * Walk tsk's kretprobe instance list from *cur (or the head when *cur is
 * NULL) and return the next real (non-trampoline) return address,
 * updating *cur to the node it was found on.
 */
static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
						  struct llist_node **cur)
{
	struct kretprobe_instance *ri = NULL;
	struct llist_node *node = *cur;

	if (!node)
		node = tsk->kretprobe_instances.first;
	else
		node = node->next;

	while (node) {
		ri = container_of(node, struct kretprobe_instance, llist);
		if (ri->ret_addr != kretprobe_trampoline_addr()) {
			*cur = node;
			return ri->ret_addr;
		}
		node = node->next;
	}
	return NULL;
}
2034 NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);
2035
/**
 * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe
 * @tsk: Target task
 * @fp: A frame pointer
 * @cur: a storage of the loop cursor llist_node pointer for next call
 *
 * Find the correct return address modified by a kretprobe on @tsk in unsigned
 * long type. If it finds the return address, this returns that address value,
 * or this returns 0.
 * The @tsk must be 'current' or a task which is not running. @fp is a hint
 * to get the correct return address - which is compared with the
 * kretprobe_instance::fp field. The @cur is a loop cursor for searching the
 * kretprobe return addresses on the @tsk. The '*@cur' should be NULL at the
 * first call, but '@cur' itself must NOT be NULL.
 */
unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
				      struct llist_node **cur)
{
	struct kretprobe_instance *ri;
	kprobe_opcode_t *ret;

	if (WARN_ON_ONCE(!cur))
		return 0;

	/* Skip entries belonging to other stack frames (mismatched fp). */
	do {
		ret = __kretprobe_find_ret_addr(tsk, cur);
		if (!ret)
			break;
		ri = container_of(*cur, struct kretprobe_instance, llist);
	} while (ri->fp != fp);

	return (unsigned long)ret;
}
2069 NOKPROBE_SYMBOL(kretprobe_find_ret_addr);
2070
/* Weak default arch hook; see comment inside for override semantics. */
void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
					kprobe_opcode_t *correct_ret_addr)
{
	/*
	 * Do nothing by default. Please fill this to update the fake return
	 * address on the stack with the correct one on each arch if possible.
	 */
}
2079
/*
 * Core of the kretprobe trampoline: find the real return address for the
 * current frame, run the user handlers of every instance belonging to
 * this frame, then unlink and recycle those instances.  Returns the real
 * return address for the arch trampoline to jump back to.
 */
unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
					     void *frame_pointer)
{
	struct kretprobe_instance *ri = NULL;
	struct llist_node *first, *node = NULL;
	kprobe_opcode_t *correct_ret_addr;
	struct kretprobe *rp;

	/* Find correct address and all nodes for this frame. */
	correct_ret_addr = __kretprobe_find_ret_addr(current, &node);
	if (!correct_ret_addr) {
		pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n");
		BUG_ON(1);
	}

	/*
	 * Set the return address as the instruction pointer, because if the
	 * user handler calls stack_trace_save_regs() with this 'regs',
	 * the stack trace will start from the instruction pointer.
	 */
	instruction_pointer_set(regs, (unsigned long)correct_ret_addr);

	/* Run the user handler of the nodes. */
	first = current->kretprobe_instances.first;
	while (first) {
		ri = container_of(first, struct kretprobe_instance, llist);

		if (WARN_ON_ONCE(ri->fp != frame_pointer))
			break;

		rp = get_kretprobe(ri);
		if (rp && rp->handler) {
			struct kprobe *prev = kprobe_running();

			/* Run the handler with this kretprobe as 'current'. */
			__this_cpu_write(current_kprobe, &rp->kp);
			ri->ret_addr = correct_ret_addr;
			rp->handler(ri, regs);
			__this_cpu_write(current_kprobe, prev);
		}
		if (first == node)
			break;

		first = first->next;
	}

	arch_kretprobe_fixup_return(regs, correct_ret_addr);

	/* Unlink all nodes for this frame. */
	first = current->kretprobe_instances.first;
	current->kretprobe_instances.first = node->next;
	node->next = NULL;

	/* Recycle free instances. */
	while (first) {
		ri = container_of(first, struct kretprobe_instance, llist);
		first = first->next;

		recycle_rp_inst(ri);
	}

	return (unsigned long)correct_ret_addr;
}
NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)2142 NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
2143
2144 /*
2145 * This kprobe pre_handler is registered with every kretprobe. When probe
2146 * hits it will set up the return probe.
2147 */
2148 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2149 {
2150 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
2151 struct kretprobe_holder *rph = rp->rph;
2152 struct kretprobe_instance *ri;
2153
2154 ri = objpool_pop(&rph->pool);
2155 if (!ri) {
2156 rp->nmissed++;
2157 return 0;
2158 }
2159
2160 if (rp->entry_handler && rp->entry_handler(ri, regs)) {
2161 objpool_push(ri, &rph->pool);
2162 return 0;
2163 }
2164
2165 arch_prepare_kretprobe(ri, regs);
2166
2167 __llist_add(&ri->llist, ¤t->kretprobe_instances);
2168
2169 return 0;
2170 }
2171 NOKPROBE_SYMBOL(pre_handler_kretprobe);
2172 #else /* CONFIG_KRETPROBE_ON_RETHOOK */
2173 /*
2174 * This kprobe pre_handler is registered with every kretprobe. When probe
2175 * hits it will set up the return probe.
2176 */
pre_handler_kretprobe(struct kprobe * p,struct pt_regs * regs)2177 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2178 {
2179 struct kretprobe *rp = container_of(p, struct kretprobe, kp);
2180 struct kretprobe_instance *ri;
2181 struct rethook_node *rhn;
2182
2183 rhn = rethook_try_get(rp->rh);
2184 if (!rhn) {
2185 rp->nmissed++;
2186 return 0;
2187 }
2188
2189 ri = container_of(rhn, struct kretprobe_instance, node);
2190
2191 if (rp->entry_handler && rp->entry_handler(ri, regs))
2192 rethook_recycle(rhn);
2193 else
2194 rethook_hook(rhn, regs, kprobe_ftrace(p));
2195
2196 return 0;
2197 }
2198 NOKPROBE_SYMBOL(pre_handler_kretprobe);
2199
/* Rethook exit handler: runs the kretprobe's return handler for one instance. */
static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
				      unsigned long ret_addr,
				      struct pt_regs *regs)
{
	struct kretprobe *rp = (struct kretprobe *)data;
	struct kretprobe_instance *ri;
	struct kprobe_ctlblk *kcb;

	/* The data must NOT be null. This means rethook data structure is broken. */
	if (WARN_ON_ONCE(!data) || !rp->handler)
		return;

	/* Present this as an active kprobe hit while the handler runs. */
	__this_cpu_write(current_kprobe, &rp->kp);
	kcb = get_kprobe_ctlblk();
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;

	ri = container_of(rh, struct kretprobe_instance, node);
	rp->handler(ri, regs);

	/* Clear the per-cpu running-kprobe marker set above. */
	__this_cpu_write(current_kprobe, NULL);
}
NOKPROBE_SYMBOL(kretprobe_rethook_handler);
2222
2223 #endif /* !CONFIG_KRETPROBE_ON_RETHOOK */
2224
2225 /**
2226 * kprobe_on_func_entry() -- check whether given address is function entry
2227 * @addr: Target address
2228 * @sym: Target symbol name
2229 * @offset: The offset from the symbol or the address
2230 *
2231 * This checks whether the given @addr+@offset or @sym+@offset is on the
2232 * function entry address or not.
2233 * This returns 0 if it is the function entry, or -EINVAL if it is not.
2234 * And also it returns -ENOENT if it fails the symbol or address lookup.
2235 * Caller must pass @addr or @sym (either one must be NULL), or this
2236 * returns -EINVAL.
2237 */
kprobe_on_func_entry(kprobe_opcode_t * addr,const char * sym,unsigned long offset)2238 int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
2239 {
2240 bool on_func_entry;
2241 kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset, &on_func_entry);
2242
2243 if (IS_ERR(kp_addr))
2244 return PTR_ERR(kp_addr);
2245
2246 if (!on_func_entry)
2247 return -EINVAL;
2248
2249 return 0;
2250 }
2251
/*
 * Register a return probe: validate the target address, allocate the
 * per-config instance backend (rethook or objpool), then arm the entry
 * kprobe. Returns 0 on success or a negative errno.
 */
int register_kretprobe(struct kretprobe *rp)
{
	int ret;
	int i;
	void *addr;

	/* kretprobes may only be placed on a function entry. */
	ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset);
	if (ret)
		return ret;

	/* If only 'rp->kp.addr' is specified, check reregistering kprobes */
	if (rp->kp.addr && warn_kprobe_rereg(&rp->kp))
		return -EINVAL;

	if (kretprobe_blacklist_size) {
		addr = kprobe_addr(&rp->kp);
		if (IS_ERR(addr))
			return PTR_ERR(addr);

		/* Reject targets on the kretprobe-specific blacklist. */
		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			if (kretprobe_blacklist[i].addr == addr)
				return -EINVAL;
		}
	}

	if (rp->data_size > KRETPROBE_MAX_DATA_SIZE)
		return -E2BIG;

	rp->kp.pre_handler = pre_handler_kretprobe;
	rp->kp.post_handler = NULL;

	/* Pre-allocate memory for max kretprobe instances */
	if (rp->maxactive <= 0)
		rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());

#ifdef CONFIG_KRETPROBE_ON_RETHOOK
	rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler,
			       sizeof(struct kretprobe_instance) +
			       rp->data_size, rp->maxactive);
	if (IS_ERR(rp->rh))
		return PTR_ERR(rp->rh);

	rp->nmissed = 0;
	/* Establish function entry probe point */
	ret = register_kprobe(&rp->kp);
	if (ret != 0) {
		/* Undo the rethook allocation on registration failure. */
		rethook_free(rp->rh);
		rp->rh = NULL;
	}
#else /* !CONFIG_KRETPROBE_ON_RETHOOK */
	rp->rph = kzalloc_obj(struct kretprobe_holder);
	if (!rp->rph)
		return -ENOMEM;

	if (objpool_init(&rp->rph->pool, rp->maxactive, rp->data_size +
			 sizeof(struct kretprobe_instance), GFP_KERNEL,
			 rp->rph, kretprobe_init_inst, kretprobe_fini_pool)) {
		kfree(rp->rph);
		rp->rph = NULL;
		return -ENOMEM;
	}
	rcu_assign_pointer(rp->rph->rp, rp);
	rp->nmissed = 0;
	/* Establish function entry probe point */
	ret = register_kprobe(&rp->kp);
	if (ret != 0)
		free_rp_inst(rp);
#endif
	return ret;
}
EXPORT_SYMBOL_GPL(register_kretprobe);
2323
register_kretprobes(struct kretprobe ** rps,int num)2324 int register_kretprobes(struct kretprobe **rps, int num)
2325 {
2326 int ret = 0, i;
2327
2328 if (num <= 0)
2329 return -EINVAL;
2330 for (i = 0; i < num; i++) {
2331 ret = register_kretprobe(rps[i]);
2332 if (ret < 0) {
2333 if (i > 0)
2334 unregister_kretprobes(rps, i);
2335 break;
2336 }
2337 }
2338 return ret;
2339 }
2340 EXPORT_SYMBOL_GPL(register_kretprobes);
2341
/* Unregister a single kretprobe through the batched API. */
void unregister_kretprobe(struct kretprobe *rp)
{
	unregister_kretprobes(&rp, 1);
}
EXPORT_SYMBOL_GPL(unregister_kretprobe);
2347
/*
 * Unregister an array of kretprobes in two phases: detach each probe and
 * cut the instance back-references, wait an RCU grace period, then free
 * the remaining per-probe resources.
 */
void unregister_kretprobes(struct kretprobe **rps, int num)
{
	int i;

	if (num <= 0)
		return;
	for (i = 0; i < num; i++) {
		guard(mutex)(&kprobe_mutex);

		/* On failure, clear .addr so the second phase skips this probe. */
		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
			rps[i]->kp.addr = NULL;
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
		rethook_free(rps[i]->rh);
#else
		rcu_assign_pointer(rps[i]->rph->rp, NULL);
#endif
	}

	/* Let in-flight handlers that may still see old pointers drain. */
	synchronize_rcu();
	for (i = 0; i < num; i++) {
		if (rps[i]->kp.addr) {
			__unregister_kprobe_bottom(&rps[i]->kp);
#ifndef CONFIG_KRETPROBE_ON_RETHOOK
			free_rp_inst(rps[i]);
#endif
		}
	}
}
EXPORT_SYMBOL_GPL(unregister_kretprobes);
2377
2378 #else /* CONFIG_KRETPROBES */
/* CONFIG_KRETPROBES=n: return probes are not supported. */
int register_kretprobe(struct kretprobe *rp)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(register_kretprobe);
2384
/* CONFIG_KRETPROBES=n: return probes are not supported. */
int register_kretprobes(struct kretprobe **rps, int num)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(register_kretprobes);
2390
/* CONFIG_KRETPROBES=n: nothing was registered, so nothing to undo. */
void unregister_kretprobe(struct kretprobe *rp)
{
}
EXPORT_SYMBOL_GPL(unregister_kretprobe);
2395
/* CONFIG_KRETPROBES=n: nothing was registered, so nothing to undo. */
void unregister_kretprobes(struct kretprobe **rps, int num)
{
}
EXPORT_SYMBOL_GPL(unregister_kretprobes);
2400
/* CONFIG_KRETPROBES=n stub: kept so address comparisons elsewhere still link. */
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
	return 0;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
2406
2407 #endif /* CONFIG_KRETPROBES */
2408
/* Set the kprobe gone and remove its instruction buffer. */
static void kill_kprobe(struct kprobe *p)
{
	struct kprobe *kp;

	lockdep_assert_held(&kprobe_mutex);

	/*
	 * The module is going away. We should disarm the kprobe which
	 * is using ftrace, because ftrace framework is still available at
	 * 'MODULE_STATE_GOING' notification.
	 */
	if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
		disarm_kprobe_ftrace(p);

	p->flags |= KPROBE_FLAG_GONE;
	if (kprobe_aggrprobe(p)) {
		/*
		 * If this is an aggr_kprobe, we have to list all the
		 * chained probes and mark them GONE.
		 */
		list_for_each_entry(kp, &p->list, list)
			kp->flags |= KPROBE_FLAG_GONE;
		p->post_handler = NULL;
		kill_optimized_kprobe(p);
	}
	/*
	 * Here, we can remove insn_slot safely, because no thread calls
	 * the original probed function (which will be freed soon) any more.
	 */
	arch_remove_kprobe(p);
}
2441
2442 /* Disable one kprobe */
disable_kprobe(struct kprobe * kp)2443 int disable_kprobe(struct kprobe *kp)
2444 {
2445 struct kprobe *p;
2446
2447 guard(mutex)(&kprobe_mutex);
2448
2449 /* Disable this kprobe */
2450 p = __disable_kprobe(kp);
2451
2452 return IS_ERR(p) ? PTR_ERR(p) : 0;
2453 }
2454 EXPORT_SYMBOL_GPL(disable_kprobe);
2455
/* Enable one kprobe */
int enable_kprobe(struct kprobe *kp)
{
	int ret = 0;
	struct kprobe *p;

	guard(mutex)(&kprobe_mutex);

	/* Check whether specified probe is valid. */
	p = __get_valid_kprobe(kp);
	if (unlikely(p == NULL))
		return -EINVAL;

	if (kprobe_gone(kp))
		/* This kprobe has gone, we couldn't enable it. */
		return -EINVAL;

	/* 'p' is the representative probe; clear the flag on 'kp' too if they differ. */
	if (p != kp)
		kp->flags &= ~KPROBE_FLAG_DISABLED;

	if (!kprobes_all_disarmed && kprobe_disabled(p)) {
		p->flags &= ~KPROBE_FLAG_DISABLED;
		ret = arm_kprobe(p);
		if (ret) {
			/* Arming failed: restore the DISABLED flags cleared above. */
			p->flags |= KPROBE_FLAG_DISABLED;
			if (p != kp)
				kp->flags |= KPROBE_FLAG_DISABLED;
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(enable_kprobe);
2488
/* Caller must NOT call this in usual path. This is only for critical case */
void dump_kprobe(struct kprobe *kp)
{
	/* Identifying fields only; %pS resolves the address symbolically. */
	pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n",
	       kp->symbol_name, kp->offset, kp->addr);
}
NOKPROBE_SYMBOL(dump_kprobe);
2496
kprobe_add_ksym_blacklist(unsigned long entry)2497 int kprobe_add_ksym_blacklist(unsigned long entry)
2498 {
2499 struct kprobe_blacklist_entry *ent;
2500 unsigned long offset = 0, size = 0;
2501
2502 if (!kernel_text_address(entry) ||
2503 !kallsyms_lookup_size_offset(entry, &size, &offset))
2504 return -EINVAL;
2505
2506 ent = kmalloc_obj(*ent);
2507 if (!ent)
2508 return -ENOMEM;
2509 ent->start_addr = entry;
2510 ent->end_addr = entry + size;
2511 INIT_LIST_HEAD(&ent->list);
2512 list_add_tail(&ent->list, &kprobe_blacklist);
2513
2514 return (int)size;
2515 }
2516
/* Add all symbols in given area into kprobe blacklist */
int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
{
	unsigned long addr = start;

	while (addr < end) {
		int len = kprobe_add_ksym_blacklist(addr);

		if (len < 0)
			return len;
		/* Alias symbols report size 0; step one byte to make progress. */
		addr += len ? len : 1;
	}
	return 0;
}
2532
/* Weak default: no arch-specific kprobe kallsyms entries to enumerate. */
int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
				   char *type, char *sym)
{
	return -ERANGE;
}
2538
/*
 * Enumerate kprobe-owned symbols: first the generic insn slot caches
 * (when configured), then any arch-provided entries. Returns 0 when
 * @symnum was found, -ERANGE past the end.
 */
int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
		       char *sym)
{
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
	if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
		return 0;
#ifdef CONFIG_OPTPROBES
	if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
		return 0;
#endif
#endif
	if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
		return 0;
	return -ERANGE;
}
2554
/* Weak default: no arch-specific blacklist entries to add. */
int __init __weak arch_populate_kprobe_blacklist(void)
{
	return 0;
}
2559
2560 /*
2561 * Lookup and populate the kprobe_blacklist.
2562 *
2563 * Unlike the kretprobe blacklist, we'll need to determine
2564 * the range of addresses that belong to the said functions,
2565 * since a kprobe need not necessarily be at the beginning
2566 * of a function.
2567 */
populate_kprobe_blacklist(unsigned long * start,unsigned long * end)2568 static int __init populate_kprobe_blacklist(unsigned long *start,
2569 unsigned long *end)
2570 {
2571 unsigned long entry;
2572 unsigned long *iter;
2573 int ret;
2574
2575 for (iter = start; iter < end; iter++) {
2576 entry = (unsigned long)dereference_symbol_descriptor((void *)*iter);
2577 ret = kprobe_add_ksym_blacklist(entry);
2578 if (ret == -EINVAL)
2579 continue;
2580 if (ret < 0)
2581 return ret;
2582 }
2583
2584 /* Symbols in '__kprobes_text' are blacklisted */
2585 ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
2586 (unsigned long)__kprobes_text_end);
2587 if (ret)
2588 return ret;
2589
2590 /* Symbols in 'noinstr' section are blacklisted */
2591 ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
2592 (unsigned long)__noinstr_text_end);
2593
2594 return ret ? : arch_populate_kprobe_blacklist();
2595 }
2596
2597 #ifdef CONFIG_MODULES
2598 /* Remove all symbols in given area from kprobe blacklist */
kprobe_remove_area_blacklist(unsigned long start,unsigned long end)2599 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
2600 {
2601 struct kprobe_blacklist_entry *ent, *n;
2602
2603 list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
2604 if (ent->start_addr < start || ent->start_addr >= end)
2605 continue;
2606 list_del(&ent->list);
2607 kfree(ent);
2608 }
2609 }
2610
/* Drop the blacklist entry that starts exactly at @entry, if any. */
static void kprobe_remove_ksym_blacklist(unsigned long entry)
{
	kprobe_remove_area_blacklist(entry, entry + 1);
}
2615
add_module_kprobe_blacklist(struct module * mod)2616 static void add_module_kprobe_blacklist(struct module *mod)
2617 {
2618 unsigned long start, end;
2619 int i;
2620
2621 if (mod->kprobe_blacklist) {
2622 for (i = 0; i < mod->num_kprobe_blacklist; i++)
2623 kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
2624 }
2625
2626 start = (unsigned long)mod->kprobes_text_start;
2627 if (start) {
2628 end = start + mod->kprobes_text_size;
2629 kprobe_add_area_blacklist(start, end);
2630 }
2631
2632 start = (unsigned long)mod->noinstr_text_start;
2633 if (start) {
2634 end = start + mod->noinstr_text_size;
2635 kprobe_add_area_blacklist(start, end);
2636 }
2637 }
2638
remove_module_kprobe_blacklist(struct module * mod)2639 static void remove_module_kprobe_blacklist(struct module *mod)
2640 {
2641 unsigned long start, end;
2642 int i;
2643
2644 if (mod->kprobe_blacklist) {
2645 for (i = 0; i < mod->num_kprobe_blacklist; i++)
2646 kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
2647 }
2648
2649 start = (unsigned long)mod->kprobes_text_start;
2650 if (start) {
2651 end = start + mod->kprobes_text_size;
2652 kprobe_remove_area_blacklist(start, end);
2653 }
2654
2655 start = (unsigned long)mod->noinstr_text_start;
2656 if (start) {
2657 end = start + mod->noinstr_text_size;
2658 kprobe_remove_area_blacklist(start, end);
2659 }
2660 }
2661
/* Module notifier call back, checking kprobes on the module */
static int kprobes_module_callback(struct notifier_block *nb,
				   unsigned long val, void *data)
{
	struct module *mod = data;
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i;
	int checkcore = (val == MODULE_STATE_GOING);

	guard(mutex)(&kprobe_mutex);

	if (val == MODULE_STATE_COMING)
		add_module_kprobe_blacklist(mod);

	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
		return NOTIFY_DONE;

	/*
	 * When 'MODULE_STATE_GOING' was notified, both of module '.text' and
	 * '.init.text' sections would be freed. When 'MODULE_STATE_LIVE' was
	 * notified, only '.init.text' section would be freed. We need to
	 * disable kprobes which have been inserted in the sections.
	 */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry(p, head, hlist)
			if (within_module_init((unsigned long)p->addr, mod) ||
			    (checkcore &&
			     within_module_core((unsigned long)p->addr, mod))) {
				/*
				 * The vaddr this probe is installed will soon
				 * be vfreed but not synced to disk. Hence,
				 * disarming the breakpoint isn't needed.
				 *
				 * Note, this will also move any optimized probes
				 * that are pending to be removed from their
				 * corresponding lists to the 'freeing_list' and
				 * will not be touched by the delayed
				 * kprobe_optimizer() work handler.
				 */
				kill_kprobe(p);
			}
	}
	if (val == MODULE_STATE_GOING)
		remove_module_kprobe_blacklist(mod);
	return NOTIFY_DONE;
}
2710
/* Module notifier with default (0) priority. */
static struct notifier_block kprobe_module_nb = {
	.notifier_call = kprobes_module_callback,
	.priority = 0
};

/* Hook kprobes into the module load/unload lifecycle. */
static int kprobe_register_module_notifier(void)
{
	return register_module_notifier(&kprobe_module_nb);
}
2720 #else
/* CONFIG_MODULES=n: no modules to track, report success. */
static int kprobe_register_module_notifier(void)
{
	return 0;
}
2725 #endif /* CONFIG_MODULES */
2726
kprobe_free_init_mem(void)2727 void kprobe_free_init_mem(void)
2728 {
2729 void *start = (void *)(&__init_begin);
2730 void *end = (void *)(&__init_end);
2731 struct hlist_head *head;
2732 struct kprobe *p;
2733 int i;
2734
2735 guard(mutex)(&kprobe_mutex);
2736
2737 /* Kill all kprobes on initmem because the target code has been freed. */
2738 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2739 head = &kprobe_table[i];
2740 hlist_for_each_entry(p, head, hlist) {
2741 if (start <= (void *)p->addr && (void *)p->addr < end)
2742 kill_kprobe(p);
2743 }
2744 }
2745 }
2746
/* Early-boot initialization of the kprobes subsystem. */
static int __init init_kprobes(void)
{
	int i, err;

	/* FIXME allocate the probe table, currently defined statically */
	/* initialize all list heads */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&kprobe_table[i]);

	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
					__stop_kprobe_blacklist);
	if (err)
		pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);

	if (kretprobe_blacklist_size) {
		/* lookup the function address from its name */
		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
			kretprobe_blacklist[i].addr =
				kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
			if (!kretprobe_blacklist[i].addr)
				pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n",
				       kretprobe_blacklist[i].name);
		}
	}

	/* By default, kprobes are armed */
	kprobes_all_disarmed = false;

	/* Initialize the optimization infrastructure */
	init_optprobe();

	err = arch_init_kprobes();
	if (!err)
		err = register_die_notifier(&kprobe_exceptions_nb);
	if (!err)
		err = kprobe_register_module_notifier();

	/* Marked initialized only if every step above succeeded. */
	kprobes_initialized = (err == 0);
	kprobe_sysctls_init();
	return err;
}
early_initcall(init_kprobes);
2789
#if defined(CONFIG_OPTPROBES)
/* Deferred (subsys_initcall-time) kick of kprobe optimization. */
static int __init init_optprobes(void)
{
	/*
	 * Enable kprobe optimization - this kicks the optimizer which
	 * depends on synchronize_rcu_tasks() and ksoftirqd, that is
	 * not spawned in early initcall. So delay the optimization.
	 */
	optimize_all_kprobes();

	return 0;
}
subsys_initcall(init_optprobes);
#endif
2804
2805 #ifdef CONFIG_DEBUG_FS
report_probe(struct seq_file * pi,struct kprobe * p,const char * sym,int offset,char * modname,struct kprobe * pp)2806 static void report_probe(struct seq_file *pi, struct kprobe *p,
2807 const char *sym, int offset, char *modname, struct kprobe *pp)
2808 {
2809 char *kprobe_type;
2810 void *addr = p->addr;
2811
2812 if (p->pre_handler == pre_handler_kretprobe)
2813 kprobe_type = "r";
2814 else
2815 kprobe_type = "k";
2816
2817 if (!kallsyms_show_value(pi->file->f_cred))
2818 addr = NULL;
2819
2820 if (sym)
2821 seq_printf(pi, "%px %s %s+0x%x %s ",
2822 addr, kprobe_type, sym, offset,
2823 (modname ? modname : " "));
2824 else /* try to use %pS */
2825 seq_printf(pi, "%px %s %pS ",
2826 addr, kprobe_type, p->addr);
2827
2828 if (!pp)
2829 pp = p;
2830 seq_printf(pi, "%s%s%s%s\n",
2831 (kprobe_gone(p) ? "[GONE]" : ""),
2832 ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""),
2833 (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
2834 (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
2835 }
2836
/* seq_file start: one iteration slot per hash-table bucket. */
static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
{
	if (*pos >= KPROBE_TABLE_SIZE)
		return NULL;
	return pos;
}
2841
/* seq_file next: advance to the following bucket, stop past the last. */
static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
{
	return (++(*pos) < KPROBE_TABLE_SIZE) ? pos : NULL;
}
2849
/* seq_file stop: no per-walk state to release. */
static void kprobe_seq_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}
2854
/* seq_file show: report every probe in hash bucket *v. */
static int show_kprobe_addr(struct seq_file *pi, void *v)
{
	struct hlist_head *head;
	struct kprobe *p, *kp;
	const char *sym;
	unsigned int i = *(loff_t *) v;
	unsigned long offset = 0;
	char *modname, namebuf[KSYM_NAME_LEN];

	head = &kprobe_table[i];
	/* Disable preemption while traversing the RCU-protected hlist. */
	preempt_disable();
	hlist_for_each_entry_rcu(p, head, hlist) {
		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
				      &offset, &modname, namebuf);
		/* For an aggregated probe, report each chained child. */
		if (kprobe_aggrprobe(p)) {
			list_for_each_entry_rcu(kp, &p->list, list)
				report_probe(pi, kp, sym, offset, modname, p);
		} else
			report_probe(pi, p, sym, offset, modname, NULL);
	}
	preempt_enable();
	return 0;
}
2878
/* seq_file operations backing /sys/kernel/debug/kprobes/list */
static const struct seq_operations kprobes_sops = {
	.start = kprobe_seq_start,
	.next  = kprobe_seq_next,
	.stop  = kprobe_seq_stop,
	.show  = show_kprobe_addr
};

DEFINE_SEQ_ATTRIBUTE(kprobes);
2887
/* kprobes/blacklist -- shows which functions can not be probed */
static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
{
	/* Held across the whole traversal; released in ..._seq_stop(). */
	mutex_lock(&kprobe_mutex);
	return seq_list_start(&kprobe_blacklist, *pos);
}
2894
/* Advance to the next blacklist entry. */
static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &kprobe_blacklist, pos);
}
2899
kprobe_blacklist_seq_show(struct seq_file * m,void * v)2900 static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
2901 {
2902 struct kprobe_blacklist_entry *ent =
2903 list_entry(v, struct kprobe_blacklist_entry, list);
2904
2905 /*
2906 * If '/proc/kallsyms' is not showing kernel address, we won't
2907 * show them here either.
2908 */
2909 if (!kallsyms_show_value(m->file->f_cred))
2910 seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL,
2911 (void *)ent->start_addr);
2912 else
2913 seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
2914 (void *)ent->end_addr, (void *)ent->start_addr);
2915 return 0;
2916 }
2917
/* Release the mutex taken in kprobe_blacklist_seq_start(). */
static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
{
	mutex_unlock(&kprobe_mutex);
}
2922
/* seq_file operations backing /sys/kernel/debug/kprobes/blacklist */
static const struct seq_operations kprobe_blacklist_sops = {
	.start = kprobe_blacklist_seq_start,
	.next = kprobe_blacklist_seq_next,
	.stop = kprobe_blacklist_seq_stop,
	.show = kprobe_blacklist_seq_show,
};
DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist);
2930
/* Re-arm every non-disabled kprobe after a global disarm. */
static int arm_all_kprobes(void)
{
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i, total = 0, errors = 0;
	int err, ret = 0;

	guard(mutex)(&kprobe_mutex);

	/* If kprobes are armed, just return */
	if (!kprobes_all_disarmed)
		return 0;

	/*
	 * optimize_kprobe() called by arm_kprobe() checks
	 * kprobes_all_disarmed, so set kprobes_all_disarmed before
	 * arm_kprobe.
	 */
	kprobes_all_disarmed = false;
	/* Arming kprobes doesn't optimize kprobe itself */
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		/* Arm all kprobes on a best-effort basis */
		hlist_for_each_entry(p, head, hlist) {
			if (!kprobe_disabled(p)) {
				err = arm_kprobe(p);
				if (err) {
					/* Keep going; report the last error. */
					errors++;
					ret = err;
				}
				total++;
			}
		}
	}

	if (errors)
		pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n",
			errors, total);
	else
		pr_info("Kprobes globally enabled\n");

	return ret;
}
2974
/* Disarm every armed kprobe (except arch trampoline probes). */
static int disarm_all_kprobes(void)
{
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i, total = 0, errors = 0;
	int err, ret = 0;

	guard(mutex)(&kprobe_mutex);

	/* If kprobes are already disarmed, just return */
	if (kprobes_all_disarmed)
		return 0;

	kprobes_all_disarmed = true;

	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		/* Disarm all kprobes on a best-effort basis */
		hlist_for_each_entry(p, head, hlist) {
			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
				err = disarm_kprobe(p, false);
				if (err) {
					/* Keep going; report the last error. */
					errors++;
					ret = err;
				}
				total++;
			}
		}
	}

	if (errors)
		pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n",
			errors, total);
	else
		pr_info("Kprobes globally disabled\n");

	/* Wait for disarming all kprobes by optimizer */
	wait_for_kprobe_optimizer_locked();
	return ret;
}
3015
3016 /*
3017 * XXX: The debugfs bool file interface doesn't allow for callbacks
3018 * when the bool state is switched. We can reuse that facility when
3019 * available
3020 */
read_enabled_file_bool(struct file * file,char __user * user_buf,size_t count,loff_t * ppos)3021 static ssize_t read_enabled_file_bool(struct file *file,
3022 char __user *user_buf, size_t count, loff_t *ppos)
3023 {
3024 char buf[3];
3025
3026 if (!kprobes_all_disarmed)
3027 buf[0] = '1';
3028 else
3029 buf[0] = '0';
3030 buf[1] = '\n';
3031 buf[2] = 0x00;
3032 return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
3033 }
3034
write_enabled_file_bool(struct file * file,const char __user * user_buf,size_t count,loff_t * ppos)3035 static ssize_t write_enabled_file_bool(struct file *file,
3036 const char __user *user_buf, size_t count, loff_t *ppos)
3037 {
3038 bool enable;
3039 int ret;
3040
3041 ret = kstrtobool_from_user(user_buf, count, &enable);
3042 if (ret)
3043 return ret;
3044
3045 ret = enable ? arm_all_kprobes() : disarm_all_kprobes();
3046 if (ret)
3047 return ret;
3048
3049 return count;
3050 }
3051
/* File operations for /sys/kernel/debug/kprobes/enabled */
static const struct file_operations fops_kp = {
	.read =         read_enabled_file_bool,
	.write =        write_enabled_file_bool,
	.llseek =	default_llseek,
};
3057
/* Create the /sys/kernel/debug/kprobes/ directory and its files. */
static int __init debugfs_kprobe_init(void)
{
	struct dentry *dir;

	dir = debugfs_create_dir("kprobes", NULL);

	/* 0400: listings may expose kernel addresses; root read-only. */
	debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops);

	/* 0600: global enable/disable switch. */
	debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp);

	debugfs_create_file("blacklist", 0400, dir, NULL,
			    &kprobe_blacklist_fops);

	return 0;
}

late_initcall(debugfs_kprobe_init);
3075 #endif /* CONFIG_DEBUG_FS */
3076