xref: /linux/kernel/kprobes.c (revision 5ef268cb7a0aac55521fd9881f1939fa94a8988e)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *  Kernel Probes (KProbes)
4  *
5  * Copyright (C) IBM Corporation, 2002, 2004
6  *
7  * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
8  *		Probes initial implementation (includes suggestions from
9  *		Rusty Russell).
10  * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
11  *		hlists and exceptions notifier as suggested by Andi Kleen.
12  * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
13  *		interface to access function arguments.
14  * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
15  *		exceptions notifier to be first on the priority list.
16  * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
17  *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
18  *		<prasanna@in.ibm.com> added function-return probes.
19  */
20 
21 #define pr_fmt(fmt) "kprobes: " fmt
22 
23 #include <linux/kprobes.h>
24 #include <linux/hash.h>
25 #include <linux/init.h>
26 #include <linux/slab.h>
27 #include <linux/stddef.h>
28 #include <linux/export.h>
29 #include <linux/kallsyms.h>
30 #include <linux/freezer.h>
31 #include <linux/seq_file.h>
32 #include <linux/debugfs.h>
33 #include <linux/sysctl.h>
34 #include <linux/kdebug.h>
35 #include <linux/kthread.h>
36 #include <linux/memory.h>
37 #include <linux/ftrace.h>
38 #include <linux/cpu.h>
39 #include <linux/jump_label.h>
40 #include <linux/static_call.h>
41 #include <linux/perf_event.h>
42 #include <linux/execmem.h>
43 #include <linux/cleanup.h>
44 #include <linux/wait.h>
45 
46 #include <asm/sections.h>
47 #include <asm/cacheflush.h>
48 #include <asm/errno.h>
49 #include <linux/uaccess.h>
50 
/* 'kprobe_table' below is sized as 2^KPROBE_HASH_BITS buckets. */
#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)

/*
 * Unless both CONFIG_OPTPROBES and CONFIG_SYSCTL are enabled,
 * kprobe_sysctls_init() expands to an empty statement.
 */
#if !(defined(CONFIG_OPTPROBES) && defined(CONFIG_SYSCTL))
#define kprobe_sysctls_init() do { } while (0)
#endif
57 
58 static int kprobes_initialized;
59 /* kprobe_table can be accessed by
60  * - Normal hlist traversal and RCU add/del under 'kprobe_mutex' is held.
61  * Or
62  * - RCU hlist traversal under disabling preempt (breakpoint handlers)
63  */
64 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
65 
66 /* NOTE: change this value only with 'kprobe_mutex' held */
67 static bool kprobes_all_disarmed;
68 
69 /* This protects 'kprobe_table' and 'optimizing_list' */
70 static DEFINE_MUTEX(kprobe_mutex);
71 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
72 
73 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
74 					unsigned int __unused)
75 {
76 	return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
77 }
78 
/*
 * Blacklist: a list of 'struct kprobe_blacklist_entry' describing the
 * places where kprobes can not be placed.
 */
static LIST_HEAD(kprobe_blacklist);
84 
85 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
86 /*
87  * 'kprobe::ainsn.insn' points to the copy of the instruction to be
88  * single-stepped. x86_64, POWER4 and above have no-exec support and
89  * stepping on the instruction on a vmalloced/kmalloced/data page
90  * is a recipe for disaster
91  */
92 struct kprobe_insn_page {
93 	struct list_head list;
94 	kprobe_opcode_t *insns;		/* Page of instruction slots */
95 	struct kprobe_insn_cache *cache;
96 	int nused;
97 	int ngarbage;
98 	char slot_used[];
99 };
100 
101 static int slots_per_page(struct kprobe_insn_cache *c)
102 {
103 	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
104 }
105 
/* State of a single entry of 'kprobe_insn_page::slot_used'. */
enum kprobe_slot_state {
	/* Free; may be handed out by __get_insn_slot(). */
	SLOT_CLEAN = 0,
	/* Released as garbage; reclaimed by collect_garbage_slots(). */
	SLOT_DIRTY = 1,
	/* Currently handed out. */
	SLOT_USED = 2,
};
111 
112 void __weak *alloc_insn_page(void)
113 {
114 	/*
115 	 * Use execmem_alloc() so this page is within +/- 2GB of where the
116 	 * kernel image and loaded module images reside. This is required
117 	 * for most of the architectures.
118 	 * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
119 	 */
120 	return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
121 }
122 
/* Release an instruction page through execmem_free(). */
static void free_insn_page(void *page)
{
	execmem_free(page);
}
127 
128 struct kprobe_insn_cache kprobe_insn_slots = {
129 	.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
130 	.alloc = alloc_insn_page,
131 	.free = free_insn_page,
132 	.sym = KPROBE_INSN_PAGE_SYM,
133 	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
134 	.insn_size = MAX_INSN_SIZE,
135 	.nr_garbage = 0,
136 };
137 static int collect_garbage_slots(struct kprobe_insn_cache *c);
138 
139 /**
140  * __get_insn_slot - Find a slot on an executable page for an instruction.
141  * @c: Pointer to kprobe instruction cache
142  *
143  * Description: Locates available slot on existing executable pages,
144  *              allocates an executable page if there's no room on existing ones.
145  * Return: Pointer to instruction slot on success, NULL on failure.
146  */
147 kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
148 {
149 	struct kprobe_insn_page *kip;
150 
151 	/* Since the slot array is not protected by rcu, we need a mutex */
152 	guard(mutex)(&c->mutex);
153 	do {
154 		guard(rcu)();
155 		list_for_each_entry_rcu(kip, &c->pages, list) {
156 			if (kip->nused < slots_per_page(c)) {
157 				int i;
158 
159 				for (i = 0; i < slots_per_page(c); i++) {
160 					if (kip->slot_used[i] == SLOT_CLEAN) {
161 						kip->slot_used[i] = SLOT_USED;
162 						kip->nused++;
163 						return kip->insns + (i * c->insn_size);
164 					}
165 				}
166 				/* kip->nused is broken. Fix it. */
167 				kip->nused = slots_per_page(c);
168 				WARN_ON(1);
169 			}
170 		}
171 	/* If there are any garbage slots, collect it and try again. */
172 	} while (c->nr_garbage && collect_garbage_slots(c) == 0);
173 
174 	/* All out of space.  Need to allocate a new page. */
175 	kip = kmalloc_flex(*kip, slot_used, slots_per_page(c));
176 	if (!kip)
177 		return NULL;
178 
179 	kip->insns = c->alloc();
180 	if (!kip->insns) {
181 		kfree(kip);
182 		return NULL;
183 	}
184 	INIT_LIST_HEAD(&kip->list);
185 	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
186 	kip->slot_used[0] = SLOT_USED;
187 	kip->nused = 1;
188 	kip->ngarbage = 0;
189 	kip->cache = c;
190 	list_add_rcu(&kip->list, &c->pages);
191 
192 	/* Record the perf ksymbol register event after adding the page */
193 	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL, (unsigned long)kip->insns,
194 			   PAGE_SIZE, false, c->sym);
195 
196 	return kip->insns;
197 }
198 
199 /* Return true if all garbages are collected, otherwise false. */
200 static bool collect_one_slot(struct kprobe_insn_page *kip, int idx)
201 {
202 	kip->slot_used[idx] = SLOT_CLEAN;
203 	kip->nused--;
204 	if (kip->nused != 0)
205 		return false;
206 
207 	/*
208 	 * Page is no longer in use.  Free it unless
209 	 * it's the last one.  We keep the last one
210 	 * so as not to have to set it up again the
211 	 * next time somebody inserts a probe.
212 	 */
213 	if (!list_is_singular(&kip->list)) {
214 		/*
215 		 * Record perf ksymbol unregister event before removing
216 		 * the page.
217 		 */
218 		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_OOL,
219 				   (unsigned long)kip->insns, PAGE_SIZE, true,
220 				   kip->cache->sym);
221 		list_del_rcu(&kip->list);
222 		synchronize_rcu();
223 		kip->cache->free(kip->insns);
224 		kfree(kip);
225 	}
226 	return true;
227 }
228 
229 static int collect_garbage_slots(struct kprobe_insn_cache *c)
230 {
231 	struct kprobe_insn_page *kip, *next;
232 
233 	/* Ensure no-one is interrupted on the garbages */
234 	synchronize_rcu();
235 
236 	list_for_each_entry_safe(kip, next, &c->pages, list) {
237 		int i;
238 
239 		if (kip->ngarbage == 0)
240 			continue;
241 		kip->ngarbage = 0;	/* we will collect all garbages */
242 		for (i = 0; i < slots_per_page(c); i++) {
243 			if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
244 				break;
245 		}
246 	}
247 	c->nr_garbage = 0;
248 	return 0;
249 }
250 
251 static long __find_insn_page(struct kprobe_insn_cache *c,
252 	kprobe_opcode_t *slot, struct kprobe_insn_page **pkip)
253 {
254 	struct kprobe_insn_page *kip = NULL;
255 	long idx;
256 
257 	guard(rcu)();
258 	list_for_each_entry_rcu(kip, &c->pages, list) {
259 		idx = ((long)slot - (long)kip->insns) /
260 			(c->insn_size * sizeof(kprobe_opcode_t));
261 		if (idx >= 0 && idx < slots_per_page(c)) {
262 			*pkip = kip;
263 			return idx;
264 		}
265 	}
266 	/* Could not find this slot. */
267 	WARN_ON(1);
268 	*pkip = NULL;
269 	return -1;
270 }
271 
272 void __free_insn_slot(struct kprobe_insn_cache *c,
273 		      kprobe_opcode_t *slot, int dirty)
274 {
275 	struct kprobe_insn_page *kip = NULL;
276 	long idx;
277 
278 	guard(mutex)(&c->mutex);
279 	idx = __find_insn_page(c, slot, &kip);
280 	/* Mark and sweep: this may sleep */
281 	if (kip) {
282 		/* Check double free */
283 		WARN_ON(kip->slot_used[idx] != SLOT_USED);
284 		if (dirty) {
285 			kip->slot_used[idx] = SLOT_DIRTY;
286 			kip->ngarbage++;
287 			if (++c->nr_garbage > slots_per_page(c))
288 				collect_garbage_slots(c);
289 		} else {
290 			collect_one_slot(kip, idx);
291 		}
292 	}
293 }
294 
295 /*
296  * Check given address is on the page of kprobe instruction slots.
297  * This will be used for checking whether the address on a stack
298  * is on a text area or not.
299  */
300 bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
301 {
302 	struct kprobe_insn_page *kip;
303 	bool ret = false;
304 
305 	rcu_read_lock();
306 	list_for_each_entry_rcu(kip, &c->pages, list) {
307 		if (addr >= (unsigned long)kip->insns &&
308 		    addr < (unsigned long)kip->insns + PAGE_SIZE) {
309 			ret = true;
310 			break;
311 		}
312 	}
313 	rcu_read_unlock();
314 
315 	return ret;
316 }
317 
318 int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
319 			     unsigned long *value, char *type, char *sym)
320 {
321 	struct kprobe_insn_page *kip;
322 	int ret = -ERANGE;
323 
324 	rcu_read_lock();
325 	list_for_each_entry_rcu(kip, &c->pages, list) {
326 		if ((*symnum)--)
327 			continue;
328 		strscpy(sym, c->sym, KSYM_NAME_LEN);
329 		*type = 't';
330 		*value = (unsigned long)kip->insns;
331 		ret = 0;
332 		break;
333 	}
334 	rcu_read_unlock();
335 
336 	return ret;
337 }
338 
339 #ifdef CONFIG_OPTPROBES
340 void __weak *alloc_optinsn_page(void)
341 {
342 	return alloc_insn_page();
343 }
344 
345 void __weak free_optinsn_page(void *page)
346 {
347 	free_insn_page(page);
348 }
349 
350 /* For optimized_kprobe buffer */
351 struct kprobe_insn_cache kprobe_optinsn_slots = {
352 	.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
353 	.alloc = alloc_optinsn_page,
354 	.free = free_optinsn_page,
355 	.sym = KPROBE_OPTINSN_PAGE_SYM,
356 	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
357 	/* .insn_size is initialized later */
358 	.nr_garbage = 0,
359 };
360 #endif /* CONFIG_OPTPROBES */
361 #endif /* __ARCH_WANT_KPROBES_INSN_SLOT */
362 
363 /* We have preemption disabled.. so it is safe to use __ versions */
364 static inline void set_kprobe_instance(struct kprobe *kp)
365 {
366 	__this_cpu_write(kprobe_instance, kp);
367 }
368 
369 static inline void reset_kprobe_instance(void)
370 {
371 	__this_cpu_write(kprobe_instance, NULL);
372 }
373 
374 /*
375  * This routine is called either:
376  *	- under the 'kprobe_mutex' - during kprobe_[un]register().
377  *				OR
378  *	- with preemption disabled - from architecture specific code.
379  */
380 struct kprobe *get_kprobe(void *addr)
381 {
382 	struct hlist_head *head;
383 	struct kprobe *p;
384 
385 	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
386 	hlist_for_each_entry_rcu(p, head, hlist,
387 				 lockdep_is_held(&kprobe_mutex)) {
388 		if (p->addr == addr)
389 			return p;
390 	}
391 
392 	return NULL;
393 }
394 NOKPROBE_SYMBOL(get_kprobe);
395 
396 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
397 
398 /* Return true if 'p' is an aggregator */
399 static inline bool kprobe_aggrprobe(struct kprobe *p)
400 {
401 	return p->pre_handler == aggr_pre_handler;
402 }
403 
404 /* Return true if 'p' is unused */
405 static inline bool kprobe_unused(struct kprobe *p)
406 {
407 	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
408 	       list_empty(&p->list);
409 }
410 
411 /* Keep all fields in the kprobe consistent. */
412 static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
413 {
414 	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
415 	memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
416 }
417 
418 #ifdef CONFIG_OPTPROBES
/* NOTE: 'kprobe_mutex' protects this flag. */
static bool kprobes_allow_optimization;
421 
422 /*
423  * Call all 'kprobe::pre_handler' on the list, but ignores its return value.
424  * This must be called from arch-dep optimized caller.
425  */
426 void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
427 {
428 	struct kprobe *kp;
429 
430 	list_for_each_entry_rcu(kp, &p->list, list) {
431 		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
432 			set_kprobe_instance(kp);
433 			kp->pre_handler(kp, regs);
434 		}
435 		reset_kprobe_instance();
436 	}
437 }
438 NOKPROBE_SYMBOL(opt_pre_handler);
439 
440 /* Free optimized instructions and optimized_kprobe */
441 static void free_aggr_kprobe(struct kprobe *p)
442 {
443 	struct optimized_kprobe *op;
444 
445 	op = container_of(p, struct optimized_kprobe, kp);
446 	arch_remove_optimized_kprobe(op);
447 	arch_remove_kprobe(p);
448 	kfree(op);
449 }
450 
451 /* Return true if the kprobe is ready for optimization. */
452 static inline int kprobe_optready(struct kprobe *p)
453 {
454 	struct optimized_kprobe *op;
455 
456 	if (kprobe_aggrprobe(p)) {
457 		op = container_of(p, struct optimized_kprobe, kp);
458 		return arch_prepared_optinsn(&op->optinsn);
459 	}
460 
461 	return 0;
462 }
463 
464 /* Return true if the kprobe is disarmed. Note: p must be on hash list */
465 bool kprobe_disarmed(struct kprobe *p)
466 {
467 	struct optimized_kprobe *op;
468 
469 	/* If kprobe is not aggr/opt probe, just return kprobe is disabled */
470 	if (!kprobe_aggrprobe(p))
471 		return kprobe_disabled(p);
472 
473 	op = container_of(p, struct optimized_kprobe, kp);
474 
475 	return kprobe_disabled(p) && list_empty(&op->list);
476 }
477 
478 /* Return true if the probe is queued on (un)optimizing lists */
479 static bool kprobe_queued(struct kprobe *p)
480 {
481 	struct optimized_kprobe *op;
482 
483 	if (kprobe_aggrprobe(p)) {
484 		op = container_of(p, struct optimized_kprobe, kp);
485 		if (!list_empty(&op->list))
486 			return true;
487 	}
488 	return false;
489 }
490 
491 /*
492  * Return an optimized kprobe whose optimizing code replaces
493  * instructions including 'addr' (exclude breakpoint).
494  */
495 static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr)
496 {
497 	int i;
498 	struct kprobe *p = NULL;
499 	struct optimized_kprobe *op;
500 
501 	/* Don't check i == 0, since that is a breakpoint case. */
502 	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++)
503 		p = get_kprobe(addr - i);
504 
505 	if (p && kprobe_optready(p)) {
506 		op = container_of(p, struct optimized_kprobe, kp);
507 		if (arch_within_optimized_kprobe(op, addr))
508 			return p;
509 	}
510 
511 	return NULL;
512 }
513 
514 /* Optimization staging list, protected by 'kprobe_mutex' */
515 static LIST_HEAD(optimizing_list);
516 static LIST_HEAD(unoptimizing_list);
517 static LIST_HEAD(freeing_list);
518 
519 static void optimize_kprobe(struct kprobe *p);
520 static struct task_struct *kprobe_optimizer_task;
521 static wait_queue_head_t kprobe_optimizer_wait;
522 static atomic_t optimizer_state;
523 enum {
524 	OPTIMIZER_ST_IDLE = 0,
525 	OPTIMIZER_ST_KICKED = 1,
526 	OPTIMIZER_ST_FLUSHING = 2,
527 };
528 
529 static DECLARE_COMPLETION(optimizer_completion);
530 
531 #define OPTIMIZE_DELAY 5
532 
533 /*
534  * Optimize (replace a breakpoint with a jump) kprobes listed on
535  * 'optimizing_list'.
536  */
537 static void do_optimize_kprobes(void)
538 {
539 	lockdep_assert_held(&text_mutex);
540 	/*
541 	 * The optimization/unoptimization refers 'online_cpus' via
542 	 * stop_machine() and cpu-hotplug modifies the 'online_cpus'.
543 	 * And same time, 'text_mutex' will be held in cpu-hotplug and here.
544 	 * This combination can cause a deadlock (cpu-hotplug tries to lock
545 	 * 'text_mutex' but stop_machine() can not be done because
546 	 * the 'online_cpus' has been changed)
547 	 * To avoid this deadlock, caller must have locked cpu-hotplug
548 	 * for preventing cpu-hotplug outside of 'text_mutex' locking.
549 	 */
550 	lockdep_assert_cpus_held();
551 
552 	/* Optimization never be done when disarmed */
553 	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
554 	    list_empty(&optimizing_list))
555 		return;
556 
557 	arch_optimize_kprobes(&optimizing_list);
558 }
559 
560 /*
561  * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
562  * if need) kprobes listed on 'unoptimizing_list'.
563  */
564 static void do_unoptimize_kprobes(void)
565 {
566 	struct optimized_kprobe *op, *tmp;
567 
568 	lockdep_assert_held(&text_mutex);
569 	/* See comment in do_optimize_kprobes() */
570 	lockdep_assert_cpus_held();
571 
572 	if (!list_empty(&unoptimizing_list))
573 		arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
574 
575 	/* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
576 	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
577 		/* Switching from detour code to origin */
578 		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
579 		/* Disarm probes if marked disabled and not gone */
580 		if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
581 			arch_disarm_kprobe(&op->kp);
582 		if (kprobe_unused(&op->kp)) {
583 			/*
584 			 * Remove unused probes from hash list. After waiting
585 			 * for synchronization, these probes are reclaimed.
586 			 * (reclaiming is done by do_free_cleaned_kprobes().)
587 			 */
588 			hlist_del_rcu(&op->kp.hlist);
589 		} else
590 			list_del_init(&op->list);
591 	}
592 }
593 
594 /* Reclaim all kprobes on the 'freeing_list' */
595 static void do_free_cleaned_kprobes(void)
596 {
597 	struct optimized_kprobe *op, *tmp;
598 
599 	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
600 		list_del_init(&op->list);
601 		if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) {
602 			/*
603 			 * This must not happen, but if there is a kprobe
604 			 * still in use, keep it on kprobes hash list.
605 			 */
606 			continue;
607 		}
608 
609 		/*
610 		 * The aggregator was holding back another probe while it sat on the
611 		 * unoptimizing/freeing lists.  Now that the aggregator has been fully
612 		 * reverted we can safely retry the optimization of that sibling.
613 		 */
614 
615 		struct kprobe *_p = get_optimized_kprobe(op->kp.addr);
616 		if (unlikely(_p))
617 			optimize_kprobe(_p);
618 
619 		free_aggr_kprobe(&op->kp);
620 	}
621 }
622 
623 static void kick_kprobe_optimizer(void);
624 
625 /* Kprobe jump optimizer */
626 static void kprobe_optimizer(void)
627 {
628 	guard(mutex)(&kprobe_mutex);
629 
630 	scoped_guard(cpus_read_lock) {
631 		guard(mutex)(&text_mutex);
632 
633 		/*
634 		 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
635 		 * kprobes before waiting for quiesence period.
636 		 */
637 		do_unoptimize_kprobes();
638 
639 		/*
640 		 * Step 2: Wait for quiesence period to ensure all potentially
641 		 * preempted tasks to have normally scheduled. Because optprobe
642 		 * may modify multiple instructions, there is a chance that Nth
643 		 * instruction is preempted. In that case, such tasks can return
644 		 * to 2nd-Nth byte of jump instruction. This wait is for avoiding it.
645 		 * Note that on non-preemptive kernel, this is transparently converted
646 		 * to synchronoze_sched() to wait for all interrupts to have completed.
647 		 */
648 		synchronize_rcu_tasks();
649 
650 		/* Step 3: Optimize kprobes after quiesence period */
651 		do_optimize_kprobes();
652 
653 		/* Step 4: Free cleaned kprobes after quiesence period */
654 		do_free_cleaned_kprobes();
655 	}
656 
657 	/* Step 5: Kick optimizer again if needed. But if there is a flush requested, */
658 	if (completion_done(&optimizer_completion))
659 		complete(&optimizer_completion);
660 
661 	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
662 		kick_kprobe_optimizer();	/*normal kick*/
663 }
664 
665 static int kprobe_optimizer_thread(void *data)
666 {
667 	while (!kthread_should_stop()) {
668 		/* To avoid hung_task, wait in interruptible state. */
669 		wait_event_interruptible(kprobe_optimizer_wait,
670 			   atomic_read(&optimizer_state) != OPTIMIZER_ST_IDLE ||
671 			   kthread_should_stop());
672 
673 		if (kthread_should_stop())
674 			break;
675 
676 		/*
677 		 * If it was a normal kick, wait for OPTIMIZE_DELAY.
678 		 * This wait can be interrupted by a flush request.
679 		 */
680 		if (atomic_read(&optimizer_state) == 1)
681 			wait_event_interruptible_timeout(
682 				kprobe_optimizer_wait,
683 				atomic_read(&optimizer_state) == OPTIMIZER_ST_FLUSHING ||
684 				kthread_should_stop(),
685 				OPTIMIZE_DELAY);
686 
687 		if (kthread_should_stop())
688 			break;
689 
690 		atomic_set(&optimizer_state, OPTIMIZER_ST_IDLE);
691 
692 		kprobe_optimizer();
693 	}
694 	return 0;
695 }
696 
697 /* Start optimizer after OPTIMIZE_DELAY passed */
698 static void kick_kprobe_optimizer(void)
699 {
700 	lockdep_assert_held(&kprobe_mutex);
701 	if (atomic_cmpxchg(&optimizer_state,
702 		OPTIMIZER_ST_IDLE, OPTIMIZER_ST_KICKED) == OPTIMIZER_ST_IDLE)
703 		wake_up(&kprobe_optimizer_wait);
704 }
705 
706 static void wait_for_kprobe_optimizer_locked(void)
707 {
708 	lockdep_assert_held(&kprobe_mutex);
709 
710 	while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
711 		init_completion(&optimizer_completion);
712 		/*
713 		 * Set state to OPTIMIZER_ST_FLUSHING and wake up the thread if it's
714 		 * idle. If it's already kicked, it will see the state change.
715 		 */
716 		if (atomic_xchg_acquire(&optimizer_state,
717 			OPTIMIZER_ST_FLUSHING) != OPTIMIZER_ST_FLUSHING)
718 			wake_up(&kprobe_optimizer_wait);
719 
720 		mutex_unlock(&kprobe_mutex);
721 		wait_for_completion(&optimizer_completion);
722 		mutex_lock(&kprobe_mutex);
723 	}
724 }
725 
726 /* Wait for completing optimization and unoptimization */
727 void wait_for_kprobe_optimizer(void)
728 {
729 	guard(mutex)(&kprobe_mutex);
730 
731 	wait_for_kprobe_optimizer_locked();
732 }
733 
734 bool optprobe_queued_unopt(struct optimized_kprobe *op)
735 {
736 	struct optimized_kprobe *_op;
737 
738 	list_for_each_entry(_op, &unoptimizing_list, list) {
739 		if (op == _op)
740 			return true;
741 	}
742 
743 	return false;
744 }
745 
746 /* Optimize kprobe if p is ready to be optimized */
747 static void optimize_kprobe(struct kprobe *p)
748 {
749 	struct optimized_kprobe *op;
750 
751 	/* Check if the kprobe is disabled or not ready for optimization. */
752 	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
753 	    (kprobe_disabled(p) || kprobes_all_disarmed))
754 		return;
755 
756 	/* kprobes with 'post_handler' can not be optimized */
757 	if (p->post_handler)
758 		return;
759 
760 	op = container_of(p, struct optimized_kprobe, kp);
761 
762 	/* Check there is no other kprobes at the optimized instructions */
763 	if (arch_check_optimized_kprobe(op) < 0)
764 		return;
765 
766 	/* Check if it is already optimized. */
767 	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
768 		if (optprobe_queued_unopt(op)) {
769 			/* This is under unoptimizing. Just dequeue the probe */
770 			list_del_init(&op->list);
771 		}
772 		return;
773 	}
774 	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
775 
776 	/*
777 	 * On the 'unoptimizing_list' and 'optimizing_list',
778 	 * 'op' must have OPTIMIZED flag
779 	 */
780 	if (WARN_ON_ONCE(!list_empty(&op->list)))
781 		return;
782 
783 	list_add(&op->list, &optimizing_list);
784 	kick_kprobe_optimizer();
785 }
786 
787 /* Short cut to direct unoptimizing */
788 static void force_unoptimize_kprobe(struct optimized_kprobe *op)
789 {
790 	lockdep_assert_cpus_held();
791 	arch_unoptimize_kprobe(op);
792 	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
793 }
794 
795 /* Unoptimize a kprobe if p is optimized */
796 static void unoptimize_kprobe(struct kprobe *p, bool force)
797 {
798 	struct optimized_kprobe *op;
799 
800 	if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
801 		return; /* This is not an optprobe nor optimized */
802 
803 	op = container_of(p, struct optimized_kprobe, kp);
804 	if (!kprobe_optimized(p))
805 		return;
806 
807 	if (!list_empty(&op->list)) {
808 		if (optprobe_queued_unopt(op)) {
809 			/* Queued in unoptimizing queue */
810 			if (force) {
811 				/*
812 				 * Forcibly unoptimize the kprobe here, and queue it
813 				 * in the freeing list for release afterwards.
814 				 */
815 				force_unoptimize_kprobe(op);
816 				list_move(&op->list, &freeing_list);
817 			}
818 		} else {
819 			/* Dequeue from the optimizing queue */
820 			list_del_init(&op->list);
821 			op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
822 		}
823 		return;
824 	}
825 
826 	/* Optimized kprobe case */
827 	if (force) {
828 		/* Forcibly update the code: this is a special case */
829 		force_unoptimize_kprobe(op);
830 	} else {
831 		list_add(&op->list, &unoptimizing_list);
832 		kick_kprobe_optimizer();
833 	}
834 }
835 
836 /* Cancel unoptimizing for reusing */
837 static int reuse_unused_kprobe(struct kprobe *ap)
838 {
839 	struct optimized_kprobe *op;
840 
841 	/*
842 	 * Unused kprobe MUST be on the way of delayed unoptimizing (means
843 	 * there is still a relative jump) and disabled.
844 	 */
845 	op = container_of(ap, struct optimized_kprobe, kp);
846 	WARN_ON_ONCE(list_empty(&op->list));
847 	/* Enable the probe again */
848 	ap->flags &= ~KPROBE_FLAG_DISABLED;
849 	/* Optimize it again. (remove from 'op->list') */
850 	if (!kprobe_optready(ap))
851 		return -EINVAL;
852 
853 	optimize_kprobe(ap);
854 	return 0;
855 }
856 
857 /* Remove optimized instructions */
858 static void kill_optimized_kprobe(struct kprobe *p)
859 {
860 	struct optimized_kprobe *op;
861 
862 	op = container_of(p, struct optimized_kprobe, kp);
863 	if (!list_empty(&op->list))
864 		/* Dequeue from the (un)optimization queue */
865 		list_del_init(&op->list);
866 	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
867 
868 	if (kprobe_unused(p)) {
869 		/*
870 		 * Unused kprobe is on unoptimizing or freeing list. We move it
871 		 * to freeing_list and let the kprobe_optimizer() remove it from
872 		 * the kprobe hash list and free it.
873 		 */
874 		if (optprobe_queued_unopt(op))
875 			list_move(&op->list, &freeing_list);
876 	}
877 
878 	/* Don't touch the code, because it is already freed. */
879 	arch_remove_optimized_kprobe(op);
880 }
881 
/* Prepare the optimized instructions of 'op', except for ftrace-based 'p'. */
static inline
void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	if (kprobe_ftrace(p))
		return;

	arch_prepare_optimized_kprobe(op, p);
}
888 
889 /* Try to prepare optimized instructions */
890 static void prepare_optimized_kprobe(struct kprobe *p)
891 {
892 	struct optimized_kprobe *op;
893 
894 	op = container_of(p, struct optimized_kprobe, kp);
895 	__prepare_optimized_kprobe(op, p);
896 }
897 
898 /* Allocate new optimized_kprobe and try to prepare optimized instructions. */
899 static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
900 {
901 	struct optimized_kprobe *op;
902 
903 	op = kzalloc_obj(struct optimized_kprobe);
904 	if (!op)
905 		return NULL;
906 
907 	INIT_LIST_HEAD(&op->list);
908 	op->kp.addr = p->addr;
909 	__prepare_optimized_kprobe(op, p);
910 
911 	return &op->kp;
912 }
913 
914 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
915 
916 /*
917  * Prepare an optimized_kprobe and optimize it.
918  * NOTE: 'p' must be a normal registered kprobe.
919  */
920 static void try_to_optimize_kprobe(struct kprobe *p)
921 {
922 	struct kprobe *ap;
923 	struct optimized_kprobe *op;
924 
925 	/* Impossible to optimize ftrace-based kprobe. */
926 	if (kprobe_ftrace(p))
927 		return;
928 
929 	/* For preparing optimization, jump_label_text_reserved() is called. */
930 	guard(cpus_read_lock)();
931 	guard(jump_label_lock)();
932 	guard(mutex)(&text_mutex);
933 
934 	ap = alloc_aggr_kprobe(p);
935 	if (!ap)
936 		return;
937 
938 	op = container_of(ap, struct optimized_kprobe, kp);
939 	if (!arch_prepared_optinsn(&op->optinsn)) {
940 		/* If failed to setup optimizing, fallback to kprobe. */
941 		arch_remove_optimized_kprobe(op);
942 		kfree(op);
943 		return;
944 	}
945 
946 	init_aggr_kprobe(ap, p);
947 	optimize_kprobe(ap);	/* This just kicks optimizer thread. */
948 }
949 
950 static void optimize_all_kprobes(void)
951 {
952 	struct hlist_head *head;
953 	struct kprobe *p;
954 	unsigned int i;
955 
956 	guard(mutex)(&kprobe_mutex);
957 	/* If optimization is already allowed, just return. */
958 	if (kprobes_allow_optimization)
959 		return;
960 
961 	cpus_read_lock();
962 	kprobes_allow_optimization = true;
963 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
964 		head = &kprobe_table[i];
965 		hlist_for_each_entry(p, head, hlist)
966 			if (!kprobe_disabled(p))
967 				optimize_kprobe(p);
968 	}
969 	cpus_read_unlock();
970 	pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n");
971 }
972 
973 #ifdef CONFIG_SYSCTL
974 static void unoptimize_all_kprobes(void)
975 {
976 	struct hlist_head *head;
977 	struct kprobe *p;
978 	unsigned int i;
979 
980 	guard(mutex)(&kprobe_mutex);
981 	/* If optimization is already prohibited, just return. */
982 	if (!kprobes_allow_optimization)
983 		return;
984 
985 	cpus_read_lock();
986 	kprobes_allow_optimization = false;
987 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
988 		head = &kprobe_table[i];
989 		hlist_for_each_entry(p, head, hlist) {
990 			if (!kprobe_disabled(p))
991 				unoptimize_kprobe(p, false);
992 		}
993 	}
994 	cpus_read_unlock();
995 	/* Wait for unoptimizing completion. */
996 	wait_for_kprobe_optimizer_locked();
997 	pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
998 }
999 
1000 static DEFINE_MUTEX(kprobe_sysctl_mutex);
1001 static int sysctl_kprobes_optimization;
1002 static int proc_kprobes_optimization_handler(const struct ctl_table *table,
1003 					     int write, void *buffer,
1004 					     size_t *length, loff_t *ppos)
1005 {
1006 	int ret;
1007 
1008 	guard(mutex)(&kprobe_sysctl_mutex);
1009 	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
1010 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
1011 
1012 	if (sysctl_kprobes_optimization)
1013 		optimize_all_kprobes();
1014 	else
1015 		unoptimize_all_kprobes();
1016 
1017 	return ret;
1018 }
1019 
1020 static const struct ctl_table kprobe_sysctls[] = {
1021 	{
1022 		.procname	= "kprobes-optimization",
1023 		.data		= &sysctl_kprobes_optimization,
1024 		.maxlen		= sizeof(int),
1025 		.mode		= 0644,
1026 		.proc_handler	= proc_kprobes_optimization_handler,
1027 		.extra1		= SYSCTL_ZERO,
1028 		.extra2		= SYSCTL_ONE,
1029 	},
1030 };
1031 
1032 static void __init kprobe_sysctls_init(void)
1033 {
1034 	register_sysctl_init("debug", kprobe_sysctls);
1035 }
1036 #endif /* CONFIG_SYSCTL */
1037 
1038 /* Put a breakpoint for a probe. */
1039 static void __arm_kprobe(struct kprobe *p)
1040 {
1041 	struct kprobe *_p;
1042 
1043 	lockdep_assert_held(&text_mutex);
1044 
1045 	/* Find the overlapping optimized kprobes. */
1046 	_p = get_optimized_kprobe(p->addr);
1047 	if (unlikely(_p))
1048 		/* Fallback to unoptimized kprobe */
1049 		unoptimize_kprobe(_p, true);
1050 
1051 	arch_arm_kprobe(p);
1052 	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
1053 }
1054 
1055 /* Remove the breakpoint of a probe. */
1056 static void __disarm_kprobe(struct kprobe *p, bool reopt)
1057 {
1058 	struct kprobe *_p;
1059 
1060 	lockdep_assert_held(&text_mutex);
1061 
1062 	/* Try to unoptimize */
1063 	unoptimize_kprobe(p, kprobes_all_disarmed);
1064 
1065 	if (!kprobe_queued(p)) {
1066 		arch_disarm_kprobe(p);
1067 		/* If another kprobe was blocked, re-optimize it. */
1068 		_p = get_optimized_kprobe(p->addr);
1069 		if (unlikely(_p) && reopt)
1070 			optimize_kprobe(_p);
1071 	}
1072 }
1073 
/*
 * One-time setup for optimized kprobes: set the optimized-insn slot size
 * (when the architecture uses insn slots), initialize the optimizer wait
 * queue and state, and start the "kprobe-optimizer" kernel thread.
 */
static void __init init_optprobe(void)
{
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
	/* Init 'kprobe_optinsn_slots' for allocation */
	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
#endif

	/* The wait queue and state are set up before the thread is started. */
	init_waitqueue_head(&kprobe_optimizer_wait);
	atomic_set(&optimizer_state, OPTIMIZER_ST_IDLE);
	/*
	 * NOTE(review): the kthread_run() result is stored unchecked here;
	 * confirm that users of 'kprobe_optimizer_task' cope with an error
	 * pointer.
	 */
	kprobe_optimizer_task = kthread_run(kprobe_optimizer_thread, NULL,
					    "kprobe-optimizer");
}
1086 #else /* !CONFIG_OPTPROBES */
1087 
/*
 * Stubs used when CONFIG_OPTPROBES is off: the optimization hooks expand
 * to nothing, arming/disarming maps straight to the arch breakpoint
 * helpers, "disarmed" means "disabled", and waiting for the optimizer only
 * asserts that 'kprobe_mutex' is held.
 */
#define init_optprobe()				do {} while (0)
#define optimize_kprobe(p)			do {} while (0)
#define unoptimize_kprobe(p, f)			do {} while (0)
#define kill_optimized_kprobe(p)		do {} while (0)
#define prepare_optimized_kprobe(p)		do {} while (0)
#define try_to_optimize_kprobe(p)		do {} while (0)
#define __arm_kprobe(p)				arch_arm_kprobe(p)
#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p)
#define kprobe_disarmed(p)			kprobe_disabled(p)
#define wait_for_kprobe_optimizer_locked()			\
	lockdep_assert_held(&kprobe_mutex)
1099 
/*
 * !CONFIG_OPTPROBES variant: reaching this function is unexpected, so it
 * warns once and always fails with -EINVAL.
 */
static int reuse_unused_kprobe(struct kprobe *ap)
{
	/*
	 * If the optimized kprobe is NOT supported, the aggr kprobe is
	 * released at the same time that the last aggregated kprobe is
	 * unregistered.
	 * Thus there should be no chance to reuse unused kprobe.
	 */
	WARN_ON_ONCE(1);
	return -EINVAL;
}
1111 
/*
 * !CONFIG_OPTPROBES variant: release the arch-specific state of the
 * aggregator kprobe 'p' and free the structure itself.
 */
static void free_aggr_kprobe(struct kprobe *p)
{
	arch_remove_kprobe(p);
	kfree(p);
}
1117 
/*
 * !CONFIG_OPTPROBES variant: allocate a plain 'struct kprobe' to serve as
 * the aggregator. 'p' is unused here. Callers check the result for NULL.
 */
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	return kzalloc_obj(struct kprobe);
}
1122 #endif /* CONFIG_OPTPROBES */
1123 
1124 #ifdef CONFIG_KPROBES_ON_FTRACE
/* ftrace ops used for ftrace-based kprobes without a post_handler. */
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS,
};

/*
 * ftrace ops used for ftrace-based kprobes that have a post_handler;
 * identical to the above except for the additional IPMODIFY flag.
 */
static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

/* Number of probes currently armed through each of the two ops above. */
static int kprobe_ipmodify_enabled;
static int kprobe_ftrace_enabled;
/* Set by kprobe_ftrace_kill(); makes ftrace disarming a no-op. */
bool kprobe_ftrace_disabled;
1138 
1139 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1140 			       int *cnt)
1141 {
1142 	int ret;
1143 
1144 	lockdep_assert_held(&kprobe_mutex);
1145 
1146 	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
1147 	if (ret < 0)
1148 		return ret;
1149 
1150 	if (*cnt == 0) {
1151 		ret = register_ftrace_function(ops);
1152 		if (ret < 0) {
1153 			/*
1154 			 * At this point, sinec ops is not registered, we should be sefe from
1155 			 * registering empty filter.
1156 			 */
1157 			ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1158 			return ret;
1159 		}
1160 	}
1161 
1162 	(*cnt)++;
1163 	return ret;
1164 }
1165 
1166 static int arm_kprobe_ftrace(struct kprobe *p)
1167 {
1168 	bool ipmodify = (p->post_handler != NULL);
1169 
1170 	return __arm_kprobe_ftrace(p,
1171 		ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1172 		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1173 }
1174 
1175 static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1176 				  int *cnt)
1177 {
1178 	int ret;
1179 
1180 	lockdep_assert_held(&kprobe_mutex);
1181 	if (unlikely(kprobe_ftrace_disabled)) {
1182 		/* Now ftrace is disabled forever, disarm is already done. */
1183 		return 0;
1184 	}
1185 
1186 	if (*cnt == 1) {
1187 		ret = unregister_ftrace_function(ops);
1188 		if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret))
1189 			return ret;
1190 	}
1191 
1192 	(*cnt)--;
1193 
1194 	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1195 	WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n",
1196 		  p->addr, ret);
1197 	return ret;
1198 }
1199 
1200 static int disarm_kprobe_ftrace(struct kprobe *p)
1201 {
1202 	bool ipmodify = (p->post_handler != NULL);
1203 
1204 	return __disarm_kprobe_ftrace(p,
1205 		ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1206 		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1207 }
1208 
/*
 * Mark kprobes-on-ftrace as disabled. Once set, __disarm_kprobe_ftrace()
 * returns 0 without calling into ftrace.
 */
void kprobe_ftrace_kill(void)
{
	kprobe_ftrace_disabled = true;
}
1213 #else	/* !CONFIG_KPROBES_ON_FTRACE */
/* !CONFIG_KPROBES_ON_FTRACE stub: arming via ftrace is not available. */
static inline int arm_kprobe_ftrace(struct kprobe *p)
{
	return -ENODEV;
}
1218 
/* !CONFIG_KPROBES_ON_FTRACE stub: disarming via ftrace is not available. */
static inline int disarm_kprobe_ftrace(struct kprobe *p)
{
	return -ENODEV;
}
1223 #endif
1224 
/*
 * Run the arch-specific preparation for 'p', picking the ftrace flavour
 * when the probe is ftrace-based. Returns the arch helper's result.
 */
static int prepare_kprobe(struct kprobe *p)
{
	/* Must ensure p->addr is really on ftrace */
	return kprobe_ftrace(p) ? arch_prepare_kprobe_ftrace(p)
				: arch_prepare_kprobe(p);
}
1233 
1234 static int arm_kprobe(struct kprobe *kp)
1235 {
1236 	if (unlikely(kprobe_ftrace(kp)))
1237 		return arm_kprobe_ftrace(kp);
1238 
1239 	guard(cpus_read_lock)();
1240 	guard(mutex)(&text_mutex);
1241 	__arm_kprobe(kp);
1242 	return 0;
1243 }
1244 
1245 static int disarm_kprobe(struct kprobe *kp, bool reopt)
1246 {
1247 	if (unlikely(kprobe_ftrace(kp)))
1248 		return disarm_kprobe_ftrace(kp);
1249 
1250 	guard(cpus_read_lock)();
1251 	guard(mutex)(&text_mutex);
1252 	__disarm_kprobe(kp, reopt);
1253 	return 0;
1254 }
1255 
1256 /*
1257  * Aggregate handlers for multiple kprobes support - these handlers
1258  * take care of invoking the individual kprobe handlers on p->list
1259  */
1260 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
1261 {
1262 	struct kprobe *kp;
1263 
1264 	list_for_each_entry_rcu(kp, &p->list, list) {
1265 		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
1266 			set_kprobe_instance(kp);
1267 			if (kp->pre_handler(kp, regs))
1268 				return 1;
1269 		}
1270 		reset_kprobe_instance();
1271 	}
1272 	return 0;
1273 }
1274 NOKPROBE_SYMBOL(aggr_pre_handler);
1275 
1276 static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
1277 			      unsigned long flags)
1278 {
1279 	struct kprobe *kp;
1280 
1281 	list_for_each_entry_rcu(kp, &p->list, list) {
1282 		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
1283 			set_kprobe_instance(kp);
1284 			kp->post_handler(kp, regs, flags);
1285 			reset_kprobe_instance();
1286 		}
1287 	}
1288 }
1289 NOKPROBE_SYMBOL(aggr_post_handler);
1290 
1291 /* Walks the list and increments 'nmissed' if 'p' has child probes. */
1292 void kprobes_inc_nmissed_count(struct kprobe *p)
1293 {
1294 	struct kprobe *kp;
1295 
1296 	if (!kprobe_aggrprobe(p)) {
1297 		p->nmissed++;
1298 	} else {
1299 		list_for_each_entry_rcu(kp, &p->list, list)
1300 			kp->nmissed++;
1301 	}
1302 }
1303 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
1304 
/*
 * Placeholder kprobe that kprobe_busy_begin() installs as this CPU's
 * 'current_kprobe'. Its address field points at get_kprobe().
 */
static struct kprobe kprobe_busy = {
	.addr = (void *) get_kprobe,
};
1308 
1309 void kprobe_busy_begin(void)
1310 {
1311 	struct kprobe_ctlblk *kcb;
1312 
1313 	preempt_disable();
1314 	__this_cpu_write(current_kprobe, &kprobe_busy);
1315 	kcb = get_kprobe_ctlblk();
1316 	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1317 }
1318 
/*
 * Leave a "kprobe busy" section: clear this CPU's current kprobe and
 * re-enable preemption. Counterpart of kprobe_busy_begin().
 */
void kprobe_busy_end(void)
{
	__this_cpu_write(current_kprobe, NULL);
	preempt_enable();
}
1324 
1325 /* Add the new probe to 'ap->list'. */
1326 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
1327 {
1328 	if (p->post_handler)
1329 		unoptimize_kprobe(ap, true);	/* Fall back to normal kprobe */
1330 
1331 	list_add_rcu(&p->list, &ap->list);
1332 	if (p->post_handler && !ap->post_handler)
1333 		ap->post_handler = aggr_post_handler;
1334 
1335 	return 0;
1336 }
1337 
1338 /*
1339  * Fill in the required fields of the aggregator kprobe. Replace the
1340  * earlier kprobe in the hlist with the aggregator kprobe.
1341  */
1342 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
1343 {
1344 	/* Copy the insn slot of 'p' to 'ap'. */
1345 	copy_kprobe(p, ap);
1346 	flush_insn_slot(ap);
1347 	ap->addr = p->addr;
1348 	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
1349 	ap->pre_handler = aggr_pre_handler;
1350 	/* We don't care the kprobe which has gone. */
1351 	if (p->post_handler && !kprobe_gone(p))
1352 		ap->post_handler = aggr_post_handler;
1353 
1354 	INIT_LIST_HEAD(&ap->list);
1355 	INIT_HLIST_NODE(&ap->hlist);
1356 
1357 	list_add_rcu(&p->list, &ap->list);
1358 	hlist_replace_rcu(&p->hlist, &ap->hlist);
1359 }
1360 
1361 /*
1362  * This registers the second or subsequent kprobe at the same address.
1363  */
1364 static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
1365 {
1366 	int ret = 0;
1367 	struct kprobe *ap = orig_p;
1368 
1369 	scoped_guard(cpus_read_lock) {
1370 		/* For preparing optimization, jump_label_text_reserved() is called */
1371 		guard(jump_label_lock)();
1372 		guard(mutex)(&text_mutex);
1373 
1374 		if (!kprobe_aggrprobe(orig_p)) {
1375 			/* If 'orig_p' is not an 'aggr_kprobe', create new one. */
1376 			ap = alloc_aggr_kprobe(orig_p);
1377 			if (!ap)
1378 				return -ENOMEM;
1379 			init_aggr_kprobe(ap, orig_p);
1380 		} else if (kprobe_unused(ap)) {
1381 			/* This probe is going to die. Rescue it */
1382 			ret = reuse_unused_kprobe(ap);
1383 			if (ret)
1384 				return ret;
1385 		}
1386 
1387 		if (kprobe_gone(ap)) {
1388 			/*
1389 			 * Attempting to insert new probe at the same location that
1390 			 * had a probe in the module vaddr area which already
1391 			 * freed. So, the instruction slot has already been
1392 			 * released. We need a new slot for the new probe.
1393 			 */
1394 			ret = arch_prepare_kprobe(ap);
1395 			if (ret)
1396 				/*
1397 				 * Even if fail to allocate new slot, don't need to
1398 				 * free the 'ap'. It will be used next time, or
1399 				 * freed by unregister_kprobe().
1400 				 */
1401 				return ret;
1402 
1403 			/* Prepare optimized instructions if possible. */
1404 			prepare_optimized_kprobe(ap);
1405 
1406 			/*
1407 			 * Clear gone flag to prevent allocating new slot again, and
1408 			 * set disabled flag because it is not armed yet.
1409 			 */
1410 			ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
1411 					| KPROBE_FLAG_DISABLED;
1412 		}
1413 
1414 		/* Copy the insn slot of 'p' to 'ap'. */
1415 		copy_kprobe(ap, p);
1416 		ret = add_new_kprobe(ap, p);
1417 	}
1418 
1419 	if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
1420 		ap->flags &= ~KPROBE_FLAG_DISABLED;
1421 		if (!kprobes_all_disarmed) {
1422 			/* Arm the breakpoint again. */
1423 			ret = arm_kprobe(ap);
1424 			if (ret) {
1425 				ap->flags |= KPROBE_FLAG_DISABLED;
1426 				list_del_rcu(&p->list);
1427 				synchronize_rcu();
1428 			}
1429 		}
1430 	}
1431 	return ret;
1432 }
1433 
1434 bool __weak arch_within_kprobe_blacklist(unsigned long addr)
1435 {
1436 	/* The '__kprobes' functions and entry code must not be probed. */
1437 	return addr >= (unsigned long)__kprobes_text_start &&
1438 	       addr < (unsigned long)__kprobes_text_end;
1439 }
1440 
1441 static bool __within_kprobe_blacklist(unsigned long addr)
1442 {
1443 	struct kprobe_blacklist_entry *ent;
1444 
1445 	if (arch_within_kprobe_blacklist(addr))
1446 		return true;
1447 	/*
1448 	 * If 'kprobe_blacklist' is defined, check the address and
1449 	 * reject any probe registration in the prohibited area.
1450 	 */
1451 	list_for_each_entry(ent, &kprobe_blacklist, list) {
1452 		if (addr >= ent->start_addr && addr < ent->end_addr)
1453 			return true;
1454 	}
1455 	return false;
1456 }
1457 
1458 bool within_kprobe_blacklist(unsigned long addr)
1459 {
1460 	char symname[KSYM_NAME_LEN], *p;
1461 
1462 	if (__within_kprobe_blacklist(addr))
1463 		return true;
1464 
1465 	/* Check if the address is on a suffixed-symbol */
1466 	if (!lookup_symbol_name(addr, symname)) {
1467 		p = strchr(symname, '.');
1468 		if (!p)
1469 			return false;
1470 		*p = '\0';
1471 		addr = (unsigned long)kprobe_lookup_name(symname, 0);
1472 		if (addr)
1473 			return __within_kprobe_blacklist(addr);
1474 	}
1475 	return false;
1476 }
1477 
1478 /*
1479  * arch_adjust_kprobe_addr - adjust the address
1480  * @addr: symbol base address
1481  * @offset: offset within the symbol
1482  * @on_func_entry: was this @addr+@offset on the function entry
1483  *
1484  * Typically returns @addr + @offset, except for special cases where the
1485  * function might be prefixed by a CFI landing pad, in that case any offset
1486  * inside the landing pad is mapped to the first 'real' instruction of the
1487  * symbol.
1488  *
1489  * Specifically, for things like IBT/BTI, skip the resp. ENDBR/BTI.C
1490  * instruction at +0.
1491  */
1492 kprobe_opcode_t *__weak arch_adjust_kprobe_addr(unsigned long addr,
1493 						unsigned long offset,
1494 						bool *on_func_entry)
1495 {
1496 	*on_func_entry = !offset;
1497 	return (kprobe_opcode_t *)(addr + offset);
1498 }
1499 
1500 /*
1501  * If 'symbol_name' is specified, look it up and add the 'offset'
1502  * to it. This way, we can specify a relative address to a symbol.
1503  * This returns encoded errors if it fails to look up symbol or invalid
1504  * combination of parameters.
1505  */
1506 static kprobe_opcode_t *
1507 _kprobe_addr(kprobe_opcode_t *addr, const char *symbol_name,
1508 	     unsigned long offset, bool *on_func_entry)
1509 {
1510 	if ((symbol_name && addr) || (!symbol_name && !addr))
1511 		return ERR_PTR(-EINVAL);
1512 
1513 	if (symbol_name) {
1514 		/*
1515 		 * Input: @sym + @offset
1516 		 * Output: @addr + @offset
1517 		 *
1518 		 * NOTE: kprobe_lookup_name() does *NOT* fold the offset
1519 		 *       argument into it's output!
1520 		 */
1521 		addr = kprobe_lookup_name(symbol_name, offset);
1522 		if (!addr)
1523 			return ERR_PTR(-ENOENT);
1524 	}
1525 
1526 	/*
1527 	 * So here we have @addr + @offset, displace it into a new
1528 	 * @addr' + @offset' where @addr' is the symbol start address.
1529 	 */
1530 	addr = (void *)addr + offset;
1531 	if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset))
1532 		return ERR_PTR(-ENOENT);
1533 	addr = (void *)addr - offset;
1534 
1535 	/*
1536 	 * Then ask the architecture to re-combine them, taking care of
1537 	 * magical function entry details while telling us if this was indeed
1538 	 * at the start of the function.
1539 	 */
1540 	addr = arch_adjust_kprobe_addr((unsigned long)addr, offset, on_func_entry);
1541 	if (!addr)
1542 		return ERR_PTR(-EINVAL);
1543 
1544 	return addr;
1545 }
1546 
1547 static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
1548 {
1549 	bool on_func_entry;
1550 
1551 	return _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
1552 }
1553 
1554 /*
1555  * Check the 'p' is valid and return the aggregator kprobe
1556  * at the same address.
1557  */
1558 static struct kprobe *__get_valid_kprobe(struct kprobe *p)
1559 {
1560 	struct kprobe *ap, *list_p;
1561 
1562 	lockdep_assert_held(&kprobe_mutex);
1563 
1564 	ap = get_kprobe(p->addr);
1565 	if (unlikely(!ap))
1566 		return NULL;
1567 
1568 	if (p == ap)
1569 		return ap;
1570 
1571 	list_for_each_entry(list_p, &ap->list, list)
1572 		if (list_p == p)
1573 		/* kprobe p is a valid probe */
1574 			return ap;
1575 
1576 	return NULL;
1577 }
1578 
1579 /*
1580  * Warn and return error if the kprobe is being re-registered since
1581  * there must be a software bug.
1582  */
1583 static inline int warn_kprobe_rereg(struct kprobe *p)
1584 {
1585 	guard(mutex)(&kprobe_mutex);
1586 
1587 	if (WARN_ON_ONCE(__get_valid_kprobe(p)))
1588 		return -EINVAL;
1589 
1590 	return 0;
1591 }
1592 
1593 static int check_ftrace_location(struct kprobe *p)
1594 {
1595 	unsigned long addr = (unsigned long)p->addr;
1596 
1597 	if (ftrace_location(addr) == addr) {
1598 #ifdef CONFIG_KPROBES_ON_FTRACE
1599 		p->flags |= KPROBE_FLAG_FTRACE;
1600 #else
1601 		return -EINVAL;
1602 #endif
1603 	}
1604 	return 0;
1605 }
1606 
1607 static bool is_cfi_preamble_symbol(unsigned long addr)
1608 {
1609 	char symbuf[KSYM_NAME_LEN];
1610 
1611 	if (lookup_symbol_name(addr, symbuf))
1612 		return false;
1613 
1614 	return str_has_prefix(symbuf, "__cfi_") ||
1615 		str_has_prefix(symbuf, "__pfx_");
1616 }
1617 
1618 static int check_kprobe_address_safe(struct kprobe *p,
1619 				     struct module **probed_mod)
1620 {
1621 	int ret;
1622 
1623 	ret = check_ftrace_location(p);
1624 	if (ret)
1625 		return ret;
1626 
1627 	guard(jump_label_lock)();
1628 
1629 	/* Ensure the address is in a text area, and find a module if exists. */
1630 	*probed_mod = NULL;
1631 	if (!core_kernel_text((unsigned long) p->addr)) {
1632 		guard(rcu)();
1633 		*probed_mod = __module_text_address((unsigned long) p->addr);
1634 		if (!(*probed_mod))
1635 			return -EINVAL;
1636 
1637 		/*
1638 		 * We must hold a refcount of the probed module while updating
1639 		 * its code to prohibit unexpected unloading.
1640 		 */
1641 		if (unlikely(!try_module_get(*probed_mod)))
1642 			return -ENOENT;
1643 	}
1644 	/* Ensure it is not in reserved area. */
1645 	if (in_gate_area_no_mm((unsigned long) p->addr) ||
1646 	    within_kprobe_blacklist((unsigned long) p->addr) ||
1647 	    jump_label_text_reserved(p->addr, p->addr) ||
1648 	    static_call_text_reserved(p->addr, p->addr) ||
1649 	    find_bug((unsigned long)p->addr) ||
1650 	    is_cfi_preamble_symbol((unsigned long)p->addr)) {
1651 		module_put(*probed_mod);
1652 		return -EINVAL;
1653 	}
1654 
1655 	/* Get module refcount and reject __init functions for loaded modules. */
1656 	if (IS_ENABLED(CONFIG_MODULES) && *probed_mod) {
1657 		/*
1658 		 * If the module freed '.init.text', we couldn't insert
1659 		 * kprobes in there.
1660 		 */
1661 		if (within_module_init((unsigned long)p->addr, *probed_mod) &&
1662 		    !module_is_coming(*probed_mod)) {
1663 			module_put(*probed_mod);
1664 			return -ENOENT;
1665 		}
1666 	}
1667 
1668 	return 0;
1669 }
1670 
1671 static int __register_kprobe(struct kprobe *p)
1672 {
1673 	int ret;
1674 	struct kprobe *old_p;
1675 
1676 	guard(mutex)(&kprobe_mutex);
1677 
1678 	old_p = get_kprobe(p->addr);
1679 	if (old_p)
1680 		/* Since this may unoptimize 'old_p', locking 'text_mutex'. */
1681 		return register_aggr_kprobe(old_p, p);
1682 
1683 	scoped_guard(cpus_read_lock) {
1684 		/* Prevent text modification */
1685 		guard(mutex)(&text_mutex);
1686 		ret = prepare_kprobe(p);
1687 		if (ret)
1688 			return ret;
1689 	}
1690 
1691 	INIT_HLIST_NODE(&p->hlist);
1692 	hlist_add_head_rcu(&p->hlist,
1693 		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
1694 
1695 	if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
1696 		ret = arm_kprobe(p);
1697 		if (ret) {
1698 			hlist_del_rcu(&p->hlist);
1699 			synchronize_rcu();
1700 		}
1701 	}
1702 
1703 	/* Try to optimize kprobe */
1704 	try_to_optimize_kprobe(p);
1705 	return 0;
1706 }
1707 
1708 int register_kprobe(struct kprobe *p)
1709 {
1710 	int ret;
1711 	struct module *probed_mod;
1712 	kprobe_opcode_t *addr;
1713 	bool on_func_entry;
1714 
1715 	/* Canonicalize probe address from symbol */
1716 	addr = _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry);
1717 	if (IS_ERR(addr))
1718 		return PTR_ERR(addr);
1719 	p->addr = addr;
1720 
1721 	ret = warn_kprobe_rereg(p);
1722 	if (ret)
1723 		return ret;
1724 
1725 	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
1726 	p->flags &= KPROBE_FLAG_DISABLED;
1727 	if (on_func_entry)
1728 		p->flags |= KPROBE_FLAG_ON_FUNC_ENTRY;
1729 	p->nmissed = 0;
1730 	INIT_LIST_HEAD(&p->list);
1731 
1732 	ret = check_kprobe_address_safe(p, &probed_mod);
1733 	if (ret)
1734 		return ret;
1735 
1736 	ret = __register_kprobe(p);
1737 
1738 	if (probed_mod)
1739 		module_put(probed_mod);
1740 
1741 	return ret;
1742 }
1743 EXPORT_SYMBOL_GPL(register_kprobe);
1744 
1745 /* Check if all probes on the 'ap' are disabled. */
1746 static bool aggr_kprobe_disabled(struct kprobe *ap)
1747 {
1748 	struct kprobe *kp;
1749 
1750 	lockdep_assert_held(&kprobe_mutex);
1751 
1752 	list_for_each_entry(kp, &ap->list, list)
1753 		if (!kprobe_disabled(kp))
1754 			/*
1755 			 * Since there is an active probe on the list,
1756 			 * we can't disable this 'ap'.
1757 			 */
1758 			return false;
1759 
1760 	return true;
1761 }
1762 
1763 static struct kprobe *__disable_kprobe(struct kprobe *p)
1764 {
1765 	struct kprobe *orig_p;
1766 	int ret;
1767 
1768 	lockdep_assert_held(&kprobe_mutex);
1769 
1770 	/* Get an original kprobe for return */
1771 	orig_p = __get_valid_kprobe(p);
1772 	if (unlikely(orig_p == NULL))
1773 		return ERR_PTR(-EINVAL);
1774 
1775 	if (kprobe_disabled(p))
1776 		return orig_p;
1777 
1778 	/* Disable probe if it is a child probe */
1779 	if (p != orig_p)
1780 		p->flags |= KPROBE_FLAG_DISABLED;
1781 
1782 	/* Try to disarm and disable this/parent probe */
1783 	if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1784 		/*
1785 		 * Don't be lazy here.  Even if 'kprobes_all_disarmed'
1786 		 * is false, 'orig_p' might not have been armed yet.
1787 		 * Note arm_all_kprobes() __tries__ to arm all kprobes
1788 		 * on the best effort basis.
1789 		 */
1790 		if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
1791 			ret = disarm_kprobe(orig_p, true);
1792 			if (ret) {
1793 				p->flags &= ~KPROBE_FLAG_DISABLED;
1794 				return ERR_PTR(ret);
1795 			}
1796 		}
1797 		orig_p->flags |= KPROBE_FLAG_DISABLED;
1798 	}
1799 
1800 	return orig_p;
1801 }
1802 
1803 /*
1804  * Unregister a kprobe without a scheduler synchronization.
1805  */
1806 static int __unregister_kprobe_top(struct kprobe *p)
1807 {
1808 	struct kprobe *ap, *list_p;
1809 
1810 	/* Disable kprobe. This will disarm it if needed. */
1811 	ap = __disable_kprobe(p);
1812 	if (IS_ERR(ap))
1813 		return PTR_ERR(ap);
1814 
1815 	WARN_ON(ap != p && !kprobe_aggrprobe(ap));
1816 
1817 	/*
1818 	 * If the probe is an independent(and non-optimized) kprobe
1819 	 * (not an aggrprobe), the last kprobe on the aggrprobe, or
1820 	 * kprobe is already disarmed, just remove from the hash list.
1821 	 */
1822 	if (ap == p ||
1823 		(list_is_singular(&ap->list) && kprobe_disarmed(ap))) {
1824 		/*
1825 		 * !disarmed could be happen if the probe is under delayed
1826 		 * unoptimizing.
1827 		 */
1828 		hlist_del_rcu(&ap->hlist);
1829 		return 0;
1830 	}
1831 
1832 	/* If disabling probe has special handlers, update aggrprobe */
1833 	if (p->post_handler && !kprobe_gone(p)) {
1834 		list_for_each_entry(list_p, &ap->list, list) {
1835 			if ((list_p != p) && (list_p->post_handler))
1836 				break;
1837 		}
1838 		/* No other probe has post_handler */
1839 		if (list_entry_is_head(list_p, &ap->list, list)) {
1840 			/*
1841 			 * For the kprobe-on-ftrace case, we keep the
1842 			 * post_handler setting to identify this aggrprobe
1843 			 * armed with kprobe_ipmodify_ops.
1844 			 */
1845 			if (!kprobe_ftrace(ap))
1846 				ap->post_handler = NULL;
1847 		}
1848 	}
1849 
1850 	/*
1851 	 * Remove from the aggrprobe: this path will do nothing in
1852 	 * __unregister_kprobe_bottom().
1853 	 */
1854 	list_del_rcu(&p->list);
1855 	if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
1856 		/*
1857 		 * Try to optimize this probe again, because post
1858 		 * handler may have been changed.
1859 		 */
1860 		optimize_kprobe(ap);
1861 	return 0;
1862 
1863 }
1864 
1865 static void __unregister_kprobe_bottom(struct kprobe *p)
1866 {
1867 	struct kprobe *ap;
1868 
1869 	if (list_empty(&p->list))
1870 		/* This is an independent kprobe */
1871 		arch_remove_kprobe(p);
1872 	else if (list_is_singular(&p->list)) {
1873 		/* This is the last child of an aggrprobe */
1874 		ap = list_entry(p->list.next, struct kprobe, list);
1875 		list_del(&p->list);
1876 		free_aggr_kprobe(ap);
1877 	}
1878 	/* Otherwise, do nothing. */
1879 }
1880 
1881 int register_kprobes(struct kprobe **kps, int num)
1882 {
1883 	int i, ret = 0;
1884 
1885 	if (num <= 0)
1886 		return -EINVAL;
1887 	for (i = 0; i < num; i++) {
1888 		ret = register_kprobe(kps[i]);
1889 		if (ret < 0) {
1890 			if (i > 0)
1891 				unregister_kprobes(kps, i);
1892 			break;
1893 		}
1894 	}
1895 	return ret;
1896 }
1897 EXPORT_SYMBOL_GPL(register_kprobes);
1898 
/* Unregister a single kprobe; a one-element call to unregister_kprobes(). */
void unregister_kprobe(struct kprobe *p)
{
	unregister_kprobes(&p, 1);
}
EXPORT_SYMBOL_GPL(unregister_kprobe);
1904 
1905 void unregister_kprobes(struct kprobe **kps, int num)
1906 {
1907 	int i;
1908 
1909 	if (num <= 0)
1910 		return;
1911 	scoped_guard(mutex, &kprobe_mutex) {
1912 		for (i = 0; i < num; i++)
1913 			if (__unregister_kprobe_top(kps[i]) < 0)
1914 				kps[i]->addr = NULL;
1915 	}
1916 	synchronize_rcu();
1917 	for (i = 0; i < num; i++)
1918 		if (kps[i]->addr)
1919 			__unregister_kprobe_bottom(kps[i]);
1920 }
1921 EXPORT_SYMBOL_GPL(unregister_kprobes);
1922 
/*
 * Default (weak) exception notifier callback: ignores its arguments and
 * returns NOTIFY_DONE.
 */
int __weak kprobe_exceptions_notify(struct notifier_block *self,
					unsigned long val, void *data)
{
	return NOTIFY_DONE;
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);
1929 
/*
 * Notifier block wrapping kprobe_exceptions_notify(), given the largest
 * positive 32-bit priority value.
 */
static struct notifier_block kprobe_exceptions_nb = {
	.notifier_call = kprobe_exceptions_notify,
	.priority = 0x7fffffff /* we need to be notified first */
};
1934 
1935 #ifdef CONFIG_KRETPROBES
1936 
1937 #if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
1938 
1939 /* callbacks for objpool of kretprobe instances */
1940 static int kretprobe_init_inst(void *nod, void *context)
1941 {
1942 	struct kretprobe_instance *ri = nod;
1943 
1944 	ri->rph = context;
1945 	return 0;
1946 }
/* Pool release callback: frees 'context' with kfree(). Always returns 0. */
static int kretprobe_fini_pool(struct objpool_head *head, void *context)
{
	kfree(context);
	return 0;
}
1952 
1953 static void free_rp_inst_rcu(struct rcu_head *head)
1954 {
1955 	struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
1956 	struct kretprobe_holder *rph = ri->rph;
1957 
1958 	objpool_drop(ri, &rph->pool);
1959 }
1960 NOKPROBE_SYMBOL(free_rp_inst_rcu);
1961 
1962 static void recycle_rp_inst(struct kretprobe_instance *ri)
1963 {
1964 	struct kretprobe *rp = get_kretprobe(ri);
1965 
1966 	if (likely(rp))
1967 		objpool_push(ri, &rp->rph->pool);
1968 	else
1969 		call_rcu(&ri->rcu, free_rp_inst_rcu);
1970 }
1971 NOKPROBE_SYMBOL(recycle_rp_inst);
1972 
1973 /*
1974  * This function is called from delayed_put_task_struct() when a task is
1975  * dead and cleaned up to recycle any kretprobe instances associated with
1976  * this task. These left over instances represent probed functions that
1977  * have been called but will never return.
1978  */
1979 void kprobe_flush_task(struct task_struct *tk)
1980 {
1981 	struct kretprobe_instance *ri;
1982 	struct llist_node *node;
1983 
1984 	/* Early boot, not yet initialized. */
1985 	if (unlikely(!kprobes_initialized))
1986 		return;
1987 
1988 	kprobe_busy_begin();
1989 
1990 	node = __llist_del_all(&tk->kretprobe_instances);
1991 	while (node) {
1992 		ri = container_of(node, struct kretprobe_instance, llist);
1993 		node = node->next;
1994 
1995 		recycle_rp_inst(ri);
1996 	}
1997 
1998 	kprobe_busy_end();
1999 }
2000 NOKPROBE_SYMBOL(kprobe_flush_task);
2001 
2002 static inline void free_rp_inst(struct kretprobe *rp)
2003 {
2004 	struct kretprobe_holder *rph = rp->rph;
2005 
2006 	if (!rph)
2007 		return;
2008 	rp->rph = NULL;
2009 	objpool_fini(&rph->pool);
2010 }
2011 
2012 /* This assumes the 'tsk' is the current task or the is not running. */
2013 static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
2014 						  struct llist_node **cur)
2015 {
2016 	struct kretprobe_instance *ri = NULL;
2017 	struct llist_node *node = *cur;
2018 
2019 	if (!node)
2020 		node = tsk->kretprobe_instances.first;
2021 	else
2022 		node = node->next;
2023 
2024 	while (node) {
2025 		ri = container_of(node, struct kretprobe_instance, llist);
2026 		if (ri->ret_addr != kretprobe_trampoline_addr()) {
2027 			*cur = node;
2028 			return ri->ret_addr;
2029 		}
2030 		node = node->next;
2031 	}
2032 	return NULL;
2033 }
2034 NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);
2035 
2036 /**
2037  * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe
2038  * @tsk: Target task
2039  * @fp: A frame pointer
2040  * @cur: a storage of the loop cursor llist_node pointer for next call
2041  *
2042  * Find the correct return address modified by a kretprobe on @tsk in unsigned
2043  * long type. If it finds the return address, this returns that address value,
2044  * or this returns 0.
2045  * The @tsk must be 'current' or a task which is not running. @fp is a hint
2046  * to get the currect return address - which is compared with the
2047  * kretprobe_instance::fp field. The @cur is a loop cursor for searching the
2048  * kretprobe return addresses on the @tsk. The '*@cur' should be NULL at the
2049  * first call, but '@cur' itself must NOT NULL.
2050  */
2051 unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
2052 				      struct llist_node **cur)
2053 {
2054 	struct kretprobe_instance *ri;
2055 	kprobe_opcode_t *ret;
2056 
2057 	if (WARN_ON_ONCE(!cur))
2058 		return 0;
2059 
2060 	do {
2061 		ret = __kretprobe_find_ret_addr(tsk, cur);
2062 		if (!ret)
2063 			break;
2064 		ri = container_of(*cur, struct kretprobe_instance, llist);
2065 	} while (ri->fp != fp);
2066 
2067 	return (unsigned long)ret;
2068 }
2069 NOKPROBE_SYMBOL(kretprobe_find_ret_addr);
2070 
/*
 * Weak default for the arch hook that __kretprobe_trampoline_handler()
 * calls with the resolved return address after the user handlers have run.
 */
void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
					kprobe_opcode_t *correct_ret_addr)
{
	/*
	 * Do nothing by default. Please fill this to update the fake return
	 * address on the stack with the correct one on each arch if possible.
	 */
}
2079 
2080 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
2081 					     void *frame_pointer)
2082 {
2083 	struct kretprobe_instance *ri = NULL;
2084 	struct llist_node *first, *node = NULL;
2085 	kprobe_opcode_t *correct_ret_addr;
2086 	struct kretprobe *rp;
2087 
2088 	/* Find correct address and all nodes for this frame. */
2089 	correct_ret_addr = __kretprobe_find_ret_addr(current, &node);
2090 	if (!correct_ret_addr) {
2091 		pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n");
2092 		BUG_ON(1);
2093 	}
2094 
2095 	/*
2096 	 * Set the return address as the instruction pointer, because if the
2097 	 * user handler calls stack_trace_save_regs() with this 'regs',
2098 	 * the stack trace will start from the instruction pointer.
2099 	 */
2100 	instruction_pointer_set(regs, (unsigned long)correct_ret_addr);
2101 
2102 	/* Run the user handler of the nodes. */
2103 	first = current->kretprobe_instances.first;
2104 	while (first) {
2105 		ri = container_of(first, struct kretprobe_instance, llist);
2106 
2107 		if (WARN_ON_ONCE(ri->fp != frame_pointer))
2108 			break;
2109 
2110 		rp = get_kretprobe(ri);
2111 		if (rp && rp->handler) {
2112 			struct kprobe *prev = kprobe_running();
2113 
2114 			__this_cpu_write(current_kprobe, &rp->kp);
2115 			ri->ret_addr = correct_ret_addr;
2116 			rp->handler(ri, regs);
2117 			__this_cpu_write(current_kprobe, prev);
2118 		}
2119 		if (first == node)
2120 			break;
2121 
2122 		first = first->next;
2123 	}
2124 
2125 	arch_kretprobe_fixup_return(regs, correct_ret_addr);
2126 
2127 	/* Unlink all nodes for this frame. */
2128 	first = current->kretprobe_instances.first;
2129 	current->kretprobe_instances.first = node->next;
2130 	node->next = NULL;
2131 
2132 	/* Recycle free instances. */
2133 	while (first) {
2134 		ri = container_of(first, struct kretprobe_instance, llist);
2135 		first = first->next;
2136 
2137 		recycle_rp_inst(ri);
2138 	}
2139 
2140 	return (unsigned long)correct_ret_addr;
2141 }
2142 NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
2143 
2144 /*
2145  * This kprobe pre_handler is registered with every kretprobe. When probe
2146  * hits it will set up the return probe.
2147  */
2148 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2149 {
2150 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
2151 	struct kretprobe_holder *rph = rp->rph;
2152 	struct kretprobe_instance *ri;
2153 
2154 	ri = objpool_pop(&rph->pool);
2155 	if (!ri) {
2156 		rp->nmissed++;
2157 		return 0;
2158 	}
2159 
2160 	if (rp->entry_handler && rp->entry_handler(ri, regs)) {
2161 		objpool_push(ri, &rph->pool);
2162 		return 0;
2163 	}
2164 
2165 	arch_prepare_kretprobe(ri, regs);
2166 
2167 	__llist_add(&ri->llist, &current->kretprobe_instances);
2168 
2169 	return 0;
2170 }
2171 NOKPROBE_SYMBOL(pre_handler_kretprobe);
2172 #else /* CONFIG_KRETPROBE_ON_RETHOOK */
2173 /*
2174  * This kprobe pre_handler is registered with every kretprobe. When probe
2175  * hits it will set up the return probe.
2176  */
2177 static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2178 {
2179 	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
2180 	struct kretprobe_instance *ri;
2181 	struct rethook_node *rhn;
2182 
2183 	rhn = rethook_try_get(rp->rh);
2184 	if (!rhn) {
2185 		rp->nmissed++;
2186 		return 0;
2187 	}
2188 
2189 	ri = container_of(rhn, struct kretprobe_instance, node);
2190 
2191 	if (rp->entry_handler && rp->entry_handler(ri, regs))
2192 		rethook_recycle(rhn);
2193 	else
2194 		rethook_hook(rhn, regs, kprobe_ftrace(p));
2195 
2196 	return 0;
2197 }
2198 NOKPROBE_SYMBOL(pre_handler_kretprobe);
2199 
2200 static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
2201 				      unsigned long ret_addr,
2202 				      struct pt_regs *regs)
2203 {
2204 	struct kretprobe *rp = (struct kretprobe *)data;
2205 	struct kretprobe_instance *ri;
2206 	struct kprobe_ctlblk *kcb;
2207 
2208 	/* The data must NOT be null. This means rethook data structure is broken. */
2209 	if (WARN_ON_ONCE(!data) || !rp->handler)
2210 		return;
2211 
2212 	__this_cpu_write(current_kprobe, &rp->kp);
2213 	kcb = get_kprobe_ctlblk();
2214 	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
2215 
2216 	ri = container_of(rh, struct kretprobe_instance, node);
2217 	rp->handler(ri, regs);
2218 
2219 	__this_cpu_write(current_kprobe, NULL);
2220 }
2221 NOKPROBE_SYMBOL(kretprobe_rethook_handler);
2222 
2223 #endif /* !CONFIG_KRETPROBE_ON_RETHOOK */
2224 
2225 /**
2226  * kprobe_on_func_entry() -- check whether given address is function entry
2227  * @addr: Target address
2228  * @sym:  Target symbol name
2229  * @offset: The offset from the symbol or the address
2230  *
2231  * This checks whether the given @addr+@offset or @sym+@offset is on the
2232  * function entry address or not.
2233  * This returns 0 if it is the function entry, or -EINVAL if it is not.
2234  * And also it returns -ENOENT if it fails the symbol or address lookup.
2235  * Caller must pass @addr or @sym (either one must be NULL), or this
2236  * returns -EINVAL.
2237  */
2238 int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
2239 {
2240 	bool on_func_entry;
2241 	kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset, &on_func_entry);
2242 
2243 	if (IS_ERR(kp_addr))
2244 		return PTR_ERR(kp_addr);
2245 
2246 	if (!on_func_entry)
2247 		return -EINVAL;
2248 
2249 	return 0;
2250 }
2251 
2252 int register_kretprobe(struct kretprobe *rp)
2253 {
2254 	int ret;
2255 	int i;
2256 	void *addr;
2257 
2258 	ret = kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset);
2259 	if (ret)
2260 		return ret;
2261 
2262 	/* If only 'rp->kp.addr' is specified, check reregistering kprobes */
2263 	if (rp->kp.addr && warn_kprobe_rereg(&rp->kp))
2264 		return -EINVAL;
2265 
2266 	if (kretprobe_blacklist_size) {
2267 		addr = kprobe_addr(&rp->kp);
2268 		if (IS_ERR(addr))
2269 			return PTR_ERR(addr);
2270 
2271 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2272 			if (kretprobe_blacklist[i].addr == addr)
2273 				return -EINVAL;
2274 		}
2275 	}
2276 
2277 	if (rp->data_size > KRETPROBE_MAX_DATA_SIZE)
2278 		return -E2BIG;
2279 
2280 	rp->kp.pre_handler = pre_handler_kretprobe;
2281 	rp->kp.post_handler = NULL;
2282 
2283 	/* Pre-allocate memory for max kretprobe instances */
2284 	if (rp->maxactive <= 0)
2285 		rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
2286 
2287 #ifdef CONFIG_KRETPROBE_ON_RETHOOK
2288 	rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler,
2289 				sizeof(struct kretprobe_instance) +
2290 				rp->data_size, rp->maxactive);
2291 	if (IS_ERR(rp->rh))
2292 		return PTR_ERR(rp->rh);
2293 
2294 	rp->nmissed = 0;
2295 	/* Establish function entry probe point */
2296 	ret = register_kprobe(&rp->kp);
2297 	if (ret != 0) {
2298 		rethook_free(rp->rh);
2299 		rp->rh = NULL;
2300 	}
2301 #else	/* !CONFIG_KRETPROBE_ON_RETHOOK */
2302 	rp->rph = kzalloc_obj(struct kretprobe_holder);
2303 	if (!rp->rph)
2304 		return -ENOMEM;
2305 
2306 	if (objpool_init(&rp->rph->pool, rp->maxactive, rp->data_size +
2307 			sizeof(struct kretprobe_instance), GFP_KERNEL,
2308 			rp->rph, kretprobe_init_inst, kretprobe_fini_pool)) {
2309 		kfree(rp->rph);
2310 		rp->rph = NULL;
2311 		return -ENOMEM;
2312 	}
2313 	rcu_assign_pointer(rp->rph->rp, rp);
2314 	rp->nmissed = 0;
2315 	/* Establish function entry probe point */
2316 	ret = register_kprobe(&rp->kp);
2317 	if (ret != 0)
2318 		free_rp_inst(rp);
2319 #endif
2320 	return ret;
2321 }
2322 EXPORT_SYMBOL_GPL(register_kretprobe);
2323 
2324 int register_kretprobes(struct kretprobe **rps, int num)
2325 {
2326 	int ret = 0, i;
2327 
2328 	if (num <= 0)
2329 		return -EINVAL;
2330 	for (i = 0; i < num; i++) {
2331 		ret = register_kretprobe(rps[i]);
2332 		if (ret < 0) {
2333 			if (i > 0)
2334 				unregister_kretprobes(rps, i);
2335 			break;
2336 		}
2337 	}
2338 	return ret;
2339 }
2340 EXPORT_SYMBOL_GPL(register_kretprobes);
2341 
/* Unregister a single kretprobe; a one-element unregister_kretprobes(). */
void unregister_kretprobe(struct kretprobe *rp)
{
	unregister_kretprobes(&rp, 1);
}
EXPORT_SYMBOL_GPL(unregister_kretprobe);
2347 
2348 void unregister_kretprobes(struct kretprobe **rps, int num)
2349 {
2350 	int i;
2351 
2352 	if (num <= 0)
2353 		return;
2354 	for (i = 0; i < num; i++) {
2355 		guard(mutex)(&kprobe_mutex);
2356 
2357 		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
2358 			rps[i]->kp.addr = NULL;
2359 #ifdef CONFIG_KRETPROBE_ON_RETHOOK
2360 		rethook_free(rps[i]->rh);
2361 #else
2362 		rcu_assign_pointer(rps[i]->rph->rp, NULL);
2363 #endif
2364 	}
2365 
2366 	synchronize_rcu();
2367 	for (i = 0; i < num; i++) {
2368 		if (rps[i]->kp.addr) {
2369 			__unregister_kprobe_bottom(&rps[i]->kp);
2370 #ifndef CONFIG_KRETPROBE_ON_RETHOOK
2371 			free_rp_inst(rps[i]);
2372 #endif
2373 		}
2374 	}
2375 }
2376 EXPORT_SYMBOL_GPL(unregister_kretprobes);
2377 
2378 #else /* CONFIG_KRETPROBES */
2379 int register_kretprobe(struct kretprobe *rp)
2380 {
2381 	return -EOPNOTSUPP;
2382 }
2383 EXPORT_SYMBOL_GPL(register_kretprobe);
2384 
2385 int register_kretprobes(struct kretprobe **rps, int num)
2386 {
2387 	return -EOPNOTSUPP;
2388 }
2389 EXPORT_SYMBOL_GPL(register_kretprobes);
2390 
/* Stub for builds without CONFIG_KRETPROBES: nothing to unregister. */
void unregister_kretprobe(struct kretprobe *rp)
{
}
EXPORT_SYMBOL_GPL(unregister_kretprobe);
2395 
/* Stub for builds without CONFIG_KRETPROBES: nothing to unregister. */
void unregister_kretprobes(struct kretprobe **rps, int num)
{
}
EXPORT_SYMBOL_GPL(unregister_kretprobes);
2400 
/*
 * Stub for builds without CONFIG_KRETPROBES. The symbol still has to
 * exist because report_probe() compares a probe's pre_handler against it.
 */
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
	return 0;
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);
2406 
2407 #endif /* CONFIG_KRETPROBES */
2408 
2409 /* Set the kprobe gone and remove its instruction buffer. */
2410 static void kill_kprobe(struct kprobe *p)
2411 {
2412 	struct kprobe *kp;
2413 
2414 	lockdep_assert_held(&kprobe_mutex);
2415 
2416 	/*
2417 	 * The module is going away. We should disarm the kprobe which
2418 	 * is using ftrace, because ftrace framework is still available at
2419 	 * 'MODULE_STATE_GOING' notification.
2420 	 */
2421 	if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
2422 		disarm_kprobe_ftrace(p);
2423 
2424 	p->flags |= KPROBE_FLAG_GONE;
2425 	if (kprobe_aggrprobe(p)) {
2426 		/*
2427 		 * If this is an aggr_kprobe, we have to list all the
2428 		 * chained probes and mark them GONE.
2429 		 */
2430 		list_for_each_entry(kp, &p->list, list)
2431 			kp->flags |= KPROBE_FLAG_GONE;
2432 		p->post_handler = NULL;
2433 		kill_optimized_kprobe(p);
2434 	}
2435 	/*
2436 	 * Here, we can remove insn_slot safely, because no thread calls
2437 	 * the original probed function (which will be freed soon) any more.
2438 	 */
2439 	arch_remove_kprobe(p);
2440 }
2441 
2442 /* Disable one kprobe */
2443 int disable_kprobe(struct kprobe *kp)
2444 {
2445 	struct kprobe *p;
2446 
2447 	guard(mutex)(&kprobe_mutex);
2448 
2449 	/* Disable this kprobe */
2450 	p = __disable_kprobe(kp);
2451 
2452 	return IS_ERR(p) ? PTR_ERR(p) : 0;
2453 }
2454 EXPORT_SYMBOL_GPL(disable_kprobe);
2455 
2456 /* Enable one kprobe */
2457 int enable_kprobe(struct kprobe *kp)
2458 {
2459 	int ret = 0;
2460 	struct kprobe *p;
2461 
2462 	guard(mutex)(&kprobe_mutex);
2463 
2464 	/* Check whether specified probe is valid. */
2465 	p = __get_valid_kprobe(kp);
2466 	if (unlikely(p == NULL))
2467 		return -EINVAL;
2468 
2469 	if (kprobe_gone(kp))
2470 		/* This kprobe has gone, we couldn't enable it. */
2471 		return -EINVAL;
2472 
2473 	if (p != kp)
2474 		kp->flags &= ~KPROBE_FLAG_DISABLED;
2475 
2476 	if (!kprobes_all_disarmed && kprobe_disabled(p)) {
2477 		p->flags &= ~KPROBE_FLAG_DISABLED;
2478 		ret = arm_kprobe(p);
2479 		if (ret) {
2480 			p->flags |= KPROBE_FLAG_DISABLED;
2481 			if (p != kp)
2482 				kp->flags |= KPROBE_FLAG_DISABLED;
2483 		}
2484 	}
2485 	return ret;
2486 }
2487 EXPORT_SYMBOL_GPL(enable_kprobe);
2488 
2489 /* Caller must NOT call this in usual path. This is only for critical case */
2490 void dump_kprobe(struct kprobe *kp)
2491 {
2492 	pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n",
2493 	       kp->symbol_name, kp->offset, kp->addr);
2494 }
2495 NOKPROBE_SYMBOL(dump_kprobe);
2496 
2497 int kprobe_add_ksym_blacklist(unsigned long entry)
2498 {
2499 	struct kprobe_blacklist_entry *ent;
2500 	unsigned long offset = 0, size = 0;
2501 
2502 	if (!kernel_text_address(entry) ||
2503 	    !kallsyms_lookup_size_offset(entry, &size, &offset))
2504 		return -EINVAL;
2505 
2506 	ent = kmalloc_obj(*ent);
2507 	if (!ent)
2508 		return -ENOMEM;
2509 	ent->start_addr = entry;
2510 	ent->end_addr = entry + size;
2511 	INIT_LIST_HEAD(&ent->list);
2512 	list_add_tail(&ent->list, &kprobe_blacklist);
2513 
2514 	return (int)size;
2515 }
2516 
/*
 * Add all symbols in the area [@start, @end) to the kprobe blacklist.
 * Returns 0 on success or the first negative error reported by
 * kprobe_add_ksym_blacklist().
 */
int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
{
	unsigned long cur = start;
	int step;

	while (cur < end) {
		step = kprobe_add_ksym_blacklist(cur);
		if (step < 0)
			return step;

		/* A zero size (alias symbol) must still advance the cursor. */
		if (!step)
			step = 1;

		cur += step;
	}

	return 0;
}
2532 
2533 int __weak arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
2534 				   char *type, char *sym)
2535 {
2536 	return -ERANGE;
2537 }
2538 
2539 int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
2540 		       char *sym)
2541 {
2542 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
2543 	if (!kprobe_cache_get_kallsym(&kprobe_insn_slots, &symnum, value, type, sym))
2544 		return 0;
2545 #ifdef CONFIG_OPTPROBES
2546 	if (!kprobe_cache_get_kallsym(&kprobe_optinsn_slots, &symnum, value, type, sym))
2547 		return 0;
2548 #endif
2549 #endif
2550 	if (!arch_kprobe_get_kallsym(&symnum, value, type, sym))
2551 		return 0;
2552 	return -ERANGE;
2553 }
2554 
2555 int __init __weak arch_populate_kprobe_blacklist(void)
2556 {
2557 	return 0;
2558 }
2559 
2560 /*
2561  * Lookup and populate the kprobe_blacklist.
2562  *
2563  * Unlike the kretprobe blacklist, we'll need to determine
2564  * the range of addresses that belong to the said functions,
2565  * since a kprobe need not necessarily be at the beginning
2566  * of a function.
2567  */
2568 static int __init populate_kprobe_blacklist(unsigned long *start,
2569 					     unsigned long *end)
2570 {
2571 	unsigned long entry;
2572 	unsigned long *iter;
2573 	int ret;
2574 
2575 	for (iter = start; iter < end; iter++) {
2576 		entry = (unsigned long)dereference_symbol_descriptor((void *)*iter);
2577 		ret = kprobe_add_ksym_blacklist(entry);
2578 		if (ret == -EINVAL)
2579 			continue;
2580 		if (ret < 0)
2581 			return ret;
2582 	}
2583 
2584 	/* Symbols in '__kprobes_text' are blacklisted */
2585 	ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
2586 					(unsigned long)__kprobes_text_end);
2587 	if (ret)
2588 		return ret;
2589 
2590 	/* Symbols in 'noinstr' section are blacklisted */
2591 	ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
2592 					(unsigned long)__noinstr_text_end);
2593 
2594 	return ret ? : arch_populate_kprobe_blacklist();
2595 }
2596 
2597 #ifdef CONFIG_MODULES
2598 /* Remove all symbols in given area from kprobe blacklist */
2599 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
2600 {
2601 	struct kprobe_blacklist_entry *ent, *n;
2602 
2603 	list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
2604 		if (ent->start_addr < start || ent->start_addr >= end)
2605 			continue;
2606 		list_del(&ent->list);
2607 		kfree(ent);
2608 	}
2609 }
2610 
/* Remove the blacklist entries that start exactly at @entry. */
static void kprobe_remove_ksym_blacklist(unsigned long entry)
{
	kprobe_remove_area_blacklist(entry, entry + 1);
}
2615 
2616 static void add_module_kprobe_blacklist(struct module *mod)
2617 {
2618 	unsigned long start, end;
2619 	int i;
2620 
2621 	if (mod->kprobe_blacklist) {
2622 		for (i = 0; i < mod->num_kprobe_blacklist; i++)
2623 			kprobe_add_ksym_blacklist(mod->kprobe_blacklist[i]);
2624 	}
2625 
2626 	start = (unsigned long)mod->kprobes_text_start;
2627 	if (start) {
2628 		end = start + mod->kprobes_text_size;
2629 		kprobe_add_area_blacklist(start, end);
2630 	}
2631 
2632 	start = (unsigned long)mod->noinstr_text_start;
2633 	if (start) {
2634 		end = start + mod->noinstr_text_size;
2635 		kprobe_add_area_blacklist(start, end);
2636 	}
2637 }
2638 
2639 static void remove_module_kprobe_blacklist(struct module *mod)
2640 {
2641 	unsigned long start, end;
2642 	int i;
2643 
2644 	if (mod->kprobe_blacklist) {
2645 		for (i = 0; i < mod->num_kprobe_blacklist; i++)
2646 			kprobe_remove_ksym_blacklist(mod->kprobe_blacklist[i]);
2647 	}
2648 
2649 	start = (unsigned long)mod->kprobes_text_start;
2650 	if (start) {
2651 		end = start + mod->kprobes_text_size;
2652 		kprobe_remove_area_blacklist(start, end);
2653 	}
2654 
2655 	start = (unsigned long)mod->noinstr_text_start;
2656 	if (start) {
2657 		end = start + mod->noinstr_text_size;
2658 		kprobe_remove_area_blacklist(start, end);
2659 	}
2660 }
2661 
2662 /* Module notifier call back, checking kprobes on the module */
2663 static int kprobes_module_callback(struct notifier_block *nb,
2664 				   unsigned long val, void *data)
2665 {
2666 	struct module *mod = data;
2667 	struct hlist_head *head;
2668 	struct kprobe *p;
2669 	unsigned int i;
2670 	int checkcore = (val == MODULE_STATE_GOING);
2671 
2672 	guard(mutex)(&kprobe_mutex);
2673 
2674 	if (val == MODULE_STATE_COMING)
2675 		add_module_kprobe_blacklist(mod);
2676 
2677 	if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
2678 		return NOTIFY_DONE;
2679 
2680 	/*
2681 	 * When 'MODULE_STATE_GOING' was notified, both of module '.text' and
2682 	 * '.init.text' sections would be freed. When 'MODULE_STATE_LIVE' was
2683 	 * notified, only '.init.text' section would be freed. We need to
2684 	 * disable kprobes which have been inserted in the sections.
2685 	 */
2686 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2687 		head = &kprobe_table[i];
2688 		hlist_for_each_entry(p, head, hlist)
2689 			if (within_module_init((unsigned long)p->addr, mod) ||
2690 			    (checkcore &&
2691 			     within_module_core((unsigned long)p->addr, mod))) {
2692 				/*
2693 				 * The vaddr this probe is installed will soon
2694 				 * be vfreed buy not synced to disk. Hence,
2695 				 * disarming the breakpoint isn't needed.
2696 				 *
2697 				 * Note, this will also move any optimized probes
2698 				 * that are pending to be removed from their
2699 				 * corresponding lists to the 'freeing_list' and
2700 				 * will not be touched by the delayed
2701 				 * kprobe_optimizer() work handler.
2702 				 */
2703 				kill_kprobe(p);
2704 			}
2705 	}
2706 	if (val == MODULE_STATE_GOING)
2707 		remove_module_kprobe_blacklist(mod);
2708 	return NOTIFY_DONE;
2709 }
2710 
2711 static struct notifier_block kprobe_module_nb = {
2712 	.notifier_call = kprobes_module_callback,
2713 	.priority = 0
2714 };
2715 
2716 static int kprobe_register_module_notifier(void)
2717 {
2718 	return register_module_notifier(&kprobe_module_nb);
2719 }
2720 #else
/* Without CONFIG_MODULES there is no notifier to register; report success. */
static int kprobe_register_module_notifier(void)
{
	return 0;
}
2725 #endif /* CONFIG_MODULES */
2726 
2727 void kprobe_free_init_mem(void)
2728 {
2729 	void *start = (void *)(&__init_begin);
2730 	void *end = (void *)(&__init_end);
2731 	struct hlist_head *head;
2732 	struct kprobe *p;
2733 	int i;
2734 
2735 	guard(mutex)(&kprobe_mutex);
2736 
2737 	/* Kill all kprobes on initmem because the target code has been freed. */
2738 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2739 		head = &kprobe_table[i];
2740 		hlist_for_each_entry(p, head, hlist) {
2741 			if (start <= (void *)p->addr && (void *)p->addr < end)
2742 				kill_kprobe(p);
2743 		}
2744 	}
2745 }
2746 
2747 static int __init init_kprobes(void)
2748 {
2749 	int i, err;
2750 
2751 	/* FIXME allocate the probe table, currently defined statically */
2752 	/* initialize all list heads */
2753 	for (i = 0; i < KPROBE_TABLE_SIZE; i++)
2754 		INIT_HLIST_HEAD(&kprobe_table[i]);
2755 
2756 	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
2757 					__stop_kprobe_blacklist);
2758 	if (err)
2759 		pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);
2760 
2761 	if (kretprobe_blacklist_size) {
2762 		/* lookup the function address from its name */
2763 		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2764 			kretprobe_blacklist[i].addr =
2765 				kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
2766 			if (!kretprobe_blacklist[i].addr)
2767 				pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n",
2768 				       kretprobe_blacklist[i].name);
2769 		}
2770 	}
2771 
2772 	/* By default, kprobes are armed */
2773 	kprobes_all_disarmed = false;
2774 
2775 	/* Initialize the optimization infrastructure */
2776 	init_optprobe();
2777 
2778 	err = arch_init_kprobes();
2779 	if (!err)
2780 		err = register_die_notifier(&kprobe_exceptions_nb);
2781 	if (!err)
2782 		err = kprobe_register_module_notifier();
2783 
2784 	kprobes_initialized = (err == 0);
2785 	kprobe_sysctls_init();
2786 	return err;
2787 }
2788 early_initcall(init_kprobes);
2789 
2790 #if defined(CONFIG_OPTPROBES)
2791 static int __init init_optprobes(void)
2792 {
2793 	/*
2794 	 * Enable kprobe optimization - this kicks the optimizer which
2795 	 * depends on synchronize_rcu_tasks() and ksoftirqd, that is
2796 	 * not spawned in early initcall. So delay the optimization.
2797 	 */
2798 	optimize_all_kprobes();
2799 
2800 	return 0;
2801 }
2802 subsys_initcall(init_optprobes);
2803 #endif
2804 
2805 #ifdef CONFIG_DEBUG_FS
2806 static void report_probe(struct seq_file *pi, struct kprobe *p,
2807 		const char *sym, int offset, char *modname, struct kprobe *pp)
2808 {
2809 	char *kprobe_type;
2810 	void *addr = p->addr;
2811 
2812 	if (p->pre_handler == pre_handler_kretprobe)
2813 		kprobe_type = "r";
2814 	else
2815 		kprobe_type = "k";
2816 
2817 	if (!kallsyms_show_value(pi->file->f_cred))
2818 		addr = NULL;
2819 
2820 	if (sym)
2821 		seq_printf(pi, "%px  %s  %s+0x%x  %s ",
2822 			addr, kprobe_type, sym, offset,
2823 			(modname ? modname : " "));
2824 	else	/* try to use %pS */
2825 		seq_printf(pi, "%px  %s  %pS ",
2826 			addr, kprobe_type, p->addr);
2827 
2828 	if (!pp)
2829 		pp = p;
2830 	seq_printf(pi, "%s%s%s%s\n",
2831 		(kprobe_gone(p) ? "[GONE]" : ""),
2832 		((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
2833 		(kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
2834 		(kprobe_ftrace(pp) ? "[FTRACE]" : ""));
2835 }
2836 
2837 static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
2838 {
2839 	return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
2840 }
2841 
2842 static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
2843 {
2844 	(*pos)++;
2845 	if (*pos >= KPROBE_TABLE_SIZE)
2846 		return NULL;
2847 	return pos;
2848 }
2849 
/* seq_file stop: the start/next callbacks acquire nothing to release. */
static void kprobe_seq_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}
2854 
2855 static int show_kprobe_addr(struct seq_file *pi, void *v)
2856 {
2857 	struct hlist_head *head;
2858 	struct kprobe *p, *kp;
2859 	const char *sym;
2860 	unsigned int i = *(loff_t *) v;
2861 	unsigned long offset = 0;
2862 	char *modname, namebuf[KSYM_NAME_LEN];
2863 
2864 	head = &kprobe_table[i];
2865 	preempt_disable();
2866 	hlist_for_each_entry_rcu(p, head, hlist) {
2867 		sym = kallsyms_lookup((unsigned long)p->addr, NULL,
2868 					&offset, &modname, namebuf);
2869 		if (kprobe_aggrprobe(p)) {
2870 			list_for_each_entry_rcu(kp, &p->list, list)
2871 				report_probe(pi, kp, sym, offset, modname, p);
2872 		} else
2873 			report_probe(pi, p, sym, offset, modname, NULL);
2874 	}
2875 	preempt_enable();
2876 	return 0;
2877 }
2878 
2879 static const struct seq_operations kprobes_sops = {
2880 	.start = kprobe_seq_start,
2881 	.next  = kprobe_seq_next,
2882 	.stop  = kprobe_seq_stop,
2883 	.show  = show_kprobe_addr
2884 };
2885 
2886 DEFINE_SEQ_ATTRIBUTE(kprobes);
2887 
2888 /* kprobes/blacklist -- shows which functions can not be probed */
2889 static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
2890 {
2891 	mutex_lock(&kprobe_mutex);
2892 	return seq_list_start(&kprobe_blacklist, *pos);
2893 }
2894 
2895 static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
2896 {
2897 	return seq_list_next(v, &kprobe_blacklist, pos);
2898 }
2899 
2900 static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
2901 {
2902 	struct kprobe_blacklist_entry *ent =
2903 		list_entry(v, struct kprobe_blacklist_entry, list);
2904 
2905 	/*
2906 	 * If '/proc/kallsyms' is not showing kernel address, we won't
2907 	 * show them here either.
2908 	 */
2909 	if (!kallsyms_show_value(m->file->f_cred))
2910 		seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL,
2911 			   (void *)ent->start_addr);
2912 	else
2913 		seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
2914 			   (void *)ent->end_addr, (void *)ent->start_addr);
2915 	return 0;
2916 }
2917 
2918 static void kprobe_blacklist_seq_stop(struct seq_file *f, void *v)
2919 {
2920 	mutex_unlock(&kprobe_mutex);
2921 }
2922 
2923 static const struct seq_operations kprobe_blacklist_sops = {
2924 	.start = kprobe_blacklist_seq_start,
2925 	.next  = kprobe_blacklist_seq_next,
2926 	.stop  = kprobe_blacklist_seq_stop,
2927 	.show  = kprobe_blacklist_seq_show,
2928 };
2929 DEFINE_SEQ_ATTRIBUTE(kprobe_blacklist);
2930 
2931 static int arm_all_kprobes(void)
2932 {
2933 	struct hlist_head *head;
2934 	struct kprobe *p;
2935 	unsigned int i, total = 0, errors = 0;
2936 	int err, ret = 0;
2937 
2938 	guard(mutex)(&kprobe_mutex);
2939 
2940 	/* If kprobes are armed, just return */
2941 	if (!kprobes_all_disarmed)
2942 		return 0;
2943 
2944 	/*
2945 	 * optimize_kprobe() called by arm_kprobe() checks
2946 	 * kprobes_all_disarmed, so set kprobes_all_disarmed before
2947 	 * arm_kprobe.
2948 	 */
2949 	kprobes_all_disarmed = false;
2950 	/* Arming kprobes doesn't optimize kprobe itself */
2951 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2952 		head = &kprobe_table[i];
2953 		/* Arm all kprobes on a best-effort basis */
2954 		hlist_for_each_entry(p, head, hlist) {
2955 			if (!kprobe_disabled(p)) {
2956 				err = arm_kprobe(p);
2957 				if (err)  {
2958 					errors++;
2959 					ret = err;
2960 				}
2961 				total++;
2962 			}
2963 		}
2964 	}
2965 
2966 	if (errors)
2967 		pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n",
2968 			errors, total);
2969 	else
2970 		pr_info("Kprobes globally enabled\n");
2971 
2972 	return ret;
2973 }
2974 
2975 static int disarm_all_kprobes(void)
2976 {
2977 	struct hlist_head *head;
2978 	struct kprobe *p;
2979 	unsigned int i, total = 0, errors = 0;
2980 	int err, ret = 0;
2981 
2982 	guard(mutex)(&kprobe_mutex);
2983 
2984 	/* If kprobes are already disarmed, just return */
2985 	if (kprobes_all_disarmed)
2986 		return 0;
2987 
2988 	kprobes_all_disarmed = true;
2989 
2990 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2991 		head = &kprobe_table[i];
2992 		/* Disarm all kprobes on a best-effort basis */
2993 		hlist_for_each_entry(p, head, hlist) {
2994 			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
2995 				err = disarm_kprobe(p, false);
2996 				if (err) {
2997 					errors++;
2998 					ret = err;
2999 				}
3000 				total++;
3001 			}
3002 		}
3003 	}
3004 
3005 	if (errors)
3006 		pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n",
3007 			errors, total);
3008 	else
3009 		pr_info("Kprobes globally disabled\n");
3010 
3011 	/* Wait for disarming all kprobes by optimizer */
3012 	wait_for_kprobe_optimizer_locked();
3013 	return ret;
3014 }
3015 
3016 /*
3017  * XXX: The debugfs bool file interface doesn't allow for callbacks
3018  * when the bool state is switched. We can reuse that facility when
3019  * available
3020  */
3021 static ssize_t read_enabled_file_bool(struct file *file,
3022 	       char __user *user_buf, size_t count, loff_t *ppos)
3023 {
3024 	char buf[3];
3025 
3026 	if (!kprobes_all_disarmed)
3027 		buf[0] = '1';
3028 	else
3029 		buf[0] = '0';
3030 	buf[1] = '\n';
3031 	buf[2] = 0x00;
3032 	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
3033 }
3034 
3035 static ssize_t write_enabled_file_bool(struct file *file,
3036 	       const char __user *user_buf, size_t count, loff_t *ppos)
3037 {
3038 	bool enable;
3039 	int ret;
3040 
3041 	ret = kstrtobool_from_user(user_buf, count, &enable);
3042 	if (ret)
3043 		return ret;
3044 
3045 	ret = enable ? arm_all_kprobes() : disarm_all_kprobes();
3046 	if (ret)
3047 		return ret;
3048 
3049 	return count;
3050 }
3051 
3052 static const struct file_operations fops_kp = {
3053 	.read =         read_enabled_file_bool,
3054 	.write =        write_enabled_file_bool,
3055 	.llseek =	default_llseek,
3056 };
3057 
3058 static int __init debugfs_kprobe_init(void)
3059 {
3060 	struct dentry *dir;
3061 
3062 	dir = debugfs_create_dir("kprobes", NULL);
3063 
3064 	debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops);
3065 
3066 	debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp);
3067 
3068 	debugfs_create_file("blacklist", 0400, dir, NULL,
3069 			    &kprobe_blacklist_fops);
3070 
3071 	return 0;
3072 }
3073 
3074 late_initcall(debugfs_kprobe_init);
3075 #endif /* CONFIG_DEBUG_FS */
3076