xref: /linux/kernel/entry/common.c (revision 056e065a6b6e01ab54bb9770c0d5a15350e571e2)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/futex.h>
4 #include <linux/highmem.h>
5 #include <linux/irq-entry-common.h>
6 #include <linux/jump_label.h>
7 #include <linux/kmsan.h>
8 #include <linux/livepatch.h>
9 #include <linux/resume_user_mode.h>
10 #include <linux/tick.h>
11 
12 /* Workaround to allow gradual conversion of architecture code */
13 void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
14 
/*
 * Mask of the TIF work bits which keep __exit_to_user_mode_loop() iterating.
 * With CONFIG_HAVE_GENERIC_TIF_BITS the _TIF_RSEQ bit is excluded from the
 * mask; otherwise the mask equals EXIT_TO_USER_MODE_WORK.
 */
#ifndef CONFIG_HAVE_GENERIC_TIF_BITS
# define EXIT_TO_USER_MODE_WORK_LOOP	(EXIT_TO_USER_MODE_WORK)
#else
# define EXIT_TO_USER_MODE_WORK_LOOP	(EXIT_TO_USER_MODE_WORK & ~_TIF_RSEQ)
#endif
20 
/*
 * TIF_SLICE_EXT_SCHED: the reschedule request bits which are taken out of
 * the deny mask below.
 */
#ifndef CONFIG_PREEMPT_RT
# define TIF_SLICE_EXT_SCHED	(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
#else
/*
 * On PREEMPT_RT only LAZY reschedule requests are affected by the rseq
 * slice extension for now, because the extension directly adds to the
 * worst case scheduling latency (schedule is delayed after all).
 *
 * The delay applies to user space only. A rseq_slice_extension_nsec value
 * which is strictly smaller than the worst case kernel space
 * preempt_disable() region should therefore leave the scheduling latency
 * unaffected, even for !LAZY.
 *
 * That value depends on the hardware at hand, though, and cannot be
 * pre-determined in any sensible way. Hence punt on this problem for now.
 */
# define TIF_SLICE_EXT_SCHED	(_TIF_NEED_RESCHED_LAZY)
#endif

/* TIF bits, which prevent a time slice extension. */
#define TIF_SLICE_EXT_DENY	(EXIT_TO_USER_MODE_WORK & ~TIF_SLICE_EXT_SCHED)
41 
42 static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
43 							      unsigned long ti_work)
44 {
45 	/*
46 	 * Before returning to user space ensure that all pending work
47 	 * items have been completed.
48 	 */
49 	while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {
50 
51 		local_irq_enable();
52 
53 		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
54 			if (!rseq_grant_slice_extension(ti_work, TIF_SLICE_EXT_DENY))
55 				schedule();
56 		}
57 
58 		if (ti_work & _TIF_UPROBE)
59 			uprobe_notify_resume(regs);
60 
61 		if (ti_work & _TIF_PATCH_PENDING)
62 			klp_update_patch_state(current);
63 
64 		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
65 			futex_fixup_robust_unlock(regs);
66 			arch_do_signal_or_restart(regs);
67 		}
68 
69 		if (ti_work & _TIF_NOTIFY_RESUME)
70 			resume_user_mode_work(regs);
71 
72 		/* Architecture specific TIF work */
73 		arch_exit_to_user_mode_work(regs, ti_work);
74 
75 		/*
76 		 * Disable interrupts and reevaluate the work flags as they
77 		 * might have changed while interrupts and preemption was
78 		 * enabled above.
79 		 */
80 		local_irq_disable();
81 
82 		/* Check if any of the above work has queued a deferred wakeup */
83 		tick_nohz_user_enter_prepare();
84 
85 		ti_work = read_thread_flags();
86 	}
87 
88 	/* Return the latest work state for arch_exit_to_user_mode() */
89 	return ti_work;
90 }
91 
92 /**
93  * exit_to_user_mode_loop - do any pending work before leaving to user space
94  * @regs:	Pointer to pt_regs on entry stack
95  * @ti_work:	TIF work flags as read by the caller
96  */
97 __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
98 						     unsigned long ti_work)
99 {
100 	for (;;) {
101 		ti_work = __exit_to_user_mode_loop(regs, ti_work);
102 
103 		if (likely(!rseq_exit_to_user_mode_restart(regs, ti_work)))
104 			return ti_work;
105 		ti_work = read_thread_flags();
106 	}
107 }
108 
109 noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
110 {
111 	if (user_mode(regs)) {
112 		irqentry_state_t ret = {
113 			.exit_rcu = false,
114 		};
115 
116 		irqentry_enter_from_user_mode(regs);
117 		return ret;
118 	}
119 
120 	return irqentry_enter_from_kernel_mode(regs);
121 }
122 
/**
 * arch_irqentry_exit_need_resched - Architecture specific need resched function
 *
 * Called by raw_irqentry_exit_cond_resched() to find out whether a
 * reschedule is needed. Unless the architecture defines
 * arch_irqentry_exit_need_resched itself, the default below is used, which
 * returns true.
 *
 * The main purpose is to permit an architecture to avoid preemption of a
 * task from an IRQ.
 */
static inline bool arch_irqentry_exit_need_resched(void);

#if !defined(arch_irqentry_exit_need_resched)
static inline bool arch_irqentry_exit_need_resched(void)
{
	return true;
}
#endif
136 
137 void raw_irqentry_exit_cond_resched(void)
138 {
139 	if (!preempt_count()) {
140 		/* Sanity check RCU and thread stack */
141 		rcu_irq_exit_check_preempt();
142 		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
143 			WARN_ON_ONCE(!on_thread_stack());
144 		if (need_resched() && arch_irqentry_exit_need_resched())
145 			preempt_schedule_irq();
146 	}
147 }
#if defined(CONFIG_PREEMPT_DYNAMIC)
# if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
/*
 * Static call flavour: irqentry_exit_cond_resched is defined with
 * raw_irqentry_exit_cond_resched() as its initial target.
 */
DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched);
# elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
/* Static key flavour: the key is defined in the true state */
DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);

/*
 * Forward to raw_irqentry_exit_cond_resched() only while the static key
 * is enabled; otherwise this is a no-op.
 */
void dynamic_irqentry_exit_cond_resched(void)
{
	if (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
		raw_irqentry_exit_cond_resched();
}
# endif
#endif /* CONFIG_PREEMPT_DYNAMIC */
161 
162 noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
163 {
164 	if (user_mode(regs))
165 		irqentry_exit_to_user_mode(regs);
166 	else
167 		irqentry_exit_to_kernel_mode(regs, state);
168 }
169 
170 irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
171 {
172 	irqentry_state_t irq_state;
173 
174 	irq_state.lockdep = lockdep_hardirqs_enabled();
175 
176 	__nmi_enter();
177 	lockdep_hardirqs_off(CALLER_ADDR0);
178 	lockdep_hardirq_enter();
179 	ct_nmi_enter();
180 
181 	instrumentation_begin();
182 	kmsan_unpoison_entry_regs(regs);
183 	trace_hardirqs_off_finish();
184 	ftrace_nmi_enter();
185 	instrumentation_end();
186 
187 	return irq_state;
188 }
189 
190 void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
191 {
192 	instrumentation_begin();
193 	ftrace_nmi_exit();
194 	if (irq_state.lockdep) {
195 		trace_hardirqs_on_prepare();
196 		lockdep_hardirqs_on_prepare();
197 	}
198 	instrumentation_end();
199 
200 	ct_nmi_exit();
201 	lockdep_hardirq_exit();
202 	if (irq_state.lockdep)
203 		lockdep_hardirqs_on(CALLER_ADDR0);
204 	__nmi_exit();
205 }
206