1 #include <linux/sched.h> 2 #include <linux/sched/task.h> 3 #include <linux/sched/task_stack.h> 4 #include <linux/interrupt.h> 5 #include <asm/sections.h> 6 #include <asm/ptrace.h> 7 #include <asm/bitops.h> 8 #include <asm/stacktrace.h> 9 #include <asm/unwind.h> 10 11 #define FRAME_HEADER_SIZE (sizeof(long) * 2) 12 13 unsigned long unwind_get_return_address(struct unwind_state *state) 14 { 15 if (unwind_done(state)) 16 return 0; 17 18 return __kernel_text_address(state->ip) ? state->ip : 0; 19 } 20 EXPORT_SYMBOL_GPL(unwind_get_return_address); 21 22 unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) 23 { 24 if (unwind_done(state)) 25 return NULL; 26 27 return state->regs ? &state->regs->ip : state->bp + 1; 28 } 29 30 static void unwind_dump(struct unwind_state *state) 31 { 32 static bool dumped_before = false; 33 bool prev_zero, zero = false; 34 unsigned long word, *sp; 35 struct stack_info stack_info = {0}; 36 unsigned long visit_mask = 0; 37 38 if (dumped_before) 39 return; 40 41 dumped_before = true; 42 43 printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n", 44 state->stack_info.type, state->stack_info.next_sp, 45 state->stack_mask, state->graph_idx); 46 47 for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp; 48 sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { 49 if (get_stack_info(sp, state->task, &stack_info, &visit_mask)) 50 break; 51 52 for (; sp < stack_info.end; sp++) { 53 54 word = READ_ONCE_NOCHECK(*sp); 55 56 prev_zero = zero; 57 zero = word == 0; 58 59 if (zero) { 60 if (!prev_zero) 61 printk_deferred("%p: %0*x ...\n", 62 sp, BITS_PER_LONG/4, 0); 63 continue; 64 } 65 66 printk_deferred("%p: %0*lx (%pB)\n", 67 sp, BITS_PER_LONG/4, word, (void *)word); 68 } 69 } 70 } 71 72 static size_t regs_size(struct pt_regs *regs) 73 { 74 /* x86_32 regs from kernel mode are two words shorter: */ 75 if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs)) 76 return sizeof(*regs) - 2*sizeof(long); 77 78 return sizeof(*regs); 79 } 80 81 static bool in_entry_code(unsigned long ip) 82 { 83 char *addr = (char *)ip; 84 85 if (addr >= __entry_text_start && addr < __entry_text_end) 86 return true; 87 88 if (addr >= __irqentry_text_start && addr < __irqentry_text_end) 89 return true; 90 91 return false; 92 } 93 94 static inline unsigned long *last_frame(struct unwind_state *state) 95 { 96 return (unsigned long *)task_pt_regs(state->task) - 2; 97 } 98 99 static bool is_last_frame(struct unwind_state *state) 100 { 101 return state->bp == last_frame(state); 102 } 103 104 #ifdef CONFIG_X86_32 105 #define GCC_REALIGN_WORDS 3 106 #else 107 #define GCC_REALIGN_WORDS 1 108 #endif 109 110 static inline unsigned long *last_aligned_frame(struct unwind_state *state) 111 { 112 return last_frame(state) - GCC_REALIGN_WORDS; 113 } 114 115 static bool is_last_aligned_frame(struct unwind_state *state) 116 { 117 unsigned long *last_bp = last_frame(state); 118 unsigned long *aligned_bp = last_aligned_frame(state); 119 120 /* 121 * GCC can occasionally decide to realign the stack pointer and change 122 * the offset of the stack frame in the prologue of a function called 123 * by head/entry code. Examples: 124 * 125 * <start_secondary>: 126 * push %edi 127 * lea 0x8(%esp),%edi 128 * and $0xfffffff8,%esp 129 * pushl -0x4(%edi) 130 * push %ebp 131 * mov %esp,%ebp 132 * 133 * <x86_64_start_kernel>: 134 * lea 0x8(%rsp),%r10 135 * and $0xfffffffffffffff0,%rsp 136 * pushq -0x8(%r10) 137 * push %rbp 138 * mov %rsp,%rbp 139 * 140 * After aligning the stack, it pushes a duplicate copy of the return 141 * address before pushing the frame pointer. 142 */ 143 return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1)); 144 } 145 146 static bool is_last_ftrace_frame(struct unwind_state *state) 147 { 148 unsigned long *last_bp = last_frame(state); 149 unsigned long *last_ftrace_bp = last_bp - 3; 150 151 /* 152 * When unwinding from an ftrace handler of a function called by entry 153 * code, the stack layout of the last frame is: 154 * 155 * bp 156 * parent ret addr 157 * bp 158 * function ret addr 159 * parent ret addr 160 * pt_regs 161 * ----------------- 162 */ 163 return (state->bp == last_ftrace_bp && 164 *state->bp == *(state->bp + 2) && 165 *(state->bp + 1) == *(state->bp + 4)); 166 } 167 168 static bool is_last_task_frame(struct unwind_state *state) 169 { 170 return is_last_frame(state) || is_last_aligned_frame(state) || 171 is_last_ftrace_frame(state); 172 } 173 174 /* 175 * This determines if the frame pointer actually contains an encoded pointer to 176 * pt_regs on the stack. See ENCODE_FRAME_POINTER. 177 */ 178 #ifdef CONFIG_X86_64 179 static struct pt_regs *decode_frame_pointer(unsigned long *bp) 180 { 181 unsigned long regs = (unsigned long)bp; 182 183 if (!(regs & 0x1)) 184 return NULL; 185 186 return (struct pt_regs *)(regs & ~0x1); 187 } 188 #else 189 static struct pt_regs *decode_frame_pointer(unsigned long *bp) 190 { 191 unsigned long regs = (unsigned long)bp; 192 193 if (regs & 0x80000000) 194 return NULL; 195 196 return (struct pt_regs *)(regs | 0x80000000); 197 } 198 #endif 199 200 #ifdef CONFIG_X86_32 201 #define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long)) 202 #else 203 #define KERNEL_REGS_SIZE (sizeof(struct pt_regs)) 204 #endif 205 206 static bool update_stack_state(struct unwind_state *state, 207 unsigned long *next_bp) 208 { 209 struct stack_info *info = &state->stack_info; 210 enum stack_type prev_type = info->type; 211 struct pt_regs *regs; 212 unsigned long *frame, *prev_frame_end, *addr_p, addr; 213 size_t len; 214 215 if (state->regs) 216 prev_frame_end = (void *)state->regs + regs_size(state->regs); 217 else 218 prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; 219 220 /* Is the next frame pointer an encoded pointer to pt_regs? */ 221 regs = decode_frame_pointer(next_bp); 222 if (regs) { 223 frame = (unsigned long *)regs; 224 len = KERNEL_REGS_SIZE; 225 state->got_irq = true; 226 } else { 227 frame = next_bp; 228 len = FRAME_HEADER_SIZE; 229 } 230 231 /* 232 * If the next bp isn't on the current stack, switch to the next one. 233 * 234 * We may have to traverse multiple stacks to deal with the possibility 235 * that info->next_sp could point to an empty stack and the next bp 236 * could be on a subsequent stack. 237 */ 238 while (!on_stack(info, frame, len)) 239 if (get_stack_info(info->next_sp, state->task, info, 240 &state->stack_mask)) 241 return false; 242 243 /* Make sure it only unwinds up and doesn't overlap the prev frame: */ 244 if (state->orig_sp && state->stack_info.type == prev_type && 245 frame < prev_frame_end) 246 return false; 247 248 /* 249 * On 32-bit with user mode regs, make sure the last two regs are safe 250 * to access: 251 */ 252 if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) && 253 !on_stack(info, frame, len + 2*sizeof(long))) 254 return false; 255 256 /* Move state to the next frame: */ 257 if (regs) { 258 state->regs = regs; 259 state->bp = NULL; 260 } else { 261 state->bp = next_bp; 262 state->regs = NULL; 263 } 264 265 /* Save the return address: */ 266 if (state->regs && user_mode(state->regs)) 267 state->ip = 0; 268 else { 269 addr_p = unwind_get_return_address_ptr(state); 270 addr = READ_ONCE_TASK_STACK(state->task, *addr_p); 271 state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, 272 addr, addr_p); 273 } 274 275 /* Save the original stack pointer for unwind_dump(): */ 276 if (!state->orig_sp) 277 state->orig_sp = frame; 278 279 return true; 280 } 281 282 bool unwind_next_frame(struct unwind_state *state) 283 { 284 struct pt_regs *regs; 285 unsigned long *next_bp; 286 287 if (unwind_done(state)) 288 return false; 289 290 /* Have we reached the end? */ 291 if (state->regs && user_mode(state->regs)) 292 goto the_end; 293 294 if (is_last_task_frame(state)) { 295 regs = task_pt_regs(state->task); 296 297 /* 298 * kthreads (other than the boot CPU's idle thread) have some 299 * partial regs at the end of their stack which were placed 300 * there by copy_thread_tls(). But the regs don't have any 301 * useful information, so we can skip them. 302 * 303 * This user_mode() check is slightly broader than a PF_KTHREAD 304 * check because it also catches the awkward situation where a 305 * newly forked kthread transitions into a user task by calling 306 * do_execve(), which eventually clears PF_KTHREAD. 307 */ 308 if (!user_mode(regs)) 309 goto the_end; 310 311 /* 312 * We're almost at the end, but not quite: there's still the 313 * syscall regs frame. Entry code doesn't encode the regs 314 * pointer for syscalls, so we have to set it manually. 315 */ 316 state->regs = regs; 317 state->bp = NULL; 318 state->ip = 0; 319 return true; 320 } 321 322 /* Get the next frame pointer: */ 323 if (state->regs) 324 next_bp = (unsigned long *)state->regs->bp; 325 else 326 next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp); 327 328 /* Move to the next frame if it's safe: */ 329 if (!update_stack_state(state, next_bp)) 330 goto bad_address; 331 332 return true; 333 334 bad_address: 335 state->error = true; 336 337 /* 338 * When unwinding a non-current task, the task might actually be 339 * running on another CPU, in which case it could be modifying its 340 * stack while we're reading it. This is generally not a problem and 341 * can be ignored as long as the caller understands that unwinding 342 * another task will not always succeed. 343 */ 344 if (state->task != current) 345 goto the_end; 346 347 /* 348 * Don't warn if the unwinder got lost due to an interrupt in entry 349 * code or in the C handler before the first frame pointer got set up: 350 */ 351 if (state->got_irq && in_entry_code(state->ip)) 352 goto the_end; 353 if (state->regs && 354 state->regs->sp >= (unsigned long)last_aligned_frame(state) && 355 state->regs->sp < (unsigned long)task_pt_regs(state->task)) 356 goto the_end; 357 358 /* 359 * There are some known frame pointer issues on 32-bit. Disable 360 * unwinder warnings on 32-bit until it gets objtool support. 361 */ 362 if (IS_ENABLED(CONFIG_X86_32)) 363 goto the_end; 364 365 if (state->regs) { 366 printk_deferred_once(KERN_WARNING 367 "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", 368 state->regs, state->task->comm, 369 state->task->pid, next_bp); 370 unwind_dump(state); 371 } else { 372 printk_deferred_once(KERN_WARNING 373 "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n", 374 state->bp, state->task->comm, 375 state->task->pid, next_bp); 376 unwind_dump(state); 377 } 378 the_end: 379 state->stack_info.type = STACK_TYPE_UNKNOWN; 380 return false; 381 } 382 EXPORT_SYMBOL_GPL(unwind_next_frame); 383 384 void __unwind_start(struct unwind_state *state, struct task_struct *task, 385 struct pt_regs *regs, unsigned long *first_frame) 386 { 387 unsigned long *bp; 388 389 memset(state, 0, sizeof(*state)); 390 state->task = task; 391 state->got_irq = (regs); 392 393 /* Don't even attempt to start from user mode regs: */ 394 if (regs && user_mode(regs)) { 395 state->stack_info.type = STACK_TYPE_UNKNOWN; 396 return; 397 } 398 399 bp = get_frame_pointer(task, regs); 400 401 /* Initialize stack info and make sure the frame data is accessible: */ 402 get_stack_info(bp, state->task, &state->stack_info, 403 &state->stack_mask); 404 update_stack_state(state, bp); 405 406 /* 407 * The caller can provide the address of the first frame directly 408 * (first_frame) or indirectly (regs->sp) to indicate which stack frame 409 * to start unwinding at. Skip ahead until we reach it. 410 */ 411 while (!unwind_done(state) && 412 (!on_stack(&state->stack_info, first_frame, sizeof(long)) || 413 state->bp < first_frame)) 414 unwind_next_frame(state); 415 } 416 EXPORT_SYMBOL_GPL(__unwind_start); 417