17c7900f8SJosh Poimboeuf #include <linux/sched.h> 2*29930025SIngo Molnar #include <linux/sched/task.h> 37c7900f8SJosh Poimboeuf #include <asm/ptrace.h> 47c7900f8SJosh Poimboeuf #include <asm/bitops.h> 57c7900f8SJosh Poimboeuf #include <asm/stacktrace.h> 67c7900f8SJosh Poimboeuf #include <asm/unwind.h> 77c7900f8SJosh Poimboeuf 87c7900f8SJosh Poimboeuf #define FRAME_HEADER_SIZE (sizeof(long) * 2) 97c7900f8SJosh Poimboeuf 1084936118SJosh Poimboeuf /* 1184936118SJosh Poimboeuf * This disables KASAN checking when reading a value from another task's stack, 1284936118SJosh Poimboeuf * since the other task could be running on another CPU and could have poisoned 1384936118SJosh Poimboeuf * the stack in the meantime. 1484936118SJosh Poimboeuf */ 1584936118SJosh Poimboeuf #define READ_ONCE_TASK_STACK(task, x) \ 1684936118SJosh Poimboeuf ({ \ 1784936118SJosh Poimboeuf unsigned long val; \ 1884936118SJosh Poimboeuf if (task == current) \ 1984936118SJosh Poimboeuf val = READ_ONCE(x); \ 2084936118SJosh Poimboeuf else \ 2184936118SJosh Poimboeuf val = READ_ONCE_NOCHECK(x); \ 2284936118SJosh Poimboeuf val; \ 2384936118SJosh Poimboeuf }) 2484936118SJosh Poimboeuf 258b5e99f0SJosh Poimboeuf static void unwind_dump(struct unwind_state *state, unsigned long *sp) 268b5e99f0SJosh Poimboeuf { 278b5e99f0SJosh Poimboeuf static bool dumped_before = false; 288b5e99f0SJosh Poimboeuf bool prev_zero, zero = false; 298b5e99f0SJosh Poimboeuf unsigned long word; 308b5e99f0SJosh Poimboeuf 318b5e99f0SJosh Poimboeuf if (dumped_before) 328b5e99f0SJosh Poimboeuf return; 338b5e99f0SJosh Poimboeuf 348b5e99f0SJosh Poimboeuf dumped_before = true; 358b5e99f0SJosh Poimboeuf 368b5e99f0SJosh Poimboeuf printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n", 378b5e99f0SJosh Poimboeuf state->stack_info.type, state->stack_info.next_sp, 388b5e99f0SJosh Poimboeuf state->stack_mask, state->graph_idx); 398b5e99f0SJosh Poimboeuf 408b5e99f0SJosh Poimboeuf for (sp = state->orig_sp; sp < state->stack_info.end; sp++) { 418b5e99f0SJosh Poimboeuf word = READ_ONCE_NOCHECK(*sp); 428b5e99f0SJosh Poimboeuf 438b5e99f0SJosh Poimboeuf prev_zero = zero; 448b5e99f0SJosh Poimboeuf zero = word == 0; 458b5e99f0SJosh Poimboeuf 468b5e99f0SJosh Poimboeuf if (zero) { 478b5e99f0SJosh Poimboeuf if (!prev_zero) 488b5e99f0SJosh Poimboeuf printk_deferred("%p: %016x ...\n", sp, 0); 498b5e99f0SJosh Poimboeuf continue; 508b5e99f0SJosh Poimboeuf } 518b5e99f0SJosh Poimboeuf 528b5e99f0SJosh Poimboeuf printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word); 538b5e99f0SJosh Poimboeuf } 548b5e99f0SJosh Poimboeuf } 558b5e99f0SJosh Poimboeuf 567c7900f8SJosh Poimboeuf unsigned long unwind_get_return_address(struct unwind_state *state) 577c7900f8SJosh Poimboeuf { 587c7900f8SJosh Poimboeuf unsigned long addr; 597c7900f8SJosh Poimboeuf unsigned long *addr_p = unwind_get_return_address_ptr(state); 607c7900f8SJosh Poimboeuf 617c7900f8SJosh Poimboeuf if (unwind_done(state)) 627c7900f8SJosh Poimboeuf return 0; 637c7900f8SJosh Poimboeuf 64946c1911SJosh Poimboeuf if (state->regs && user_mode(state->regs)) 65946c1911SJosh Poimboeuf return 0; 66946c1911SJosh Poimboeuf 6784936118SJosh Poimboeuf addr = READ_ONCE_TASK_STACK(state->task, *addr_p); 6884936118SJosh Poimboeuf addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, 697c7900f8SJosh Poimboeuf addr_p); 707c7900f8SJosh Poimboeuf 71c280f773SJosh Poimboeuf return __kernel_text_address(addr) ? addr : 0; 727c7900f8SJosh Poimboeuf } 737c7900f8SJosh Poimboeuf EXPORT_SYMBOL_GPL(unwind_get_return_address); 747c7900f8SJosh Poimboeuf 7524d86f59SJosh Poimboeuf static size_t regs_size(struct pt_regs *regs) 7624d86f59SJosh Poimboeuf { 7724d86f59SJosh Poimboeuf /* x86_32 regs from kernel mode are two words shorter: */ 7824d86f59SJosh Poimboeuf if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs)) 7924d86f59SJosh Poimboeuf return sizeof(*regs) - 2*sizeof(long); 8024d86f59SJosh Poimboeuf 8124d86f59SJosh Poimboeuf return sizeof(*regs); 8224d86f59SJosh Poimboeuf } 8324d86f59SJosh Poimboeuf 84acb4608aSJosh Poimboeuf static bool is_last_task_frame(struct unwind_state *state) 85acb4608aSJosh Poimboeuf { 86acb4608aSJosh Poimboeuf unsigned long bp = (unsigned long)state->bp; 87acb4608aSJosh Poimboeuf unsigned long regs = (unsigned long)task_pt_regs(state->task); 88acb4608aSJosh Poimboeuf 898023e0e2SJosh Poimboeuf /* 908023e0e2SJosh Poimboeuf * We have to check for the last task frame at two different locations 918023e0e2SJosh Poimboeuf * because gcc can occasionally decide to realign the stack pointer and 928023e0e2SJosh Poimboeuf * change the offset of the stack frame by a word in the prologue of a 938023e0e2SJosh Poimboeuf * function called by head/entry code. 948023e0e2SJosh Poimboeuf */ 958023e0e2SJosh Poimboeuf return bp == regs - FRAME_HEADER_SIZE || 968023e0e2SJosh Poimboeuf bp == regs - FRAME_HEADER_SIZE - sizeof(long); 97acb4608aSJosh Poimboeuf } 98acb4608aSJosh Poimboeuf 99946c1911SJosh Poimboeuf /* 100946c1911SJosh Poimboeuf * This determines if the frame pointer actually contains an encoded pointer to 101946c1911SJosh Poimboeuf * pt_regs on the stack. See ENCODE_FRAME_POINTER. 102946c1911SJosh Poimboeuf */ 103946c1911SJosh Poimboeuf static struct pt_regs *decode_frame_pointer(unsigned long *bp) 104946c1911SJosh Poimboeuf { 105946c1911SJosh Poimboeuf unsigned long regs = (unsigned long)bp; 106946c1911SJosh Poimboeuf 107946c1911SJosh Poimboeuf if (!(regs & 0x1)) 108946c1911SJosh Poimboeuf return NULL; 109946c1911SJosh Poimboeuf 110946c1911SJosh Poimboeuf return (struct pt_regs *)(regs & ~0x1); 111946c1911SJosh Poimboeuf } 112946c1911SJosh Poimboeuf 1137c7900f8SJosh Poimboeuf static bool update_stack_state(struct unwind_state *state, void *addr, 1147c7900f8SJosh Poimboeuf size_t len) 1157c7900f8SJosh Poimboeuf { 1167c7900f8SJosh Poimboeuf struct stack_info *info = &state->stack_info; 1178b5e99f0SJosh Poimboeuf enum stack_type orig_type = info->type; 1187c7900f8SJosh Poimboeuf 1197c7900f8SJosh Poimboeuf /* 1207c7900f8SJosh Poimboeuf * If addr isn't on the current stack, switch to the next one. 1217c7900f8SJosh Poimboeuf * 1227c7900f8SJosh Poimboeuf * We may have to traverse multiple stacks to deal with the possibility 1237c7900f8SJosh Poimboeuf * that 'info->next_sp' could point to an empty stack and 'addr' could 1247c7900f8SJosh Poimboeuf * be on a subsequent stack. 1257c7900f8SJosh Poimboeuf */ 1267c7900f8SJosh Poimboeuf while (!on_stack(info, addr, len)) 1277c7900f8SJosh Poimboeuf if (get_stack_info(info->next_sp, state->task, info, 1287c7900f8SJosh Poimboeuf &state->stack_mask)) 1297c7900f8SJosh Poimboeuf return false; 1307c7900f8SJosh Poimboeuf 1318b5e99f0SJosh Poimboeuf if (!state->orig_sp || info->type != orig_type) 1328b5e99f0SJosh Poimboeuf state->orig_sp = addr; 1338b5e99f0SJosh Poimboeuf 1347c7900f8SJosh Poimboeuf return true; 1357c7900f8SJosh Poimboeuf } 1367c7900f8SJosh Poimboeuf 1377c7900f8SJosh Poimboeuf bool unwind_next_frame(struct unwind_state *state) 1387c7900f8SJosh Poimboeuf { 139946c1911SJosh Poimboeuf struct pt_regs *regs; 140946c1911SJosh Poimboeuf unsigned long *next_bp, *next_frame; 141946c1911SJosh Poimboeuf size_t next_len; 14224d86f59SJosh Poimboeuf enum stack_type prev_type = state->stack_info.type; 1437c7900f8SJosh Poimboeuf 1447c7900f8SJosh Poimboeuf if (unwind_done(state)) 1457c7900f8SJosh Poimboeuf return false; 1467c7900f8SJosh Poimboeuf 147946c1911SJosh Poimboeuf /* have we reached the end? */ 148946c1911SJosh Poimboeuf if (state->regs && user_mode(state->regs)) 149946c1911SJosh Poimboeuf goto the_end; 150946c1911SJosh Poimboeuf 151acb4608aSJosh Poimboeuf if (is_last_task_frame(state)) { 152acb4608aSJosh Poimboeuf regs = task_pt_regs(state->task); 153acb4608aSJosh Poimboeuf 154acb4608aSJosh Poimboeuf /* 155acb4608aSJosh Poimboeuf * kthreads (other than the boot CPU's idle thread) have some 156acb4608aSJosh Poimboeuf * partial regs at the end of their stack which were placed 157acb4608aSJosh Poimboeuf * there by copy_thread_tls(). But the regs don't have any 158acb4608aSJosh Poimboeuf * useful information, so we can skip them. 159acb4608aSJosh Poimboeuf * 160acb4608aSJosh Poimboeuf * This user_mode() check is slightly broader than a PF_KTHREAD 161acb4608aSJosh Poimboeuf * check because it also catches the awkward situation where a 162acb4608aSJosh Poimboeuf * newly forked kthread transitions into a user task by calling 163acb4608aSJosh Poimboeuf * do_execve(), which eventually clears PF_KTHREAD. 164acb4608aSJosh Poimboeuf */ 165acb4608aSJosh Poimboeuf if (!user_mode(regs)) 166acb4608aSJosh Poimboeuf goto the_end; 167acb4608aSJosh Poimboeuf 168acb4608aSJosh Poimboeuf /* 169acb4608aSJosh Poimboeuf * We're almost at the end, but not quite: there's still the 170acb4608aSJosh Poimboeuf * syscall regs frame. Entry code doesn't encode the regs 171acb4608aSJosh Poimboeuf * pointer for syscalls, so we have to set it manually. 172acb4608aSJosh Poimboeuf */ 173acb4608aSJosh Poimboeuf state->regs = regs; 174acb4608aSJosh Poimboeuf state->bp = NULL; 175acb4608aSJosh Poimboeuf return true; 176acb4608aSJosh Poimboeuf } 177acb4608aSJosh Poimboeuf 178946c1911SJosh Poimboeuf /* get the next frame pointer */ 179946c1911SJosh Poimboeuf if (state->regs) 180946c1911SJosh Poimboeuf next_bp = (unsigned long *)state->regs->bp; 181946c1911SJosh Poimboeuf else 18284936118SJosh Poimboeuf next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task,*state->bp); 1837c7900f8SJosh Poimboeuf 184946c1911SJosh Poimboeuf /* is the next frame pointer an encoded pointer to pt_regs? */ 185946c1911SJosh Poimboeuf regs = decode_frame_pointer(next_bp); 186946c1911SJosh Poimboeuf if (regs) { 187946c1911SJosh Poimboeuf next_frame = (unsigned long *)regs; 188946c1911SJosh Poimboeuf next_len = sizeof(*regs); 189946c1911SJosh Poimboeuf } else { 190946c1911SJosh Poimboeuf next_frame = next_bp; 191946c1911SJosh Poimboeuf next_len = FRAME_HEADER_SIZE; 192946c1911SJosh Poimboeuf } 1937c7900f8SJosh Poimboeuf 194946c1911SJosh Poimboeuf /* make sure the next frame's data is accessible */ 195c32c47c6SJosh Poimboeuf if (!update_stack_state(state, next_frame, next_len)) { 196c32c47c6SJosh Poimboeuf /* 197c32c47c6SJosh Poimboeuf * Don't warn on bad regs->bp. An interrupt in entry code 198c32c47c6SJosh Poimboeuf * might cause a false positive warning. 199c32c47c6SJosh Poimboeuf */ 200c32c47c6SJosh Poimboeuf if (state->regs) 201c32c47c6SJosh Poimboeuf goto the_end; 202c32c47c6SJosh Poimboeuf 203c32c47c6SJosh Poimboeuf goto bad_address; 204c32c47c6SJosh Poimboeuf } 205c32c47c6SJosh Poimboeuf 20624d86f59SJosh Poimboeuf /* Make sure it only unwinds up and doesn't overlap the last frame: */ 20724d86f59SJosh Poimboeuf if (state->stack_info.type == prev_type) { 20824d86f59SJosh Poimboeuf if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs)) 20924d86f59SJosh Poimboeuf goto bad_address; 21024d86f59SJosh Poimboeuf 21124d86f59SJosh Poimboeuf if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE) 21224d86f59SJosh Poimboeuf goto bad_address; 21324d86f59SJosh Poimboeuf } 21424d86f59SJosh Poimboeuf 2157c7900f8SJosh Poimboeuf /* move to the next frame */ 216946c1911SJosh Poimboeuf if (regs) { 217946c1911SJosh Poimboeuf state->regs = regs; 218946c1911SJosh Poimboeuf state->bp = NULL; 219946c1911SJosh Poimboeuf } else { 2207c7900f8SJosh Poimboeuf state->bp = next_bp; 221946c1911SJosh Poimboeuf state->regs = NULL; 222946c1911SJosh Poimboeuf } 223946c1911SJosh Poimboeuf 2247c7900f8SJosh Poimboeuf return true; 225946c1911SJosh Poimboeuf 226c32c47c6SJosh Poimboeuf bad_address: 227900742d8SJosh Poimboeuf /* 228900742d8SJosh Poimboeuf * When unwinding a non-current task, the task might actually be 229900742d8SJosh Poimboeuf * running on another CPU, in which case it could be modifying its 230900742d8SJosh Poimboeuf * stack while we're reading it. This is generally not a problem and 231900742d8SJosh Poimboeuf * can be ignored as long as the caller understands that unwinding 232900742d8SJosh Poimboeuf * another task will not always succeed. 233900742d8SJosh Poimboeuf */ 234900742d8SJosh Poimboeuf if (state->task != current) 235900742d8SJosh Poimboeuf goto the_end; 236900742d8SJosh Poimboeuf 23724d86f59SJosh Poimboeuf if (state->regs) { 23824d86f59SJosh Poimboeuf printk_deferred_once(KERN_WARNING 23924d86f59SJosh Poimboeuf "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n", 24024d86f59SJosh Poimboeuf state->regs, state->task->comm, 24124d86f59SJosh Poimboeuf state->task->pid, next_frame); 2428b5e99f0SJosh Poimboeuf unwind_dump(state, (unsigned long *)state->regs); 24324d86f59SJosh Poimboeuf } else { 244c32c47c6SJosh Poimboeuf printk_deferred_once(KERN_WARNING 245c32c47c6SJosh Poimboeuf "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n", 246c32c47c6SJosh Poimboeuf state->bp, state->task->comm, 24724d86f59SJosh Poimboeuf state->task->pid, next_frame); 2488b5e99f0SJosh Poimboeuf unwind_dump(state, state->bp); 24924d86f59SJosh Poimboeuf } 250946c1911SJosh Poimboeuf the_end: 251946c1911SJosh Poimboeuf state->stack_info.type = STACK_TYPE_UNKNOWN; 252946c1911SJosh Poimboeuf return false; 2537c7900f8SJosh Poimboeuf } 2547c7900f8SJosh Poimboeuf EXPORT_SYMBOL_GPL(unwind_next_frame); 2557c7900f8SJosh Poimboeuf 2567c7900f8SJosh Poimboeuf void __unwind_start(struct unwind_state *state, struct task_struct *task, 2577c7900f8SJosh Poimboeuf struct pt_regs *regs, unsigned long *first_frame) 2587c7900f8SJosh Poimboeuf { 259946c1911SJosh Poimboeuf unsigned long *bp, *frame; 260946c1911SJosh Poimboeuf size_t len; 261946c1911SJosh Poimboeuf 2627c7900f8SJosh Poimboeuf memset(state, 0, sizeof(*state)); 2637c7900f8SJosh Poimboeuf state->task = task; 2647c7900f8SJosh Poimboeuf 2657c7900f8SJosh Poimboeuf /* don't even attempt to start from user mode regs */ 2667c7900f8SJosh Poimboeuf if (regs && user_mode(regs)) { 2677c7900f8SJosh Poimboeuf state->stack_info.type = STACK_TYPE_UNKNOWN; 2687c7900f8SJosh Poimboeuf return; 2697c7900f8SJosh Poimboeuf } 2707c7900f8SJosh Poimboeuf 2717c7900f8SJosh Poimboeuf /* set up the starting stack frame */ 272946c1911SJosh Poimboeuf bp = get_frame_pointer(task, regs); 273946c1911SJosh Poimboeuf regs = decode_frame_pointer(bp); 274946c1911SJosh Poimboeuf if (regs) { 275946c1911SJosh Poimboeuf state->regs = regs; 276946c1911SJosh Poimboeuf frame = (unsigned long *)regs; 277946c1911SJosh Poimboeuf len = sizeof(*regs); 278946c1911SJosh Poimboeuf } else { 279946c1911SJosh Poimboeuf state->bp = bp; 280946c1911SJosh Poimboeuf frame = bp; 281946c1911SJosh Poimboeuf len = FRAME_HEADER_SIZE; 282946c1911SJosh Poimboeuf } 2837c7900f8SJosh Poimboeuf 2847c7900f8SJosh Poimboeuf /* initialize stack info and make sure the frame data is accessible */ 285946c1911SJosh Poimboeuf get_stack_info(frame, state->task, &state->stack_info, 2867c7900f8SJosh Poimboeuf &state->stack_mask); 287946c1911SJosh Poimboeuf update_stack_state(state, frame, len); 2887c7900f8SJosh Poimboeuf 2897c7900f8SJosh Poimboeuf /* 2907c7900f8SJosh Poimboeuf * The caller can provide the address of the first frame directly 2917c7900f8SJosh Poimboeuf * (first_frame) or indirectly (regs->sp) to indicate which stack frame 2927c7900f8SJosh Poimboeuf * to start unwinding at. Skip ahead until we reach it. 2937c7900f8SJosh Poimboeuf */ 2947c7900f8SJosh Poimboeuf while (!unwind_done(state) && 2957c7900f8SJosh Poimboeuf (!on_stack(&state->stack_info, first_frame, sizeof(long)) || 2967c7900f8SJosh Poimboeuf state->bp < first_frame)) 2977c7900f8SJosh Poimboeuf unwind_next_frame(state); 2987c7900f8SJosh Poimboeuf } 2997c7900f8SJosh Poimboeuf EXPORT_SYMBOL_GPL(__unwind_start); 300