xref: /linux/arch/x86/kernel/dumpstack_64.c (revision a5c4300389bb33ade2515c082709217f0614cf15)
1 /*
2  *  Copyright (C) 1991, 1992  Linus Torvalds
3  *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4  */
5 #include <linux/kallsyms.h>
6 #include <linux/kprobes.h>
7 #include <linux/uaccess.h>
8 #include <linux/hardirq.h>
9 #include <linux/kdebug.h>
10 #include <linux/module.h>
11 #include <linux/ptrace.h>
12 #include <linux/kexec.h>
13 #include <linux/sysfs.h>
14 #include <linux/bug.h>
15 #include <linux/nmi.h>
16 
17 #include <asm/stacktrace.h>
18 
19 #include "dumpstack.h"
20 
21 #define N_EXCEPTION_STACKS_END \
22 		(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
23 
24 static char x86_stack_ids[][8] = {
25 		[ DEBUG_STACK-1			]	= "#DB",
26 		[ NMI_STACK-1			]	= "NMI",
27 		[ DOUBLEFAULT_STACK-1		]	= "#DF",
28 		[ STACKFAULT_STACK-1		]	= "#SS",
29 		[ MCE_STACK-1			]	= "#MC",
30 #if DEBUG_STKSZ > EXCEPTION_STKSZ
31 		[ N_EXCEPTION_STACKS ...
32 		  N_EXCEPTION_STACKS_END	]	= "#DB[?]"
33 #endif
34 };
35 
36 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
37 					 unsigned *usedp, char **idp)
38 {
39 	unsigned k;
40 
41 	/*
42 	 * Iterate over all exception stacks, and figure out whether
43 	 * 'stack' is in one of them:
44 	 */
45 	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
46 		unsigned long end = per_cpu(orig_ist, cpu).ist[k];
47 		/*
48 		 * Is 'stack' above this exception frame's end?
49 		 * If yes then skip to the next frame.
50 		 */
51 		if (stack >= end)
52 			continue;
53 		/*
54 		 * Is 'stack' above this exception frame's start address?
55 		 * If yes then we found the right frame.
56 		 */
57 		if (stack >= end - EXCEPTION_STKSZ) {
58 			/*
59 			 * Make sure we only iterate through an exception
60 			 * stack once. If it comes up for the second time
61 			 * then there's something wrong going on - just
62 			 * break out and return NULL:
63 			 */
64 			if (*usedp & (1U << k))
65 				break;
66 			*usedp |= 1U << k;
67 			*idp = x86_stack_ids[k];
68 			return (unsigned long *)end;
69 		}
70 		/*
71 		 * If this is a debug stack, and if it has a larger size than
72 		 * the usual exception stacks, then 'stack' might still
73 		 * be within the lower portion of the debug stack:
74 		 */
75 #if DEBUG_STKSZ > EXCEPTION_STKSZ
76 		if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
77 			unsigned j = N_EXCEPTION_STACKS - 1;
78 
79 			/*
80 			 * Black magic. A large debug stack is composed of
81 			 * multiple exception stack entries, which we
82 			 * iterate through now. Dont look:
83 			 */
84 			do {
85 				++j;
86 				end -= EXCEPTION_STKSZ;
87 				x86_stack_ids[j][4] = '1' +
88 						(j - N_EXCEPTION_STACKS);
89 			} while (stack < end - EXCEPTION_STKSZ);
90 			if (*usedp & (1U << j))
91 				break;
92 			*usedp |= 1U << j;
93 			*idp = x86_stack_ids[j];
94 			return (unsigned long *)end;
95 		}
96 #endif
97 	}
98 	return NULL;
99 }
100 
/*
 * Return 1 if 'stack' lies in the half-open range
 * [irq_stack, irq_stack_end), 0 otherwise.
 */
static inline int
in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
	     unsigned long *irq_stack_end)
{
	if (stack < irq_stack)
		return 0;

	return stack < irq_stack_end;
}
107 
/*
 * Adjust the frame pointer when the walk leaves the irq stack.
 *
 * If the stack we return to is itself inside the irq stack, the bp of
 * the first irq-stack frame already points to the interrupted frame and
 * is returned as is.  Otherwise there is one more level of indirection:
 * the bp of the previous stack was saved first, then the stack was
 * switched to the irq one and a new bp linking to the saved one was
 * stored (see save_args()).  In that case the link is followed once.
 *
 * @bp:            frame pointer obtained from walking the irq stack
 * @stack:         address the walk continues at after the irq stack
 * @irq_stack:     lowest address of the irq stack
 * @irq_stack_end: end of the irq stack
 *
 * Returns the frame pointer to continue with.  Without
 * CONFIG_FRAME_POINTER, or if the link cannot be read (which triggers a
 * one-time warning), @bp is returned unchanged.
 */
static inline unsigned long
fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
		  unsigned long *irq_stack, unsigned long *irq_stack_end)
{
#ifdef CONFIG_FRAME_POINTER
	struct stack_frame *frame = (struct stack_frame *)bp;
	unsigned long linked_bp;

	/* Still on the irq stack: bp already links to the right frame. */
	if (in_irq_stack(stack, irq_stack, irq_stack_end))
		return bp;

	/* probe_kernel_address() yields non-zero if the read faulted. */
	if (probe_kernel_address(&frame->next_frame, linked_bp)) {
		WARN_ONCE(1, "Perf: bad frame pointer = %p in "
			  "callchain\n", &frame->next_frame);
		return bp;
	}

	return linked_bp;
#else
	return bp;
#endif
}
135 
136 /*
137  * x86-64 can have up to three kernel stacks:
138  * process stack
139  * interrupt stack
140  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
141  */
142 
143 void dump_trace(struct task_struct *task, struct pt_regs *regs,
144 		unsigned long *stack, unsigned long bp,
145 		const struct stacktrace_ops *ops, void *data)
146 {
147 	const unsigned cpu = get_cpu();
148 	unsigned long *irq_stack_end =
149 		(unsigned long *)per_cpu(irq_stack_ptr, cpu);
150 	unsigned used = 0;
151 	struct thread_info *tinfo;
152 	int graph = 0;
153 
154 	if (!task)
155 		task = current;
156 
157 	if (!stack) {
158 		unsigned long dummy;
159 		stack = &dummy;
160 		if (task && task != current)
161 			stack = (unsigned long *)task->thread.sp;
162 	}
163 
164 #ifdef CONFIG_FRAME_POINTER
165 	if (!bp) {
166 		if (task == current) {
167 			/* Grab bp right from our regs */
168 			get_bp(bp);
169 		} else {
170 			/* bp is the last reg pushed by switch_to */
171 			bp = *(unsigned long *) task->thread.sp;
172 		}
173 	}
174 #endif
175 
176 	/*
177 	 * Print function call entries in all stacks, starting at the
178 	 * current stack address. If the stacks consist of nested
179 	 * exceptions
180 	 */
181 	tinfo = task_thread_info(task);
182 	for (;;) {
183 		char *id;
184 		unsigned long *estack_end;
185 		estack_end = in_exception_stack(cpu, (unsigned long)stack,
186 						&used, &id);
187 
188 		if (estack_end) {
189 			if (ops->stack(data, id) < 0)
190 				break;
191 
192 			bp = ops->walk_stack(tinfo, stack, bp, ops,
193 					     data, estack_end, &graph);
194 			ops->stack(data, "<EOE>");
195 			/*
196 			 * We link to the next stack via the
197 			 * second-to-last pointer (index -2 to end) in the
198 			 * exception stack:
199 			 */
200 			stack = (unsigned long *) estack_end[-2];
201 			continue;
202 		}
203 		if (irq_stack_end) {
204 			unsigned long *irq_stack;
205 			irq_stack = irq_stack_end -
206 				(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
207 
208 			if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
209 				if (ops->stack(data, "IRQ") < 0)
210 					break;
211 				bp = ops->walk_stack(tinfo, stack, bp,
212 					ops, data, irq_stack_end, &graph);
213 				/*
214 				 * We link to the next stack (which would be
215 				 * the process stack normally) the last
216 				 * pointer (index -1 to end) in the IRQ stack:
217 				 */
218 				stack = (unsigned long *) (irq_stack_end[-1]);
219 				bp = fixup_bp_irq_link(bp, stack, irq_stack,
220 						       irq_stack_end);
221 				irq_stack_end = NULL;
222 				ops->stack(data, "EOI");
223 				continue;
224 			}
225 		}
226 		break;
227 	}
228 
229 	/*
230 	 * This handles the process stack:
231 	 */
232 	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
233 	put_cpu();
234 }
235 EXPORT_SYMBOL(dump_trace);
236 
237 void
238 show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
239 		   unsigned long *sp, unsigned long bp, char *log_lvl)
240 {
241 	unsigned long *irq_stack_end;
242 	unsigned long *irq_stack;
243 	unsigned long *stack;
244 	int cpu;
245 	int i;
246 
247 	preempt_disable();
248 	cpu = smp_processor_id();
249 
250 	irq_stack_end	= (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
251 	irq_stack	= (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
252 
253 	/*
254 	 * Debugging aid: "show_stack(NULL, NULL);" prints the
255 	 * back trace for this cpu:
256 	 */
257 	if (sp == NULL) {
258 		if (task)
259 			sp = (unsigned long *)task->thread.sp;
260 		else
261 			sp = (unsigned long *)&sp;
262 	}
263 
264 	stack = sp;
265 	for (i = 0; i < kstack_depth_to_print; i++) {
266 		if (stack >= irq_stack && stack <= irq_stack_end) {
267 			if (stack == irq_stack_end) {
268 				stack = (unsigned long *) (irq_stack_end[-1]);
269 				printk(" <EOI> ");
270 			}
271 		} else {
272 		if (((long) stack & (THREAD_SIZE-1)) == 0)
273 			break;
274 		}
275 		if (i && ((i % STACKSLOTS_PER_LINE) == 0))
276 			printk("\n%s", log_lvl);
277 		printk(" %016lx", *stack++);
278 		touch_nmi_watchdog();
279 	}
280 	preempt_enable();
281 
282 	printk("\n");
283 	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
284 }
285 
286 void show_registers(struct pt_regs *regs)
287 {
288 	int i;
289 	unsigned long sp;
290 	const int cpu = smp_processor_id();
291 	struct task_struct *cur = current;
292 
293 	sp = regs->sp;
294 	printk("CPU %d ", cpu);
295 	print_modules();
296 	__show_regs(regs, 1);
297 	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
298 		cur->comm, cur->pid, task_thread_info(cur), cur);
299 
300 	/*
301 	 * When in-kernel, we also print out the stack and code at the
302 	 * time of the fault..
303 	 */
304 	if (!user_mode(regs)) {
305 		unsigned int code_prologue = code_bytes * 43 / 64;
306 		unsigned int code_len = code_bytes;
307 		unsigned char c;
308 		u8 *ip;
309 
310 		printk(KERN_EMERG "Stack:\n");
311 		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
312 				regs->bp, KERN_EMERG);
313 
314 		printk(KERN_EMERG "Code: ");
315 
316 		ip = (u8 *)regs->ip - code_prologue;
317 		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
318 			/* try starting at IP */
319 			ip = (u8 *)regs->ip;
320 			code_len = code_len - code_prologue + 1;
321 		}
322 		for (i = 0; i < code_len; i++, ip++) {
323 			if (ip < (u8 *)PAGE_OFFSET ||
324 					probe_kernel_address(ip, c)) {
325 				printk(" Bad RIP value.");
326 				break;
327 			}
328 			if (ip == (u8 *)regs->ip)
329 				printk("<%02x> ", c);
330 			else
331 				printk("%02x ", c);
332 		}
333 	}
334 	printk("\n");
335 }
336 
337 int is_valid_bugaddr(unsigned long ip)
338 {
339 	unsigned short ud2;
340 
341 	if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
342 		return 0;
343 
344 	return ud2 == 0x0b0f;
345 }
346