xref: /linux/arch/sh/kernel/dwarf.c (revision 5499b45190237ca90dd2ac86395cf464fe1f4cc7)
1 /*
2  * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * This is an implementation of a DWARF unwinder. Its main purpose is
9  * for generating stacktrace information. Based on the DWARF 3
10  * specification from http://www.dwarfstd.org.
11  *
12  * TODO:
13  *	- DWARF64 doesn't work.
14  *	- Registers with DWARF_VAL_OFFSET rules aren't handled properly.
15  */
16 
17 /* #define DEBUG */
18 #include <linux/kernel.h>
19 #include <linux/io.h>
20 #include <linux/list.h>
21 #include <linux/mempool.h>
22 #include <linux/mm.h>
23 #include <linux/elf.h>
24 #include <linux/ftrace.h>
25 #include <asm/dwarf.h>
26 #include <asm/unwinder.h>
27 #include <asm/sections.h>
28 #include <asm/unaligned.h>
29 #include <asm/stacktrace.h>
30 
31 /* Reserve enough memory for two stack frames */
32 #define DWARF_FRAME_MIN_REQ	2
33 /* ... with 4 registers per frame. */
34 #define DWARF_REG_MIN_REQ	(DWARF_FRAME_MIN_REQ * 4)
35 
36 static struct kmem_cache *dwarf_frame_cachep;
37 static mempool_t *dwarf_frame_pool;
38 
39 static struct kmem_cache *dwarf_reg_cachep;
40 static mempool_t *dwarf_reg_pool;
41 
42 static struct rb_root cie_root;
43 static DEFINE_SPINLOCK(dwarf_cie_lock);
44 
45 static struct rb_root fde_root;
46 static DEFINE_SPINLOCK(dwarf_fde_lock);
47 
48 static struct dwarf_cie *cached_cie;
49 
50 /**
51  *	dwarf_frame_alloc_reg - allocate memory for a DWARF register
52  *	@frame: the DWARF frame whose list of registers we insert on
53  *	@reg_num: the register number
54  *
55  *	Allocate space for, and initialise, a dwarf reg from
56  *	dwarf_reg_pool and insert it onto the (unsorted) linked-list of
57  *	dwarf registers for @frame.
58  *
59  *	Return the initialised DWARF reg.
60  */
61 static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
62 					       unsigned int reg_num)
63 {
64 	struct dwarf_reg *reg;
65 
66 	reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
67 	if (!reg) {
68 		printk(KERN_WARNING "Unable to allocate a DWARF register\n");
69 		/*
70 		 * Let's just bomb hard here, we have no way to
71 		 * gracefully recover.
72 		 */
73 		UNWINDER_BUG();
74 	}
75 
76 	reg->number = reg_num;
77 	reg->addr = 0;
78 	reg->flags = 0;
79 
80 	list_add(&reg->link, &frame->reg_list);
81 
82 	return reg;
83 }
84 
85 static void dwarf_frame_free_regs(struct dwarf_frame *frame)
86 {
87 	struct dwarf_reg *reg, *n;
88 
89 	list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
90 		list_del(&reg->link);
91 		mempool_free(reg, dwarf_reg_pool);
92 	}
93 }
94 
95 /**
96  *	dwarf_frame_reg - return a DWARF register
97  *	@frame: the DWARF frame to search in for @reg_num
98  *	@reg_num: the register number to search for
99  *
100  *	Lookup and return the dwarf reg @reg_num for this frame. Return
101  *	NULL if @reg_num is an register invalid number.
102  */
103 static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
104 					 unsigned int reg_num)
105 {
106 	struct dwarf_reg *reg;
107 
108 	list_for_each_entry(reg, &frame->reg_list, link) {
109 		if (reg->number == reg_num)
110 			return reg;
111 	}
112 
113 	return NULL;
114 }
115 
116 /**
117  *	dwarf_read_addr - read dwarf data
118  *	@src: source address of data
119  *	@dst: destination address to store the data to
120  *
121  *	Read 'n' bytes from @src, where 'n' is the size of an address on
122  *	the native machine. We return the number of bytes read, which
123  *	should always be 'n'. We also have to be careful when reading
124  *	from @src and writing to @dst, because they can be arbitrarily
125  *	aligned. Return 'n' - the number of bytes read.
126  */
127 static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
128 {
129 	u32 val = get_unaligned(src);
130 	put_unaligned(val, dst);
131 	return sizeof(unsigned long *);
132 }
133 
/**
 *	dwarf_read_uleb128 - read unsigned LEB128 data
 *	@addr: the address where the ULEB128 data is stored
 *	@ret: address to store the result
 *
 *	Decode an unsigned LEB128 encoded datum. The algorithm is taken
 *	from Appendix C of the DWARF 3 spec. For information on the
 *	encodings refer to section "7.6 - Variable Length Data".
 *
 *	Every byte of the encoding is consumed, but septets that would
 *	land beyond the width of the result are discarded rather than
 *	shifted (shifting by the type width or more is undefined).
 *
 *	Return the number of bytes read.
 */
static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
{
	const unsigned int num_bits = 8 * sizeof(unsigned int);
	unsigned int result = 0;
	unsigned int shift = 0;
	unsigned char byte;
	int count = 0;

	do {
		byte = __raw_readb(addr);
		addr++;
		count++;

		/*
		 * Shift as unsigned: a septet placed at bit 28 would
		 * overflow a signed int.
		 */
		if (shift < num_bits)
			result |= (unsigned int)(byte & 0x7f) << shift;
		shift += 7;
	} while (byte & 0x80);	/* the top bit flags a continuation byte */

	*ret = result;

	return count;
}
170 
/**
 *	dwarf_read_leb128 - read signed LEB128 data
 *	@addr: the address of the LEB128 encoded data
 *	@ret: address to store the result
 *
 *	Decode signed LEB128 data. The algorithm is taken from Appendix
 *	C of the DWARF 3 spec. The value is accumulated as an unsigned
 *	quantity so that neither the septet shifts nor the sign
 *	extension shift a negative or overflowing signed value.
 *
 *	Every byte of the encoding is consumed, but septets that would
 *	land beyond the width of the result are discarded.
 *
 *	Return the number of bytes read.
 */
static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
{
	/* The number of bits in the (signed) result. */
	const unsigned int num_bits = 8 * sizeof(int);
	unsigned int result = 0;
	unsigned int shift = 0;
	unsigned char byte;
	int count = 0;

	do {
		byte = __raw_readb(addr);
		addr++;
		count++;

		if (shift < num_bits)
			result |= (unsigned int)(byte & 0x7f) << shift;
		shift += 7;
	} while (byte & 0x80);	/* the top bit flags a continuation byte */

	/*
	 * Bit 6 of the final byte is the sign bit. Sign-extend when
	 * the encoding did not already fill the whole result.
	 */
	if ((shift < num_bits) && (byte & 0x40))
		result |= ~0U << shift;

	*ret = (int)result;

	return count;
}
211 
212 /**
213  *	dwarf_read_encoded_value - return the decoded value at @addr
214  *	@addr: the address of the encoded value
215  *	@val: where to write the decoded value
216  *	@encoding: the encoding with which we can decode @addr
217  *
218  *	GCC emits encoded address in the .eh_frame FDE entries. Decode
219  *	the value at @addr using @encoding. The decoded value is written
220  *	to @val and the number of bytes read is returned.
221  */
222 static int dwarf_read_encoded_value(char *addr, unsigned long *val,
223 				    char encoding)
224 {
225 	unsigned long decoded_addr = 0;
226 	int count = 0;
227 
228 	switch (encoding & 0x70) {
229 	case DW_EH_PE_absptr:
230 		break;
231 	case DW_EH_PE_pcrel:
232 		decoded_addr = (unsigned long)addr;
233 		break;
234 	default:
235 		pr_debug("encoding=0x%x\n", (encoding & 0x70));
236 		UNWINDER_BUG();
237 	}
238 
239 	if ((encoding & 0x07) == 0x00)
240 		encoding |= DW_EH_PE_udata4;
241 
242 	switch (encoding & 0x0f) {
243 	case DW_EH_PE_sdata4:
244 	case DW_EH_PE_udata4:
245 		count += 4;
246 		decoded_addr += get_unaligned((u32 *)addr);
247 		__raw_writel(decoded_addr, val);
248 		break;
249 	default:
250 		pr_debug("encoding=0x%x\n", encoding);
251 		UNWINDER_BUG();
252 	}
253 
254 	return count;
255 }
256 
257 /**
258  *	dwarf_entry_len - return the length of an FDE or CIE
259  *	@addr: the address of the entry
260  *	@len: the length of the entry
261  *
262  *	Read the initial_length field of the entry and store the size of
263  *	the entry in @len. We return the number of bytes read. Return a
264  *	count of 0 on error.
265  */
266 static inline int dwarf_entry_len(char *addr, unsigned long *len)
267 {
268 	u32 initial_len;
269 	int count;
270 
271 	initial_len = get_unaligned((u32 *)addr);
272 	count = 4;
273 
274 	/*
275 	 * An initial length field value in the range DW_LEN_EXT_LO -
276 	 * DW_LEN_EXT_HI indicates an extension, and should not be
277 	 * interpreted as a length. The only extension that we currently
278 	 * understand is the use of DWARF64 addresses.
279 	 */
280 	if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
281 		/*
282 		 * The 64-bit length field immediately follows the
283 		 * compulsory 32-bit length field.
284 		 */
285 		if (initial_len == DW_EXT_DWARF64) {
286 			*len = get_unaligned((u64 *)addr + 4);
287 			count = 12;
288 		} else {
289 			printk(KERN_WARNING "Unknown DWARF extension\n");
290 			count = 0;
291 		}
292 	} else
293 		*len = initial_len;
294 
295 	return count;
296 }
297 
298 /**
299  *	dwarf_lookup_cie - locate the cie
300  *	@cie_ptr: pointer to help with lookup
301  */
302 static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
303 {
304 	struct rb_node **rb_node = &cie_root.rb_node;
305 	struct dwarf_cie *cie = NULL;
306 	unsigned long flags;
307 
308 	spin_lock_irqsave(&dwarf_cie_lock, flags);
309 
310 	/*
311 	 * We've cached the last CIE we looked up because chances are
312 	 * that the FDE wants this CIE.
313 	 */
314 	if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
315 		cie = cached_cie;
316 		goto out;
317 	}
318 
319 	while (*rb_node) {
320 		struct dwarf_cie *cie_tmp;
321 
322 		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
323 		BUG_ON(!cie_tmp);
324 
325 		if (cie_ptr == cie_tmp->cie_pointer) {
326 			cie = cie_tmp;
327 			cached_cie = cie_tmp;
328 			goto out;
329 		} else {
330 			if (cie_ptr < cie_tmp->cie_pointer)
331 				rb_node = &(*rb_node)->rb_left;
332 			else
333 				rb_node = &(*rb_node)->rb_right;
334 		}
335 	}
336 
337 out:
338 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
339 	return cie;
340 }
341 
342 /**
343  *	dwarf_lookup_fde - locate the FDE that covers pc
344  *	@pc: the program counter
345  */
346 struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
347 {
348 	struct rb_node **rb_node = &fde_root.rb_node;
349 	struct dwarf_fde *fde = NULL;
350 	unsigned long flags;
351 
352 	spin_lock_irqsave(&dwarf_fde_lock, flags);
353 
354 	while (*rb_node) {
355 		struct dwarf_fde *fde_tmp;
356 		unsigned long tmp_start, tmp_end;
357 
358 		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
359 		BUG_ON(!fde_tmp);
360 
361 		tmp_start = fde_tmp->initial_location;
362 		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
363 
364 		if (pc < tmp_start) {
365 			rb_node = &(*rb_node)->rb_left;
366 		} else {
367 			if (pc < tmp_end) {
368 				fde = fde_tmp;
369 				goto out;
370 			} else
371 				rb_node = &(*rb_node)->rb_right;
372 		}
373 	}
374 
375 out:
376 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
377 
378 	return fde;
379 }
380 
381 /**
382  *	dwarf_cfa_execute_insns - execute instructions to calculate a CFA
383  *	@insn_start: address of the first instruction
384  *	@insn_end: address of the last instruction
385  *	@cie: the CIE for this function
386  *	@fde: the FDE for this function
387  *	@frame: the instructions calculate the CFA for this frame
388  *	@pc: the program counter of the address we're interested in
389  *
390  *	Execute the Call Frame instruction sequence starting at
391  *	@insn_start and ending at @insn_end. The instructions describe
392  *	how to calculate the Canonical Frame Address of a stackframe.
393  *	Store the results in @frame.
394  */
395 static int dwarf_cfa_execute_insns(unsigned char *insn_start,
396 				   unsigned char *insn_end,
397 				   struct dwarf_cie *cie,
398 				   struct dwarf_fde *fde,
399 				   struct dwarf_frame *frame,
400 				   unsigned long pc)
401 {
402 	unsigned char insn;
403 	unsigned char *current_insn;
404 	unsigned int count, delta, reg, expr_len, offset;
405 	struct dwarf_reg *regp;
406 
407 	current_insn = insn_start;
408 
409 	while (current_insn < insn_end && frame->pc <= pc) {
410 		insn = __raw_readb(current_insn++);
411 
412 		/*
413 		 * Firstly, handle the opcodes that embed their operands
414 		 * in the instructions.
415 		 */
416 		switch (DW_CFA_opcode(insn)) {
417 		case DW_CFA_advance_loc:
418 			delta = DW_CFA_operand(insn);
419 			delta *= cie->code_alignment_factor;
420 			frame->pc += delta;
421 			continue;
422 			/* NOTREACHED */
423 		case DW_CFA_offset:
424 			reg = DW_CFA_operand(insn);
425 			count = dwarf_read_uleb128(current_insn, &offset);
426 			current_insn += count;
427 			offset *= cie->data_alignment_factor;
428 			regp = dwarf_frame_alloc_reg(frame, reg);
429 			regp->addr = offset;
430 			regp->flags |= DWARF_REG_OFFSET;
431 			continue;
432 			/* NOTREACHED */
433 		case DW_CFA_restore:
434 			reg = DW_CFA_operand(insn);
435 			continue;
436 			/* NOTREACHED */
437 		}
438 
439 		/*
440 		 * Secondly, handle the opcodes that don't embed their
441 		 * operands in the instruction.
442 		 */
443 		switch (insn) {
444 		case DW_CFA_nop:
445 			continue;
446 		case DW_CFA_advance_loc1:
447 			delta = *current_insn++;
448 			frame->pc += delta * cie->code_alignment_factor;
449 			break;
450 		case DW_CFA_advance_loc2:
451 			delta = get_unaligned((u16 *)current_insn);
452 			current_insn += 2;
453 			frame->pc += delta * cie->code_alignment_factor;
454 			break;
455 		case DW_CFA_advance_loc4:
456 			delta = get_unaligned((u32 *)current_insn);
457 			current_insn += 4;
458 			frame->pc += delta * cie->code_alignment_factor;
459 			break;
460 		case DW_CFA_offset_extended:
461 			count = dwarf_read_uleb128(current_insn, &reg);
462 			current_insn += count;
463 			count = dwarf_read_uleb128(current_insn, &offset);
464 			current_insn += count;
465 			offset *= cie->data_alignment_factor;
466 			break;
467 		case DW_CFA_restore_extended:
468 			count = dwarf_read_uleb128(current_insn, &reg);
469 			current_insn += count;
470 			break;
471 		case DW_CFA_undefined:
472 			count = dwarf_read_uleb128(current_insn, &reg);
473 			current_insn += count;
474 			regp = dwarf_frame_alloc_reg(frame, reg);
475 			regp->flags |= DWARF_UNDEFINED;
476 			break;
477 		case DW_CFA_def_cfa:
478 			count = dwarf_read_uleb128(current_insn,
479 						   &frame->cfa_register);
480 			current_insn += count;
481 			count = dwarf_read_uleb128(current_insn,
482 						   &frame->cfa_offset);
483 			current_insn += count;
484 
485 			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
486 			break;
487 		case DW_CFA_def_cfa_register:
488 			count = dwarf_read_uleb128(current_insn,
489 						   &frame->cfa_register);
490 			current_insn += count;
491 			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
492 			break;
493 		case DW_CFA_def_cfa_offset:
494 			count = dwarf_read_uleb128(current_insn, &offset);
495 			current_insn += count;
496 			frame->cfa_offset = offset;
497 			break;
498 		case DW_CFA_def_cfa_expression:
499 			count = dwarf_read_uleb128(current_insn, &expr_len);
500 			current_insn += count;
501 
502 			frame->cfa_expr = current_insn;
503 			frame->cfa_expr_len = expr_len;
504 			current_insn += expr_len;
505 
506 			frame->flags |= DWARF_FRAME_CFA_REG_EXP;
507 			break;
508 		case DW_CFA_offset_extended_sf:
509 			count = dwarf_read_uleb128(current_insn, &reg);
510 			current_insn += count;
511 			count = dwarf_read_leb128(current_insn, &offset);
512 			current_insn += count;
513 			offset *= cie->data_alignment_factor;
514 			regp = dwarf_frame_alloc_reg(frame, reg);
515 			regp->flags |= DWARF_REG_OFFSET;
516 			regp->addr = offset;
517 			break;
518 		case DW_CFA_val_offset:
519 			count = dwarf_read_uleb128(current_insn, &reg);
520 			current_insn += count;
521 			count = dwarf_read_leb128(current_insn, &offset);
522 			offset *= cie->data_alignment_factor;
523 			regp = dwarf_frame_alloc_reg(frame, reg);
524 			regp->flags |= DWARF_VAL_OFFSET;
525 			regp->addr = offset;
526 			break;
527 		case DW_CFA_GNU_args_size:
528 			count = dwarf_read_uleb128(current_insn, &offset);
529 			current_insn += count;
530 			break;
531 		case DW_CFA_GNU_negative_offset_extended:
532 			count = dwarf_read_uleb128(current_insn, &reg);
533 			current_insn += count;
534 			count = dwarf_read_uleb128(current_insn, &offset);
535 			offset *= cie->data_alignment_factor;
536 
537 			regp = dwarf_frame_alloc_reg(frame, reg);
538 			regp->flags |= DWARF_REG_OFFSET;
539 			regp->addr = -offset;
540 			break;
541 		default:
542 			pr_debug("unhandled DWARF instruction 0x%x\n", insn);
543 			UNWINDER_BUG();
544 			break;
545 		}
546 	}
547 
548 	return 0;
549 }
550 
551 /**
552  *	dwarf_free_frame - free the memory allocated for @frame
553  *	@frame: the frame to free
554  */
555 void dwarf_free_frame(struct dwarf_frame *frame)
556 {
557 	dwarf_frame_free_regs(frame);
558 	mempool_free(frame, dwarf_frame_pool);
559 }
560 
561 extern void ret_from_irq(void);
562 
563 /**
564  *	dwarf_unwind_stack - unwind the stack
565  *
566  *	@pc: address of the function to unwind
567  *	@prev: struct dwarf_frame of the previous stackframe on the callstack
568  *
569  *	Return a struct dwarf_frame representing the most recent frame
570  *	on the callstack. Each of the lower (older) stack frames are
571  *	linked via the "prev" member.
572  */
573 struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
574 				       struct dwarf_frame *prev)
575 {
576 	struct dwarf_frame *frame;
577 	struct dwarf_cie *cie;
578 	struct dwarf_fde *fde;
579 	struct dwarf_reg *reg;
580 	unsigned long addr;
581 
582 	/*
583 	 * If we're starting at the top of the stack we need get the
584 	 * contents of a physical register to get the CFA in order to
585 	 * begin the virtual unwinding of the stack.
586 	 *
587 	 * NOTE: the return address is guaranteed to be setup by the
588 	 * time this function makes its first function call.
589 	 */
590 	if (!pc || !prev)
591 		pc = (unsigned long)current_text_addr();
592 
593 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
594 	/*
595 	 * If our stack has been patched by the function graph tracer
596 	 * then we might see the address of return_to_handler() where we
597 	 * expected to find the real return address.
598 	 */
599 	if (pc == (unsigned long)&return_to_handler) {
600 		int index = current->curr_ret_stack;
601 
602 		/*
603 		 * We currently have no way of tracking how many
604 		 * return_to_handler()'s we've seen. If there is more
605 		 * than one patched return address on our stack,
606 		 * complain loudly.
607 		 */
608 		WARN_ON(index > 0);
609 
610 		pc = current->ret_stack[index].ret;
611 	}
612 #endif
613 
614 	frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
615 	if (!frame) {
616 		printk(KERN_ERR "Unable to allocate a dwarf frame\n");
617 		UNWINDER_BUG();
618 	}
619 
620 	INIT_LIST_HEAD(&frame->reg_list);
621 	frame->flags = 0;
622 	frame->prev = prev;
623 	frame->return_addr = 0;
624 
625 	fde = dwarf_lookup_fde(pc);
626 	if (!fde) {
627 		/*
628 		 * This is our normal exit path. There are two reasons
629 		 * why we might exit here,
630 		 *
631 		 *	a) pc has no asscociated DWARF frame info and so
632 		 *	we don't know how to unwind this frame. This is
633 		 *	usually the case when we're trying to unwind a
634 		 *	frame that was called from some assembly code
635 		 *	that has no DWARF info, e.g. syscalls.
636 		 *
637 		 *	b) the DEBUG info for pc is bogus. There's
638 		 *	really no way to distinguish this case from the
639 		 *	case above, which sucks because we could print a
640 		 *	warning here.
641 		 */
642 		goto bail;
643 	}
644 
645 	cie = dwarf_lookup_cie(fde->cie_pointer);
646 
647 	frame->pc = fde->initial_location;
648 
649 	/* CIE initial instructions */
650 	dwarf_cfa_execute_insns(cie->initial_instructions,
651 				cie->instructions_end, cie, fde,
652 				frame, pc);
653 
654 	/* FDE instructions */
655 	dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
656 				fde, frame, pc);
657 
658 	/* Calculate the CFA */
659 	switch (frame->flags) {
660 	case DWARF_FRAME_CFA_REG_OFFSET:
661 		if (prev) {
662 			reg = dwarf_frame_reg(prev, frame->cfa_register);
663 			UNWINDER_BUG_ON(!reg);
664 			UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
665 
666 			addr = prev->cfa + reg->addr;
667 			frame->cfa = __raw_readl(addr);
668 
669 		} else {
670 			/*
671 			 * Again, we're starting from the top of the
672 			 * stack. We need to physically read
673 			 * the contents of a register in order to get
674 			 * the Canonical Frame Address for this
675 			 * function.
676 			 */
677 			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
678 		}
679 
680 		frame->cfa += frame->cfa_offset;
681 		break;
682 	default:
683 		UNWINDER_BUG();
684 	}
685 
686 	reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);
687 
688 	/*
689 	 * If we haven't seen the return address register or the return
690 	 * address column is undefined then we must assume that this is
691 	 * the end of the callstack.
692 	 */
693 	if (!reg || reg->flags == DWARF_UNDEFINED)
694 		goto bail;
695 
696 	UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
697 
698 	addr = frame->cfa + reg->addr;
699 	frame->return_addr = __raw_readl(addr);
700 
701 	/*
702 	 * Ah, the joys of unwinding through interrupts.
703 	 *
704 	 * Interrupts are tricky - the DWARF info needs to be _really_
705 	 * accurate and unfortunately I'm seeing a lot of bogus DWARF
706 	 * info. For example, I've seen interrupts occur in epilogues
707 	 * just after the frame pointer (r14) had been restored. The
708 	 * problem was that the DWARF info claimed that the CFA could be
709 	 * reached by using the value of the frame pointer before it was
710 	 * restored.
711 	 *
712 	 * So until the compiler can be trusted to produce reliable
713 	 * DWARF info when it really matters, let's stop unwinding once
714 	 * we've calculated the function that was interrupted.
715 	 */
716 	if (prev && prev->pc == (unsigned long)ret_from_irq)
717 		frame->return_addr = 0;
718 
719 	return frame;
720 
721 bail:
722 	dwarf_free_frame(frame);
723 	return NULL;
724 }
725 
726 static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
727 			   unsigned char *end, struct module *mod)
728 {
729 	struct rb_node **rb_node = &cie_root.rb_node;
730 	struct rb_node *parent;
731 	struct dwarf_cie *cie;
732 	unsigned long flags;
733 	int count;
734 
735 	cie = kzalloc(sizeof(*cie), GFP_KERNEL);
736 	if (!cie)
737 		return -ENOMEM;
738 
739 	cie->length = len;
740 
741 	/*
742 	 * Record the offset into the .eh_frame section
743 	 * for this CIE. It allows this CIE to be
744 	 * quickly and easily looked up from the
745 	 * corresponding FDE.
746 	 */
747 	cie->cie_pointer = (unsigned long)entry;
748 
749 	cie->version = *(char *)p++;
750 	UNWINDER_BUG_ON(cie->version != 1);
751 
752 	cie->augmentation = p;
753 	p += strlen(cie->augmentation) + 1;
754 
755 	count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
756 	p += count;
757 
758 	count = dwarf_read_leb128(p, &cie->data_alignment_factor);
759 	p += count;
760 
761 	/*
762 	 * Which column in the rule table contains the
763 	 * return address?
764 	 */
765 	if (cie->version == 1) {
766 		cie->return_address_reg = __raw_readb(p);
767 		p++;
768 	} else {
769 		count = dwarf_read_uleb128(p, &cie->return_address_reg);
770 		p += count;
771 	}
772 
773 	if (cie->augmentation[0] == 'z') {
774 		unsigned int length, count;
775 		cie->flags |= DWARF_CIE_Z_AUGMENTATION;
776 
777 		count = dwarf_read_uleb128(p, &length);
778 		p += count;
779 
780 		UNWINDER_BUG_ON((unsigned char *)p > end);
781 
782 		cie->initial_instructions = p + length;
783 		cie->augmentation++;
784 	}
785 
786 	while (*cie->augmentation) {
787 		/*
788 		 * "L" indicates a byte showing how the
789 		 * LSDA pointer is encoded. Skip it.
790 		 */
791 		if (*cie->augmentation == 'L') {
792 			p++;
793 			cie->augmentation++;
794 		} else if (*cie->augmentation == 'R') {
795 			/*
796 			 * "R" indicates a byte showing
797 			 * how FDE addresses are
798 			 * encoded.
799 			 */
800 			cie->encoding = *(char *)p++;
801 			cie->augmentation++;
802 		} else if (*cie->augmentation == 'P') {
803 			/*
804 			 * "R" indicates a personality
805 			 * routine in the CIE
806 			 * augmentation.
807 			 */
808 			UNWINDER_BUG();
809 		} else if (*cie->augmentation == 'S') {
810 			UNWINDER_BUG();
811 		} else {
812 			/*
813 			 * Unknown augmentation. Assume
814 			 * 'z' augmentation.
815 			 */
816 			p = cie->initial_instructions;
817 			UNWINDER_BUG_ON(!p);
818 			break;
819 		}
820 	}
821 
822 	cie->initial_instructions = p;
823 	cie->instructions_end = end;
824 
825 	/* Add to list */
826 	spin_lock_irqsave(&dwarf_cie_lock, flags);
827 
828 	while (*rb_node) {
829 		struct dwarf_cie *cie_tmp;
830 
831 		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
832 
833 		parent = *rb_node;
834 
835 		if (cie->cie_pointer < cie_tmp->cie_pointer)
836 			rb_node = &parent->rb_left;
837 		else if (cie->cie_pointer >= cie_tmp->cie_pointer)
838 			rb_node = &parent->rb_right;
839 		else
840 			WARN_ON(1);
841 	}
842 
843 	rb_link_node(&cie->node, parent, rb_node);
844 	rb_insert_color(&cie->node, &cie_root);
845 
846 	if (mod != NULL)
847 		list_add_tail(&cie->link, &mod->arch.cie_list);
848 
849 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
850 
851 	return 0;
852 }
853 
854 static int dwarf_parse_fde(void *entry, u32 entry_type,
855 			   void *start, unsigned long len,
856 			   unsigned char *end, struct module *mod)
857 {
858 	struct rb_node **rb_node = &fde_root.rb_node;
859 	struct rb_node *parent;
860 	struct dwarf_fde *fde;
861 	struct dwarf_cie *cie;
862 	unsigned long flags;
863 	int count;
864 	void *p = start;
865 
866 	fde = kzalloc(sizeof(*fde), GFP_KERNEL);
867 	if (!fde)
868 		return -ENOMEM;
869 
870 	fde->length = len;
871 
872 	/*
873 	 * In a .eh_frame section the CIE pointer is the
874 	 * delta between the address within the FDE
875 	 */
876 	fde->cie_pointer = (unsigned long)(p - entry_type - 4);
877 
878 	cie = dwarf_lookup_cie(fde->cie_pointer);
879 	fde->cie = cie;
880 
881 	if (cie->encoding)
882 		count = dwarf_read_encoded_value(p, &fde->initial_location,
883 						 cie->encoding);
884 	else
885 		count = dwarf_read_addr(p, &fde->initial_location);
886 
887 	p += count;
888 
889 	if (cie->encoding)
890 		count = dwarf_read_encoded_value(p, &fde->address_range,
891 						 cie->encoding & 0x0f);
892 	else
893 		count = dwarf_read_addr(p, &fde->address_range);
894 
895 	p += count;
896 
897 	if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
898 		unsigned int length;
899 		count = dwarf_read_uleb128(p, &length);
900 		p += count + length;
901 	}
902 
903 	/* Call frame instructions. */
904 	fde->instructions = p;
905 	fde->end = end;
906 
907 	/* Add to list. */
908 	spin_lock_irqsave(&dwarf_fde_lock, flags);
909 
910 	while (*rb_node) {
911 		struct dwarf_fde *fde_tmp;
912 		unsigned long tmp_start, tmp_end;
913 		unsigned long start, end;
914 
915 		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
916 
917 		start = fde->initial_location;
918 		end = fde->initial_location + fde->address_range;
919 
920 		tmp_start = fde_tmp->initial_location;
921 		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
922 
923 		parent = *rb_node;
924 
925 		if (start < tmp_start)
926 			rb_node = &parent->rb_left;
927 		else if (start >= tmp_end)
928 			rb_node = &parent->rb_right;
929 		else
930 			WARN_ON(1);
931 	}
932 
933 	rb_link_node(&fde->node, parent, rb_node);
934 	rb_insert_color(&fde->node, &fde_root);
935 
936 	if (mod != NULL)
937 		list_add_tail(&fde->link, &mod->arch.fde_list);
938 
939 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
940 
941 	return 0;
942 }
943 
944 static void dwarf_unwinder_dump(struct task_struct *task,
945 				struct pt_regs *regs,
946 				unsigned long *sp,
947 				const struct stacktrace_ops *ops,
948 				void *data)
949 {
950 	struct dwarf_frame *frame, *_frame;
951 	unsigned long return_addr;
952 
953 	_frame = NULL;
954 	return_addr = 0;
955 
956 	while (1) {
957 		frame = dwarf_unwind_stack(return_addr, _frame);
958 
959 		if (_frame)
960 			dwarf_free_frame(_frame);
961 
962 		_frame = frame;
963 
964 		if (!frame || !frame->return_addr)
965 			break;
966 
967 		return_addr = frame->return_addr;
968 		ops->address(data, return_addr, 1);
969 	}
970 
971 	if (frame)
972 		dwarf_free_frame(frame);
973 }
974 
975 static struct unwinder dwarf_unwinder = {
976 	.name = "dwarf-unwinder",
977 	.dump = dwarf_unwinder_dump,
978 	.rating = 150,
979 };
980 
981 static void dwarf_unwinder_cleanup(void)
982 {
983 	struct rb_node **fde_rb_node = &fde_root.rb_node;
984 	struct rb_node **cie_rb_node = &cie_root.rb_node;
985 
986 	/*
987 	 * Deallocate all the memory allocated for the DWARF unwinder.
988 	 * Traverse all the FDE/CIE lists and remove and free all the
989 	 * memory associated with those data structures.
990 	 */
991 	while (*fde_rb_node) {
992 		struct dwarf_fde *fde;
993 
994 		fde = rb_entry(*fde_rb_node, struct dwarf_fde, node);
995 		rb_erase(*fde_rb_node, &fde_root);
996 		kfree(fde);
997 	}
998 
999 	while (*cie_rb_node) {
1000 		struct dwarf_cie *cie;
1001 
1002 		cie = rb_entry(*cie_rb_node, struct dwarf_cie, node);
1003 		rb_erase(*cie_rb_node, &cie_root);
1004 		kfree(cie);
1005 	}
1006 
1007 	kmem_cache_destroy(dwarf_reg_cachep);
1008 	kmem_cache_destroy(dwarf_frame_cachep);
1009 }
1010 
1011 /**
1012  *	dwarf_parse_section - parse DWARF section
1013  *	@eh_frame_start: start address of the .eh_frame section
1014  *	@eh_frame_end: end address of the .eh_frame section
1015  *	@mod: the kernel module containing the .eh_frame section
1016  *
1017  *	Parse the information in a .eh_frame section.
1018  */
1019 static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
1020 			       struct module *mod)
1021 {
1022 	u32 entry_type;
1023 	void *p, *entry;
1024 	int count, err = 0;
1025 	unsigned long len = 0;
1026 	unsigned int c_entries, f_entries;
1027 	unsigned char *end;
1028 
1029 	c_entries = 0;
1030 	f_entries = 0;
1031 	entry = eh_frame_start;
1032 
1033 	while ((char *)entry < eh_frame_end) {
1034 		p = entry;
1035 
1036 		count = dwarf_entry_len(p, &len);
1037 		if (count == 0) {
1038 			/*
1039 			 * We read a bogus length field value. There is
1040 			 * nothing we can do here apart from disabling
1041 			 * the DWARF unwinder. We can't even skip this
1042 			 * entry and move to the next one because 'len'
1043 			 * tells us where our next entry is.
1044 			 */
1045 			err = -EINVAL;
1046 			goto out;
1047 		} else
1048 			p += count;
1049 
1050 		/* initial length does not include itself */
1051 		end = p + len;
1052 
1053 		entry_type = get_unaligned((u32 *)p);
1054 		p += 4;
1055 
1056 		if (entry_type == DW_EH_FRAME_CIE) {
1057 			err = dwarf_parse_cie(entry, p, len, end, mod);
1058 			if (err < 0)
1059 				goto out;
1060 			else
1061 				c_entries++;
1062 		} else {
1063 			err = dwarf_parse_fde(entry, entry_type, p, len,
1064 					      end, mod);
1065 			if (err < 0)
1066 				goto out;
1067 			else
1068 				f_entries++;
1069 		}
1070 
1071 		entry = (char *)entry + len + 4;
1072 	}
1073 
1074 	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
1075 	       c_entries, f_entries);
1076 
1077 	return 0;
1078 
1079 out:
1080 	return err;
1081 }
1082 
1083 #ifdef CONFIG_MODULES
1084 int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
1085 			  struct module *me)
1086 {
1087 	unsigned int i, err;
1088 	unsigned long start, end;
1089 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
1090 
1091 	start = end = 0;
1092 
1093 	for (i = 1; i < hdr->e_shnum; i++) {
1094 		/* Alloc bit cleared means "ignore it." */
1095 		if ((sechdrs[i].sh_flags & SHF_ALLOC)
1096 		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
1097 			start = sechdrs[i].sh_addr;
1098 			end = start + sechdrs[i].sh_size;
1099 			break;
1100 		}
1101 	}
1102 
1103 	/* Did we find the .eh_frame section? */
1104 	if (i != hdr->e_shnum) {
1105 		INIT_LIST_HEAD(&me->arch.cie_list);
1106 		INIT_LIST_HEAD(&me->arch.fde_list);
1107 		err = dwarf_parse_section((char *)start, (char *)end, me);
1108 		if (err) {
1109 			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
1110 			       me->name);
1111 			return err;
1112 		}
1113 	}
1114 
1115 	return 0;
1116 }
1117 
1118 /**
1119  *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
1120  *	@mod: the module that is being unloaded
1121  *
1122  *	Remove any FDEs and CIEs from the global lists that came from
1123  *	@mod's .eh_frame section because @mod is being unloaded.
1124  */
1125 void module_dwarf_cleanup(struct module *mod)
1126 {
1127 	struct dwarf_fde *fde, *ftmp;
1128 	struct dwarf_cie *cie, *ctmp;
1129 	unsigned long flags;
1130 
1131 	spin_lock_irqsave(&dwarf_cie_lock, flags);
1132 
1133 	list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
1134 		list_del(&cie->link);
1135 		rb_erase(&cie->node, &cie_root);
1136 		kfree(cie);
1137 	}
1138 
1139 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
1140 
1141 	spin_lock_irqsave(&dwarf_fde_lock, flags);
1142 
1143 	list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
1144 		list_del(&fde->link);
1145 		rb_erase(&fde->node, &fde_root);
1146 		kfree(fde);
1147 	}
1148 
1149 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
1150 }
1151 #endif /* CONFIG_MODULES */
1152 
1153 /**
1154  *	dwarf_unwinder_init - initialise the dwarf unwinder
1155  *
1156  *	Build the data structures describing the .dwarf_frame section to
1157  *	make it easier to lookup CIE and FDE entries. Because the
1158  *	.eh_frame section is packed as tightly as possible it is not
1159  *	easy to lookup the FDE for a given PC, so we build a list of FDE
1160  *	and CIE entries that make it easier.
1161  */
1162 static int __init dwarf_unwinder_init(void)
1163 {
1164 	int err;
1165 
1166 	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
1167 			sizeof(struct dwarf_frame), 0,
1168 			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
1169 
1170 	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
1171 			sizeof(struct dwarf_reg), 0,
1172 			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
1173 
1174 	dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
1175 					  mempool_alloc_slab,
1176 					  mempool_free_slab,
1177 					  dwarf_frame_cachep);
1178 
1179 	dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
1180 					 mempool_alloc_slab,
1181 					 mempool_free_slab,
1182 					 dwarf_reg_cachep);
1183 
1184 	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
1185 	if (err)
1186 		goto out;
1187 
1188 	err = unwinder_register(&dwarf_unwinder);
1189 	if (err)
1190 		goto out;
1191 
1192 	return 0;
1193 
1194 out:
1195 	printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
1196 	dwarf_unwinder_cleanup();
1197 	return -EINVAL;
1198 }
1199 early_initcall(dwarf_unwinder_init);
1200