xref: /linux/arch/sh/kernel/dwarf.c (revision c145211d1f9e2ef19e7b4c2b943f68366daa97af)
1 /*
2  * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * This is an implementation of a DWARF unwinder. Its main purpose is
9  * for generating stacktrace information. Based on the DWARF 3
10  * specification from http://www.dwarfstd.org.
11  *
12  * TODO:
13  *	- DWARF64 doesn't work.
14  *	- Registers with DWARF_VAL_OFFSET rules aren't handled properly.
15  */
16 
17 /* #define DEBUG */
18 #include <linux/kernel.h>
19 #include <linux/io.h>
20 #include <linux/list.h>
21 #include <linux/mempool.h>
22 #include <linux/mm.h>
23 #include <linux/elf.h>
24 #include <linux/ftrace.h>
25 #include <linux/module.h>
26 #include <linux/slab.h>
27 #include <asm/dwarf.h>
28 #include <asm/unwinder.h>
29 #include <asm/sections.h>
30 #include <asm/unaligned.h>
31 #include <asm/stacktrace.h>
32 
33 /* Reserve enough memory for two stack frames */
34 #define DWARF_FRAME_MIN_REQ	2
35 /* ... with 4 registers per frame. */
36 #define DWARF_REG_MIN_REQ	(DWARF_FRAME_MIN_REQ * 4)
37 
38 static struct kmem_cache *dwarf_frame_cachep;
39 static mempool_t *dwarf_frame_pool;
40 
41 static struct kmem_cache *dwarf_reg_cachep;
42 static mempool_t *dwarf_reg_pool;
43 
44 static struct rb_root cie_root;
45 static DEFINE_SPINLOCK(dwarf_cie_lock);
46 
47 static struct rb_root fde_root;
48 static DEFINE_SPINLOCK(dwarf_fde_lock);
49 
50 static struct dwarf_cie *cached_cie;
51 
52 /**
53  *	dwarf_frame_alloc_reg - allocate memory for a DWARF register
54  *	@frame: the DWARF frame whose list of registers we insert on
55  *	@reg_num: the register number
56  *
57  *	Allocate space for, and initialise, a dwarf reg from
58  *	dwarf_reg_pool and insert it onto the (unsorted) linked-list of
59  *	dwarf registers for @frame.
60  *
61  *	Return the initialised DWARF reg.
62  */
63 static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
64 					       unsigned int reg_num)
65 {
66 	struct dwarf_reg *reg;
67 
68 	reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
69 	if (!reg) {
70 		printk(KERN_WARNING "Unable to allocate a DWARF register\n");
71 		/*
72 		 * Let's just bomb hard here, we have no way to
73 		 * gracefully recover.
74 		 */
75 		UNWINDER_BUG();
76 	}
77 
78 	reg->number = reg_num;
79 	reg->addr = 0;
80 	reg->flags = 0;
81 
82 	list_add(&reg->link, &frame->reg_list);
83 
84 	return reg;
85 }
86 
87 static void dwarf_frame_free_regs(struct dwarf_frame *frame)
88 {
89 	struct dwarf_reg *reg, *n;
90 
91 	list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
92 		list_del(&reg->link);
93 		mempool_free(reg, dwarf_reg_pool);
94 	}
95 }
96 
97 /**
98  *	dwarf_frame_reg - return a DWARF register
99  *	@frame: the DWARF frame to search in for @reg_num
100  *	@reg_num: the register number to search for
101  *
102  *	Lookup and return the dwarf reg @reg_num for this frame. Return
103  *	NULL if @reg_num is an register invalid number.
104  */
105 static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
106 					 unsigned int reg_num)
107 {
108 	struct dwarf_reg *reg;
109 
110 	list_for_each_entry(reg, &frame->reg_list, link) {
111 		if (reg->number == reg_num)
112 			return reg;
113 	}
114 
115 	return NULL;
116 }
117 
118 /**
119  *	dwarf_read_addr - read dwarf data
120  *	@src: source address of data
121  *	@dst: destination address to store the data to
122  *
123  *	Read 'n' bytes from @src, where 'n' is the size of an address on
124  *	the native machine. We return the number of bytes read, which
125  *	should always be 'n'. We also have to be careful when reading
126  *	from @src and writing to @dst, because they can be arbitrarily
127  *	aligned. Return 'n' - the number of bytes read.
128  */
129 static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
130 {
131 	u32 val = get_unaligned(src);
132 	put_unaligned(val, dst);
133 	return sizeof(unsigned long *);
134 }
135 
/**
 *	dwarf_read_uleb128 - read unsigned LEB128 data
 *	@addr: the address where the ULEB128 data is stored
 *	@ret: address to store the result
 *
 *	Decode an unsigned LEB128 encoded datum. The algorithm is taken
 *	from Appendix C of the DWARF 3 spec. For information on the
 *	encodings refer to section "7.6 - Variable Length Data".
 *
 *	Every byte of the encoding is consumed, but payload bits that do
 *	not fit in an unsigned int are dropped rather than shifted by an
 *	out-of-range amount.
 *
 *	Return the number of bytes read.
 */
static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
{
	unsigned int result = 0;
	unsigned int shift = 0;
	unsigned char byte;
	int count = 0;

	do {
		byte = __raw_readb(addr);
		addr++;
		count++;

		/*
		 * Shift as unsigned and only while the shift count is
		 * smaller than the width of the result; anything else is
		 * undefined behaviour in C.
		 */
		if (shift < 8 * sizeof(result))
			result |= (unsigned int)(byte & 0x7f) << shift;
		shift += 7;
	} while (byte & 0x80);	/* high bit set: more bytes follow */

	*ret = result;

	return count;
}
172 
/**
 *	dwarf_read_leb128 - read signed LEB128 data
 *	@addr: the address of the LEB128 encoded data
 *	@ret: address to store the result
 *
 *	Decode signed LEB128 data. The algorithm is taken from Appendix
 *	C of the DWARF 3 spec.
 *
 *	The value is assembled in an unsigned accumulator so that neither
 *	the payload shifts nor the sign extension left-shift a negative
 *	number. Payload bits that do not fit in an int are dropped.
 *
 *	Return the number of bytes read.
 */
static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
{
	unsigned int result = 0;
	unsigned int shift = 0;
	unsigned int num_bits;
	unsigned char byte;
	int count = 0;

	/* The number of bits in a signed integer. */
	num_bits = 8 * sizeof(*ret);

	do {
		byte = __raw_readb(addr);
		addr++;
		count++;

		if (shift < num_bits)
			result |= (unsigned int)(byte & 0x7f) << shift;
		shift += 7;
	} while (byte & 0x80);	/* high bit set: more bytes follow */

	/*
	 * Bit 6 of the final byte is the sign bit. If it is set and the
	 * value is narrower than an int, fill the remaining high bits
	 * with ones.
	 */
	if ((shift < num_bits) && (byte & 0x40))
		result |= ~0U << shift;

	*ret = (int)result;

	return count;
}
213 
214 /**
215  *	dwarf_read_encoded_value - return the decoded value at @addr
216  *	@addr: the address of the encoded value
217  *	@val: where to write the decoded value
218  *	@encoding: the encoding with which we can decode @addr
219  *
220  *	GCC emits encoded address in the .eh_frame FDE entries. Decode
221  *	the value at @addr using @encoding. The decoded value is written
222  *	to @val and the number of bytes read is returned.
223  */
224 static int dwarf_read_encoded_value(char *addr, unsigned long *val,
225 				    char encoding)
226 {
227 	unsigned long decoded_addr = 0;
228 	int count = 0;
229 
230 	switch (encoding & 0x70) {
231 	case DW_EH_PE_absptr:
232 		break;
233 	case DW_EH_PE_pcrel:
234 		decoded_addr = (unsigned long)addr;
235 		break;
236 	default:
237 		pr_debug("encoding=0x%x\n", (encoding & 0x70));
238 		UNWINDER_BUG();
239 	}
240 
241 	if ((encoding & 0x07) == 0x00)
242 		encoding |= DW_EH_PE_udata4;
243 
244 	switch (encoding & 0x0f) {
245 	case DW_EH_PE_sdata4:
246 	case DW_EH_PE_udata4:
247 		count += 4;
248 		decoded_addr += get_unaligned((u32 *)addr);
249 		__raw_writel(decoded_addr, val);
250 		break;
251 	default:
252 		pr_debug("encoding=0x%x\n", encoding);
253 		UNWINDER_BUG();
254 	}
255 
256 	return count;
257 }
258 
259 /**
260  *	dwarf_entry_len - return the length of an FDE or CIE
261  *	@addr: the address of the entry
262  *	@len: the length of the entry
263  *
264  *	Read the initial_length field of the entry and store the size of
265  *	the entry in @len. We return the number of bytes read. Return a
266  *	count of 0 on error.
267  */
268 static inline int dwarf_entry_len(char *addr, unsigned long *len)
269 {
270 	u32 initial_len;
271 	int count;
272 
273 	initial_len = get_unaligned((u32 *)addr);
274 	count = 4;
275 
276 	/*
277 	 * An initial length field value in the range DW_LEN_EXT_LO -
278 	 * DW_LEN_EXT_HI indicates an extension, and should not be
279 	 * interpreted as a length. The only extension that we currently
280 	 * understand is the use of DWARF64 addresses.
281 	 */
282 	if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
283 		/*
284 		 * The 64-bit length field immediately follows the
285 		 * compulsory 32-bit length field.
286 		 */
287 		if (initial_len == DW_EXT_DWARF64) {
288 			*len = get_unaligned((u64 *)addr + 4);
289 			count = 12;
290 		} else {
291 			printk(KERN_WARNING "Unknown DWARF extension\n");
292 			count = 0;
293 		}
294 	} else
295 		*len = initial_len;
296 
297 	return count;
298 }
299 
300 /**
301  *	dwarf_lookup_cie - locate the cie
302  *	@cie_ptr: pointer to help with lookup
303  */
304 static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
305 {
306 	struct rb_node **rb_node = &cie_root.rb_node;
307 	struct dwarf_cie *cie = NULL;
308 	unsigned long flags;
309 
310 	spin_lock_irqsave(&dwarf_cie_lock, flags);
311 
312 	/*
313 	 * We've cached the last CIE we looked up because chances are
314 	 * that the FDE wants this CIE.
315 	 */
316 	if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
317 		cie = cached_cie;
318 		goto out;
319 	}
320 
321 	while (*rb_node) {
322 		struct dwarf_cie *cie_tmp;
323 
324 		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
325 		BUG_ON(!cie_tmp);
326 
327 		if (cie_ptr == cie_tmp->cie_pointer) {
328 			cie = cie_tmp;
329 			cached_cie = cie_tmp;
330 			goto out;
331 		} else {
332 			if (cie_ptr < cie_tmp->cie_pointer)
333 				rb_node = &(*rb_node)->rb_left;
334 			else
335 				rb_node = &(*rb_node)->rb_right;
336 		}
337 	}
338 
339 out:
340 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
341 	return cie;
342 }
343 
344 /**
345  *	dwarf_lookup_fde - locate the FDE that covers pc
346  *	@pc: the program counter
347  */
348 struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
349 {
350 	struct rb_node **rb_node = &fde_root.rb_node;
351 	struct dwarf_fde *fde = NULL;
352 	unsigned long flags;
353 
354 	spin_lock_irqsave(&dwarf_fde_lock, flags);
355 
356 	while (*rb_node) {
357 		struct dwarf_fde *fde_tmp;
358 		unsigned long tmp_start, tmp_end;
359 
360 		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
361 		BUG_ON(!fde_tmp);
362 
363 		tmp_start = fde_tmp->initial_location;
364 		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
365 
366 		if (pc < tmp_start) {
367 			rb_node = &(*rb_node)->rb_left;
368 		} else {
369 			if (pc < tmp_end) {
370 				fde = fde_tmp;
371 				goto out;
372 			} else
373 				rb_node = &(*rb_node)->rb_right;
374 		}
375 	}
376 
377 out:
378 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
379 
380 	return fde;
381 }
382 
383 /**
384  *	dwarf_cfa_execute_insns - execute instructions to calculate a CFA
385  *	@insn_start: address of the first instruction
386  *	@insn_end: address of the last instruction
387  *	@cie: the CIE for this function
388  *	@fde: the FDE for this function
389  *	@frame: the instructions calculate the CFA for this frame
390  *	@pc: the program counter of the address we're interested in
391  *
392  *	Execute the Call Frame instruction sequence starting at
393  *	@insn_start and ending at @insn_end. The instructions describe
394  *	how to calculate the Canonical Frame Address of a stackframe.
395  *	Store the results in @frame.
396  */
397 static int dwarf_cfa_execute_insns(unsigned char *insn_start,
398 				   unsigned char *insn_end,
399 				   struct dwarf_cie *cie,
400 				   struct dwarf_fde *fde,
401 				   struct dwarf_frame *frame,
402 				   unsigned long pc)
403 {
404 	unsigned char insn;
405 	unsigned char *current_insn;
406 	unsigned int count, delta, reg, expr_len, offset;
407 	struct dwarf_reg *regp;
408 
409 	current_insn = insn_start;
410 
411 	while (current_insn < insn_end && frame->pc <= pc) {
412 		insn = __raw_readb(current_insn++);
413 
414 		/*
415 		 * Firstly, handle the opcodes that embed their operands
416 		 * in the instructions.
417 		 */
418 		switch (DW_CFA_opcode(insn)) {
419 		case DW_CFA_advance_loc:
420 			delta = DW_CFA_operand(insn);
421 			delta *= cie->code_alignment_factor;
422 			frame->pc += delta;
423 			continue;
424 			/* NOTREACHED */
425 		case DW_CFA_offset:
426 			reg = DW_CFA_operand(insn);
427 			count = dwarf_read_uleb128(current_insn, &offset);
428 			current_insn += count;
429 			offset *= cie->data_alignment_factor;
430 			regp = dwarf_frame_alloc_reg(frame, reg);
431 			regp->addr = offset;
432 			regp->flags |= DWARF_REG_OFFSET;
433 			continue;
434 			/* NOTREACHED */
435 		case DW_CFA_restore:
436 			reg = DW_CFA_operand(insn);
437 			continue;
438 			/* NOTREACHED */
439 		}
440 
441 		/*
442 		 * Secondly, handle the opcodes that don't embed their
443 		 * operands in the instruction.
444 		 */
445 		switch (insn) {
446 		case DW_CFA_nop:
447 			continue;
448 		case DW_CFA_advance_loc1:
449 			delta = *current_insn++;
450 			frame->pc += delta * cie->code_alignment_factor;
451 			break;
452 		case DW_CFA_advance_loc2:
453 			delta = get_unaligned((u16 *)current_insn);
454 			current_insn += 2;
455 			frame->pc += delta * cie->code_alignment_factor;
456 			break;
457 		case DW_CFA_advance_loc4:
458 			delta = get_unaligned((u32 *)current_insn);
459 			current_insn += 4;
460 			frame->pc += delta * cie->code_alignment_factor;
461 			break;
462 		case DW_CFA_offset_extended:
463 			count = dwarf_read_uleb128(current_insn, &reg);
464 			current_insn += count;
465 			count = dwarf_read_uleb128(current_insn, &offset);
466 			current_insn += count;
467 			offset *= cie->data_alignment_factor;
468 			break;
469 		case DW_CFA_restore_extended:
470 			count = dwarf_read_uleb128(current_insn, &reg);
471 			current_insn += count;
472 			break;
473 		case DW_CFA_undefined:
474 			count = dwarf_read_uleb128(current_insn, &reg);
475 			current_insn += count;
476 			regp = dwarf_frame_alloc_reg(frame, reg);
477 			regp->flags |= DWARF_UNDEFINED;
478 			break;
479 		case DW_CFA_def_cfa:
480 			count = dwarf_read_uleb128(current_insn,
481 						   &frame->cfa_register);
482 			current_insn += count;
483 			count = dwarf_read_uleb128(current_insn,
484 						   &frame->cfa_offset);
485 			current_insn += count;
486 
487 			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
488 			break;
489 		case DW_CFA_def_cfa_register:
490 			count = dwarf_read_uleb128(current_insn,
491 						   &frame->cfa_register);
492 			current_insn += count;
493 			frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
494 			break;
495 		case DW_CFA_def_cfa_offset:
496 			count = dwarf_read_uleb128(current_insn, &offset);
497 			current_insn += count;
498 			frame->cfa_offset = offset;
499 			break;
500 		case DW_CFA_def_cfa_expression:
501 			count = dwarf_read_uleb128(current_insn, &expr_len);
502 			current_insn += count;
503 
504 			frame->cfa_expr = current_insn;
505 			frame->cfa_expr_len = expr_len;
506 			current_insn += expr_len;
507 
508 			frame->flags |= DWARF_FRAME_CFA_REG_EXP;
509 			break;
510 		case DW_CFA_offset_extended_sf:
511 			count = dwarf_read_uleb128(current_insn, &reg);
512 			current_insn += count;
513 			count = dwarf_read_leb128(current_insn, &offset);
514 			current_insn += count;
515 			offset *= cie->data_alignment_factor;
516 			regp = dwarf_frame_alloc_reg(frame, reg);
517 			regp->flags |= DWARF_REG_OFFSET;
518 			regp->addr = offset;
519 			break;
520 		case DW_CFA_val_offset:
521 			count = dwarf_read_uleb128(current_insn, &reg);
522 			current_insn += count;
523 			count = dwarf_read_leb128(current_insn, &offset);
524 			offset *= cie->data_alignment_factor;
525 			regp = dwarf_frame_alloc_reg(frame, reg);
526 			regp->flags |= DWARF_VAL_OFFSET;
527 			regp->addr = offset;
528 			break;
529 		case DW_CFA_GNU_args_size:
530 			count = dwarf_read_uleb128(current_insn, &offset);
531 			current_insn += count;
532 			break;
533 		case DW_CFA_GNU_negative_offset_extended:
534 			count = dwarf_read_uleb128(current_insn, &reg);
535 			current_insn += count;
536 			count = dwarf_read_uleb128(current_insn, &offset);
537 			offset *= cie->data_alignment_factor;
538 
539 			regp = dwarf_frame_alloc_reg(frame, reg);
540 			regp->flags |= DWARF_REG_OFFSET;
541 			regp->addr = -offset;
542 			break;
543 		default:
544 			pr_debug("unhandled DWARF instruction 0x%x\n", insn);
545 			UNWINDER_BUG();
546 			break;
547 		}
548 	}
549 
550 	return 0;
551 }
552 
553 /**
554  *	dwarf_free_frame - free the memory allocated for @frame
555  *	@frame: the frame to free
556  */
557 void dwarf_free_frame(struct dwarf_frame *frame)
558 {
559 	dwarf_frame_free_regs(frame);
560 	mempool_free(frame, dwarf_frame_pool);
561 }
562 
563 extern void ret_from_irq(void);
564 
565 /**
566  *	dwarf_unwind_stack - unwind the stack
567  *
568  *	@pc: address of the function to unwind
569  *	@prev: struct dwarf_frame of the previous stackframe on the callstack
570  *
571  *	Return a struct dwarf_frame representing the most recent frame
572  *	on the callstack. Each of the lower (older) stack frames are
573  *	linked via the "prev" member.
574  */
575 struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
576 				       struct dwarf_frame *prev)
577 {
578 	struct dwarf_frame *frame;
579 	struct dwarf_cie *cie;
580 	struct dwarf_fde *fde;
581 	struct dwarf_reg *reg;
582 	unsigned long addr;
583 
584 	/*
585 	 * If we're starting at the top of the stack we need get the
586 	 * contents of a physical register to get the CFA in order to
587 	 * begin the virtual unwinding of the stack.
588 	 *
589 	 * NOTE: the return address is guaranteed to be setup by the
590 	 * time this function makes its first function call.
591 	 */
592 	if (!pc || !prev)
593 		pc = (unsigned long)current_text_addr();
594 
595 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
596 	/*
597 	 * If our stack has been patched by the function graph tracer
598 	 * then we might see the address of return_to_handler() where we
599 	 * expected to find the real return address.
600 	 */
601 	if (pc == (unsigned long)&return_to_handler) {
602 		int index = current->curr_ret_stack;
603 
604 		/*
605 		 * We currently have no way of tracking how many
606 		 * return_to_handler()'s we've seen. If there is more
607 		 * than one patched return address on our stack,
608 		 * complain loudly.
609 		 */
610 		WARN_ON(index > 0);
611 
612 		pc = current->ret_stack[index].ret;
613 	}
614 #endif
615 
616 	frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
617 	if (!frame) {
618 		printk(KERN_ERR "Unable to allocate a dwarf frame\n");
619 		UNWINDER_BUG();
620 	}
621 
622 	INIT_LIST_HEAD(&frame->reg_list);
623 	frame->flags = 0;
624 	frame->prev = prev;
625 	frame->return_addr = 0;
626 
627 	fde = dwarf_lookup_fde(pc);
628 	if (!fde) {
629 		/*
630 		 * This is our normal exit path. There are two reasons
631 		 * why we might exit here,
632 		 *
633 		 *	a) pc has no asscociated DWARF frame info and so
634 		 *	we don't know how to unwind this frame. This is
635 		 *	usually the case when we're trying to unwind a
636 		 *	frame that was called from some assembly code
637 		 *	that has no DWARF info, e.g. syscalls.
638 		 *
639 		 *	b) the DEBUG info for pc is bogus. There's
640 		 *	really no way to distinguish this case from the
641 		 *	case above, which sucks because we could print a
642 		 *	warning here.
643 		 */
644 		goto bail;
645 	}
646 
647 	cie = dwarf_lookup_cie(fde->cie_pointer);
648 
649 	frame->pc = fde->initial_location;
650 
651 	/* CIE initial instructions */
652 	dwarf_cfa_execute_insns(cie->initial_instructions,
653 				cie->instructions_end, cie, fde,
654 				frame, pc);
655 
656 	/* FDE instructions */
657 	dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
658 				fde, frame, pc);
659 
660 	/* Calculate the CFA */
661 	switch (frame->flags) {
662 	case DWARF_FRAME_CFA_REG_OFFSET:
663 		if (prev) {
664 			reg = dwarf_frame_reg(prev, frame->cfa_register);
665 			UNWINDER_BUG_ON(!reg);
666 			UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
667 
668 			addr = prev->cfa + reg->addr;
669 			frame->cfa = __raw_readl(addr);
670 
671 		} else {
672 			/*
673 			 * Again, we're starting from the top of the
674 			 * stack. We need to physically read
675 			 * the contents of a register in order to get
676 			 * the Canonical Frame Address for this
677 			 * function.
678 			 */
679 			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
680 		}
681 
682 		frame->cfa += frame->cfa_offset;
683 		break;
684 	default:
685 		UNWINDER_BUG();
686 	}
687 
688 	reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);
689 
690 	/*
691 	 * If we haven't seen the return address register or the return
692 	 * address column is undefined then we must assume that this is
693 	 * the end of the callstack.
694 	 */
695 	if (!reg || reg->flags == DWARF_UNDEFINED)
696 		goto bail;
697 
698 	UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
699 
700 	addr = frame->cfa + reg->addr;
701 	frame->return_addr = __raw_readl(addr);
702 
703 	/*
704 	 * Ah, the joys of unwinding through interrupts.
705 	 *
706 	 * Interrupts are tricky - the DWARF info needs to be _really_
707 	 * accurate and unfortunately I'm seeing a lot of bogus DWARF
708 	 * info. For example, I've seen interrupts occur in epilogues
709 	 * just after the frame pointer (r14) had been restored. The
710 	 * problem was that the DWARF info claimed that the CFA could be
711 	 * reached by using the value of the frame pointer before it was
712 	 * restored.
713 	 *
714 	 * So until the compiler can be trusted to produce reliable
715 	 * DWARF info when it really matters, let's stop unwinding once
716 	 * we've calculated the function that was interrupted.
717 	 */
718 	if (prev && prev->pc == (unsigned long)ret_from_irq)
719 		frame->return_addr = 0;
720 
721 	return frame;
722 
723 bail:
724 	dwarf_free_frame(frame);
725 	return NULL;
726 }
727 
728 static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
729 			   unsigned char *end, struct module *mod)
730 {
731 	struct rb_node **rb_node = &cie_root.rb_node;
732 	struct rb_node *parent = *rb_node;
733 	struct dwarf_cie *cie;
734 	unsigned long flags;
735 	int count;
736 
737 	cie = kzalloc(sizeof(*cie), GFP_KERNEL);
738 	if (!cie)
739 		return -ENOMEM;
740 
741 	cie->length = len;
742 
743 	/*
744 	 * Record the offset into the .eh_frame section
745 	 * for this CIE. It allows this CIE to be
746 	 * quickly and easily looked up from the
747 	 * corresponding FDE.
748 	 */
749 	cie->cie_pointer = (unsigned long)entry;
750 
751 	cie->version = *(char *)p++;
752 	UNWINDER_BUG_ON(cie->version != 1);
753 
754 	cie->augmentation = p;
755 	p += strlen(cie->augmentation) + 1;
756 
757 	count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
758 	p += count;
759 
760 	count = dwarf_read_leb128(p, &cie->data_alignment_factor);
761 	p += count;
762 
763 	/*
764 	 * Which column in the rule table contains the
765 	 * return address?
766 	 */
767 	if (cie->version == 1) {
768 		cie->return_address_reg = __raw_readb(p);
769 		p++;
770 	} else {
771 		count = dwarf_read_uleb128(p, &cie->return_address_reg);
772 		p += count;
773 	}
774 
775 	if (cie->augmentation[0] == 'z') {
776 		unsigned int length, count;
777 		cie->flags |= DWARF_CIE_Z_AUGMENTATION;
778 
779 		count = dwarf_read_uleb128(p, &length);
780 		p += count;
781 
782 		UNWINDER_BUG_ON((unsigned char *)p > end);
783 
784 		cie->initial_instructions = p + length;
785 		cie->augmentation++;
786 	}
787 
788 	while (*cie->augmentation) {
789 		/*
790 		 * "L" indicates a byte showing how the
791 		 * LSDA pointer is encoded. Skip it.
792 		 */
793 		if (*cie->augmentation == 'L') {
794 			p++;
795 			cie->augmentation++;
796 		} else if (*cie->augmentation == 'R') {
797 			/*
798 			 * "R" indicates a byte showing
799 			 * how FDE addresses are
800 			 * encoded.
801 			 */
802 			cie->encoding = *(char *)p++;
803 			cie->augmentation++;
804 		} else if (*cie->augmentation == 'P') {
805 			/*
806 			 * "R" indicates a personality
807 			 * routine in the CIE
808 			 * augmentation.
809 			 */
810 			UNWINDER_BUG();
811 		} else if (*cie->augmentation == 'S') {
812 			UNWINDER_BUG();
813 		} else {
814 			/*
815 			 * Unknown augmentation. Assume
816 			 * 'z' augmentation.
817 			 */
818 			p = cie->initial_instructions;
819 			UNWINDER_BUG_ON(!p);
820 			break;
821 		}
822 	}
823 
824 	cie->initial_instructions = p;
825 	cie->instructions_end = end;
826 
827 	/* Add to list */
828 	spin_lock_irqsave(&dwarf_cie_lock, flags);
829 
830 	while (*rb_node) {
831 		struct dwarf_cie *cie_tmp;
832 
833 		cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
834 
835 		parent = *rb_node;
836 
837 		if (cie->cie_pointer < cie_tmp->cie_pointer)
838 			rb_node = &parent->rb_left;
839 		else if (cie->cie_pointer >= cie_tmp->cie_pointer)
840 			rb_node = &parent->rb_right;
841 		else
842 			WARN_ON(1);
843 	}
844 
845 	rb_link_node(&cie->node, parent, rb_node);
846 	rb_insert_color(&cie->node, &cie_root);
847 
848 	if (mod != NULL)
849 		list_add_tail(&cie->link, &mod->arch.cie_list);
850 
851 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
852 
853 	return 0;
854 }
855 
856 static int dwarf_parse_fde(void *entry, u32 entry_type,
857 			   void *start, unsigned long len,
858 			   unsigned char *end, struct module *mod)
859 {
860 	struct rb_node **rb_node = &fde_root.rb_node;
861 	struct rb_node *parent = *rb_node;
862 	struct dwarf_fde *fde;
863 	struct dwarf_cie *cie;
864 	unsigned long flags;
865 	int count;
866 	void *p = start;
867 
868 	fde = kzalloc(sizeof(*fde), GFP_KERNEL);
869 	if (!fde)
870 		return -ENOMEM;
871 
872 	fde->length = len;
873 
874 	/*
875 	 * In a .eh_frame section the CIE pointer is the
876 	 * delta between the address within the FDE
877 	 */
878 	fde->cie_pointer = (unsigned long)(p - entry_type - 4);
879 
880 	cie = dwarf_lookup_cie(fde->cie_pointer);
881 	fde->cie = cie;
882 
883 	if (cie->encoding)
884 		count = dwarf_read_encoded_value(p, &fde->initial_location,
885 						 cie->encoding);
886 	else
887 		count = dwarf_read_addr(p, &fde->initial_location);
888 
889 	p += count;
890 
891 	if (cie->encoding)
892 		count = dwarf_read_encoded_value(p, &fde->address_range,
893 						 cie->encoding & 0x0f);
894 	else
895 		count = dwarf_read_addr(p, &fde->address_range);
896 
897 	p += count;
898 
899 	if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
900 		unsigned int length;
901 		count = dwarf_read_uleb128(p, &length);
902 		p += count + length;
903 	}
904 
905 	/* Call frame instructions. */
906 	fde->instructions = p;
907 	fde->end = end;
908 
909 	/* Add to list. */
910 	spin_lock_irqsave(&dwarf_fde_lock, flags);
911 
912 	while (*rb_node) {
913 		struct dwarf_fde *fde_tmp;
914 		unsigned long tmp_start, tmp_end;
915 		unsigned long start, end;
916 
917 		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
918 
919 		start = fde->initial_location;
920 		end = fde->initial_location + fde->address_range;
921 
922 		tmp_start = fde_tmp->initial_location;
923 		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
924 
925 		parent = *rb_node;
926 
927 		if (start < tmp_start)
928 			rb_node = &parent->rb_left;
929 		else if (start >= tmp_end)
930 			rb_node = &parent->rb_right;
931 		else
932 			WARN_ON(1);
933 	}
934 
935 	rb_link_node(&fde->node, parent, rb_node);
936 	rb_insert_color(&fde->node, &fde_root);
937 
938 	if (mod != NULL)
939 		list_add_tail(&fde->link, &mod->arch.fde_list);
940 
941 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
942 
943 	return 0;
944 }
945 
946 static void dwarf_unwinder_dump(struct task_struct *task,
947 				struct pt_regs *regs,
948 				unsigned long *sp,
949 				const struct stacktrace_ops *ops,
950 				void *data)
951 {
952 	struct dwarf_frame *frame, *_frame;
953 	unsigned long return_addr;
954 
955 	_frame = NULL;
956 	return_addr = 0;
957 
958 	while (1) {
959 		frame = dwarf_unwind_stack(return_addr, _frame);
960 
961 		if (_frame)
962 			dwarf_free_frame(_frame);
963 
964 		_frame = frame;
965 
966 		if (!frame || !frame->return_addr)
967 			break;
968 
969 		return_addr = frame->return_addr;
970 		ops->address(data, return_addr, 1);
971 	}
972 
973 	if (frame)
974 		dwarf_free_frame(frame);
975 }
976 
977 static struct unwinder dwarf_unwinder = {
978 	.name = "dwarf-unwinder",
979 	.dump = dwarf_unwinder_dump,
980 	.rating = 150,
981 };
982 
983 static void dwarf_unwinder_cleanup(void)
984 {
985 	struct rb_node **fde_rb_node = &fde_root.rb_node;
986 	struct rb_node **cie_rb_node = &cie_root.rb_node;
987 
988 	/*
989 	 * Deallocate all the memory allocated for the DWARF unwinder.
990 	 * Traverse all the FDE/CIE lists and remove and free all the
991 	 * memory associated with those data structures.
992 	 */
993 	while (*fde_rb_node) {
994 		struct dwarf_fde *fde;
995 
996 		fde = rb_entry(*fde_rb_node, struct dwarf_fde, node);
997 		rb_erase(*fde_rb_node, &fde_root);
998 		kfree(fde);
999 	}
1000 
1001 	while (*cie_rb_node) {
1002 		struct dwarf_cie *cie;
1003 
1004 		cie = rb_entry(*cie_rb_node, struct dwarf_cie, node);
1005 		rb_erase(*cie_rb_node, &cie_root);
1006 		kfree(cie);
1007 	}
1008 
1009 	kmem_cache_destroy(dwarf_reg_cachep);
1010 	kmem_cache_destroy(dwarf_frame_cachep);
1011 }
1012 
1013 /**
1014  *	dwarf_parse_section - parse DWARF section
1015  *	@eh_frame_start: start address of the .eh_frame section
1016  *	@eh_frame_end: end address of the .eh_frame section
1017  *	@mod: the kernel module containing the .eh_frame section
1018  *
1019  *	Parse the information in a .eh_frame section.
1020  */
1021 static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
1022 			       struct module *mod)
1023 {
1024 	u32 entry_type;
1025 	void *p, *entry;
1026 	int count, err = 0;
1027 	unsigned long len = 0;
1028 	unsigned int c_entries, f_entries;
1029 	unsigned char *end;
1030 
1031 	c_entries = 0;
1032 	f_entries = 0;
1033 	entry = eh_frame_start;
1034 
1035 	while ((char *)entry < eh_frame_end) {
1036 		p = entry;
1037 
1038 		count = dwarf_entry_len(p, &len);
1039 		if (count == 0) {
1040 			/*
1041 			 * We read a bogus length field value. There is
1042 			 * nothing we can do here apart from disabling
1043 			 * the DWARF unwinder. We can't even skip this
1044 			 * entry and move to the next one because 'len'
1045 			 * tells us where our next entry is.
1046 			 */
1047 			err = -EINVAL;
1048 			goto out;
1049 		} else
1050 			p += count;
1051 
1052 		/* initial length does not include itself */
1053 		end = p + len;
1054 
1055 		entry_type = get_unaligned((u32 *)p);
1056 		p += 4;
1057 
1058 		if (entry_type == DW_EH_FRAME_CIE) {
1059 			err = dwarf_parse_cie(entry, p, len, end, mod);
1060 			if (err < 0)
1061 				goto out;
1062 			else
1063 				c_entries++;
1064 		} else {
1065 			err = dwarf_parse_fde(entry, entry_type, p, len,
1066 					      end, mod);
1067 			if (err < 0)
1068 				goto out;
1069 			else
1070 				f_entries++;
1071 		}
1072 
1073 		entry = (char *)entry + len + 4;
1074 	}
1075 
1076 	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
1077 	       c_entries, f_entries);
1078 
1079 	return 0;
1080 
1081 out:
1082 	return err;
1083 }
1084 
1085 #ifdef CONFIG_MODULES
1086 int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
1087 			  struct module *me)
1088 {
1089 	unsigned int i, err;
1090 	unsigned long start, end;
1091 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
1092 
1093 	start = end = 0;
1094 
1095 	for (i = 1; i < hdr->e_shnum; i++) {
1096 		/* Alloc bit cleared means "ignore it." */
1097 		if ((sechdrs[i].sh_flags & SHF_ALLOC)
1098 		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
1099 			start = sechdrs[i].sh_addr;
1100 			end = start + sechdrs[i].sh_size;
1101 			break;
1102 		}
1103 	}
1104 
1105 	/* Did we find the .eh_frame section? */
1106 	if (i != hdr->e_shnum) {
1107 		INIT_LIST_HEAD(&me->arch.cie_list);
1108 		INIT_LIST_HEAD(&me->arch.fde_list);
1109 		err = dwarf_parse_section((char *)start, (char *)end, me);
1110 		if (err) {
1111 			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
1112 			       me->name);
1113 			return err;
1114 		}
1115 	}
1116 
1117 	return 0;
1118 }
1119 
1120 /**
1121  *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
1122  *	@mod: the module that is being unloaded
1123  *
1124  *	Remove any FDEs and CIEs from the global lists that came from
1125  *	@mod's .eh_frame section because @mod is being unloaded.
1126  */
1127 void module_dwarf_cleanup(struct module *mod)
1128 {
1129 	struct dwarf_fde *fde, *ftmp;
1130 	struct dwarf_cie *cie, *ctmp;
1131 	unsigned long flags;
1132 
1133 	spin_lock_irqsave(&dwarf_cie_lock, flags);
1134 
1135 	list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
1136 		list_del(&cie->link);
1137 		rb_erase(&cie->node, &cie_root);
1138 		kfree(cie);
1139 	}
1140 
1141 	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
1142 
1143 	spin_lock_irqsave(&dwarf_fde_lock, flags);
1144 
1145 	list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
1146 		list_del(&fde->link);
1147 		rb_erase(&fde->node, &fde_root);
1148 		kfree(fde);
1149 	}
1150 
1151 	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
1152 }
1153 #endif /* CONFIG_MODULES */
1154 
1155 /**
1156  *	dwarf_unwinder_init - initialise the dwarf unwinder
1157  *
1158  *	Build the data structures describing the .dwarf_frame section to
1159  *	make it easier to lookup CIE and FDE entries. Because the
1160  *	.eh_frame section is packed as tightly as possible it is not
1161  *	easy to lookup the FDE for a given PC, so we build a list of FDE
1162  *	and CIE entries that make it easier.
1163  */
1164 static int __init dwarf_unwinder_init(void)
1165 {
1166 	int err;
1167 
1168 	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
1169 			sizeof(struct dwarf_frame), 0,
1170 			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
1171 
1172 	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
1173 			sizeof(struct dwarf_reg), 0,
1174 			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
1175 
1176 	dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
1177 					  mempool_alloc_slab,
1178 					  mempool_free_slab,
1179 					  dwarf_frame_cachep);
1180 
1181 	dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
1182 					 mempool_alloc_slab,
1183 					 mempool_free_slab,
1184 					 dwarf_reg_cachep);
1185 
1186 	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
1187 	if (err)
1188 		goto out;
1189 
1190 	err = unwinder_register(&dwarf_unwinder);
1191 	if (err)
1192 		goto out;
1193 
1194 	return 0;
1195 
1196 out:
1197 	printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
1198 	dwarf_unwinder_cleanup();
1199 	return -EINVAL;
1200 }
1201 early_initcall(dwarf_unwinder_init);
1202