// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Matt Fleming <matt@console-pimps.org>
 *
 * This is an implementation of a DWARF unwinder. Its main purpose is
 * for generating stacktrace information. Based on the DWARF 3
 * specification from http://www.dwarfstd.org.
 *
 * TODO:
 *	- DWARF64 doesn't work.
 *	- Registers with DWARF_VAL_OFFSET rules aren't handled properly.
 */

/* #define DEBUG */
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/mm.h>
#include <linux/elf.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/unaligned.h>
#include <asm/dwarf.h>
#include <asm/unwinder.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>

30 /* Reserve enough memory for two stack frames */
31 #define DWARF_FRAME_MIN_REQ 2
32 /* ... with 4 registers per frame. */
33 #define DWARF_REG_MIN_REQ (DWARF_FRAME_MIN_REQ * 4)
34
35 static struct kmem_cache *dwarf_frame_cachep;
36 static mempool_t *dwarf_frame_pool;
37
38 static struct kmem_cache *dwarf_reg_cachep;
39 static mempool_t *dwarf_reg_pool;
40
41 static struct rb_root cie_root;
42 static DEFINE_SPINLOCK(dwarf_cie_lock);
43
44 static struct rb_root fde_root;
45 static DEFINE_SPINLOCK(dwarf_fde_lock);
46
47 static struct dwarf_cie *cached_cie;
48
49 static unsigned int dwarf_unwinder_ready;
50
51 /**
52 * dwarf_frame_alloc_reg - allocate memory for a DWARF register
53 * @frame: the DWARF frame whose list of registers we insert on
54 * @reg_num: the register number
55 *
56 * Allocate space for, and initialise, a dwarf reg from
57 * dwarf_reg_pool and insert it onto the (unsorted) linked-list of
58 * dwarf registers for @frame.
59 *
60 * Return the initialised DWARF reg.
61 */
dwarf_frame_alloc_reg(struct dwarf_frame * frame,unsigned int reg_num)62 static struct dwarf_reg *dwarf_frame_alloc_reg(struct dwarf_frame *frame,
63 unsigned int reg_num)
64 {
65 struct dwarf_reg *reg;
66
67 reg = mempool_alloc(dwarf_reg_pool, GFP_ATOMIC);
68 if (!reg) {
69 printk(KERN_WARNING "Unable to allocate a DWARF register\n");
70 /*
71 * Let's just bomb hard here, we have no way to
72 * gracefully recover.
73 */
74 UNWINDER_BUG();
75 }
76
77 reg->number = reg_num;
78 reg->addr = 0;
79 reg->flags = 0;
80
81 list_add(®->link, &frame->reg_list);
82
83 return reg;
84 }
85
dwarf_frame_free_regs(struct dwarf_frame * frame)86 static void dwarf_frame_free_regs(struct dwarf_frame *frame)
87 {
88 struct dwarf_reg *reg, *n;
89
90 list_for_each_entry_safe(reg, n, &frame->reg_list, link) {
91 list_del(®->link);
92 mempool_free(reg, dwarf_reg_pool);
93 }
94 }
95
96 /**
97 * dwarf_frame_reg - return a DWARF register
98 * @frame: the DWARF frame to search in for @reg_num
99 * @reg_num: the register number to search for
100 *
101 * Lookup and return the dwarf reg @reg_num for this frame. Return
102 * NULL if @reg_num is an register invalid number.
103 */
dwarf_frame_reg(struct dwarf_frame * frame,unsigned int reg_num)104 static struct dwarf_reg *dwarf_frame_reg(struct dwarf_frame *frame,
105 unsigned int reg_num)
106 {
107 struct dwarf_reg *reg;
108
109 list_for_each_entry(reg, &frame->reg_list, link) {
110 if (reg->number == reg_num)
111 return reg;
112 }
113
114 return NULL;
115 }
116
/**
 * dwarf_read_addr - read dwarf data
 * @src: source address of data
 * @dst: destination address to store the data to
 *
 * Read 'n' bytes from @src, where 'n' is sizeof(unsigned long), and
 * store them to @dst. Both @src and @dst may be arbitrarily aligned,
 * so the unaligned accessors are used for the load and the store.
 *
 * The value is staged in an unsigned long (not a u32) so that nothing
 * is truncated if unsigned long is wider than 32 bits.
 *
 * Return 'n' - the number of bytes read.
 */
static inline int dwarf_read_addr(unsigned long *src, unsigned long *dst)
{
	unsigned long val = get_unaligned(src);

	put_unaligned(val, dst);
	return sizeof(unsigned long);
}

/**
 * dwarf_read_uleb128 - read unsigned LEB128 data
 * @addr: the address where the ULEB128 data is stored
 * @ret: address to store the result
 *
 * Decode an unsigned LEB128 encoded datum. The algorithm is taken
 * from Appendix C of the DWARF 3 spec. For information on the
 * encodings refer to section "7.6 - Variable Length Data".
 *
 * Every byte of the encoding is consumed, but payload bits that do
 * not fit into an unsigned int are dropped rather than shifted: a
 * shift by the width of the type or more is undefined behaviour in C.
 *
 * Return the number of bytes read.
 */
static inline unsigned long dwarf_read_uleb128(char *addr, unsigned int *ret)
{
	const unsigned int num_bits = 8 * sizeof(unsigned int);
	unsigned int result = 0;
	unsigned int shift = 0;
	unsigned long count = 0;
	unsigned char byte;

	do {
		byte = __raw_readb(addr);
		addr++;
		count++;

		/* Shift an unsigned operand so bit 31 is never a sign bit. */
		if (shift < num_bits)
			result |= (unsigned int)(byte & 0x7f) << shift;
		shift += 7;
	} while (byte & 0x80);	/* high bit set: more bytes follow */

	*ret = result;

	return count;
}

/**
 * dwarf_read_leb128 - read signed LEB128 data
 * @addr: the address of the LEB128 encoded data
 * @ret: address to store the result
 *
 * Decode signed LEB128 data. The algorithm is taken from Appendix
 * C of the DWARF 3 spec.
 *
 * The value is accumulated in an unsigned int so that none of the
 * shifts below operate on a negative or overflowing signed operand;
 * payload bits beyond the width of an int are dropped.
 *
 * Return the number of bytes read.
 */
static inline unsigned long dwarf_read_leb128(char *addr, int *ret)
{
	/* The number of bits in a signed integer. */
	const unsigned int num_bits = 8 * sizeof(int);
	unsigned int result = 0;
	unsigned int shift = 0;
	unsigned long count = 0;
	unsigned char byte;

	do {
		byte = __raw_readb(addr);
		addr++;
		count++;

		if (shift < num_bits)
			result |= (unsigned int)(byte & 0x7f) << shift;
		shift += 7;
	} while (byte & 0x80);	/* high bit set: more bytes follow */

	/*
	 * Bit 6 of the final byte is the sign bit: if it is set and the
	 * value did not fill the whole int, extend the sign upwards.
	 */
	if ((shift < num_bits) && (byte & 0x40))
		result |= ~0U << shift;

	*ret = (int)result;

	return count;
}

213 /**
214 * dwarf_read_encoded_value - return the decoded value at @addr
215 * @addr: the address of the encoded value
216 * @val: where to write the decoded value
217 * @encoding: the encoding with which we can decode @addr
218 *
219 * GCC emits encoded address in the .eh_frame FDE entries. Decode
220 * the value at @addr using @encoding. The decoded value is written
221 * to @val and the number of bytes read is returned.
222 */
dwarf_read_encoded_value(char * addr,unsigned long * val,char encoding)223 static int dwarf_read_encoded_value(char *addr, unsigned long *val,
224 char encoding)
225 {
226 unsigned long decoded_addr = 0;
227 int count = 0;
228
229 switch (encoding & 0x70) {
230 case DW_EH_PE_absptr:
231 break;
232 case DW_EH_PE_pcrel:
233 decoded_addr = (unsigned long)addr;
234 break;
235 default:
236 pr_debug("encoding=0x%x\n", (encoding & 0x70));
237 UNWINDER_BUG();
238 }
239
240 if ((encoding & 0x07) == 0x00)
241 encoding |= DW_EH_PE_udata4;
242
243 switch (encoding & 0x0f) {
244 case DW_EH_PE_sdata4:
245 case DW_EH_PE_udata4:
246 count += 4;
247 decoded_addr += get_unaligned((u32 *)addr);
248 __raw_writel(decoded_addr, val);
249 break;
250 default:
251 pr_debug("encoding=0x%x\n", encoding);
252 UNWINDER_BUG();
253 }
254
255 return count;
256 }
257
258 /**
259 * dwarf_entry_len - return the length of an FDE or CIE
260 * @addr: the address of the entry
261 * @len: the length of the entry
262 *
263 * Read the initial_length field of the entry and store the size of
264 * the entry in @len. We return the number of bytes read. Return a
265 * count of 0 on error.
266 */
dwarf_entry_len(char * addr,unsigned long * len)267 static inline int dwarf_entry_len(char *addr, unsigned long *len)
268 {
269 u32 initial_len;
270 int count;
271
272 initial_len = get_unaligned((u32 *)addr);
273 count = 4;
274
275 /*
276 * An initial length field value in the range DW_LEN_EXT_LO -
277 * DW_LEN_EXT_HI indicates an extension, and should not be
278 * interpreted as a length. The only extension that we currently
279 * understand is the use of DWARF64 addresses.
280 */
281 if (initial_len >= DW_EXT_LO && initial_len <= DW_EXT_HI) {
282 /*
283 * The 64-bit length field immediately follows the
284 * compulsory 32-bit length field.
285 */
286 if (initial_len == DW_EXT_DWARF64) {
287 *len = get_unaligned((u64 *)addr + 4);
288 count = 12;
289 } else {
290 printk(KERN_WARNING "Unknown DWARF extension\n");
291 count = 0;
292 }
293 } else
294 *len = initial_len;
295
296 return count;
297 }
298
299 /**
300 * dwarf_lookup_cie - locate the cie
301 * @cie_ptr: pointer to help with lookup
302 */
dwarf_lookup_cie(unsigned long cie_ptr)303 static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
304 {
305 struct rb_node **rb_node = &cie_root.rb_node;
306 struct dwarf_cie *cie = NULL;
307 unsigned long flags;
308
309 spin_lock_irqsave(&dwarf_cie_lock, flags);
310
311 /*
312 * We've cached the last CIE we looked up because chances are
313 * that the FDE wants this CIE.
314 */
315 if (cached_cie && cached_cie->cie_pointer == cie_ptr) {
316 cie = cached_cie;
317 goto out;
318 }
319
320 while (*rb_node) {
321 struct dwarf_cie *cie_tmp;
322
323 cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
324 BUG_ON(!cie_tmp);
325
326 if (cie_ptr == cie_tmp->cie_pointer) {
327 cie = cie_tmp;
328 cached_cie = cie_tmp;
329 goto out;
330 } else {
331 if (cie_ptr < cie_tmp->cie_pointer)
332 rb_node = &(*rb_node)->rb_left;
333 else
334 rb_node = &(*rb_node)->rb_right;
335 }
336 }
337
338 out:
339 spin_unlock_irqrestore(&dwarf_cie_lock, flags);
340 return cie;
341 }
342
343 /**
344 * dwarf_lookup_fde - locate the FDE that covers pc
345 * @pc: the program counter
346 */
dwarf_lookup_fde(unsigned long pc)347 static struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
348 {
349 struct rb_node **rb_node = &fde_root.rb_node;
350 struct dwarf_fde *fde = NULL;
351 unsigned long flags;
352
353 spin_lock_irqsave(&dwarf_fde_lock, flags);
354
355 while (*rb_node) {
356 struct dwarf_fde *fde_tmp;
357 unsigned long tmp_start, tmp_end;
358
359 fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
360 BUG_ON(!fde_tmp);
361
362 tmp_start = fde_tmp->initial_location;
363 tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
364
365 if (pc < tmp_start) {
366 rb_node = &(*rb_node)->rb_left;
367 } else {
368 if (pc < tmp_end) {
369 fde = fde_tmp;
370 goto out;
371 } else
372 rb_node = &(*rb_node)->rb_right;
373 }
374 }
375
376 out:
377 spin_unlock_irqrestore(&dwarf_fde_lock, flags);
378
379 return fde;
380 }
381
382 /**
383 * dwarf_cfa_execute_insns - execute instructions to calculate a CFA
384 * @insn_start: address of the first instruction
385 * @insn_end: address of the last instruction
386 * @cie: the CIE for this function
387 * @fde: the FDE for this function
388 * @frame: the instructions calculate the CFA for this frame
389 * @pc: the program counter of the address we're interested in
390 *
391 * Execute the Call Frame instruction sequence starting at
392 * @insn_start and ending at @insn_end. The instructions describe
393 * how to calculate the Canonical Frame Address of a stackframe.
394 * Store the results in @frame.
395 */
dwarf_cfa_execute_insns(unsigned char * insn_start,unsigned char * insn_end,struct dwarf_cie * cie,struct dwarf_fde * fde,struct dwarf_frame * frame,unsigned long pc)396 static int dwarf_cfa_execute_insns(unsigned char *insn_start,
397 unsigned char *insn_end,
398 struct dwarf_cie *cie,
399 struct dwarf_fde *fde,
400 struct dwarf_frame *frame,
401 unsigned long pc)
402 {
403 unsigned char insn;
404 unsigned char *current_insn;
405 unsigned int count, delta, reg, expr_len, offset;
406 struct dwarf_reg *regp;
407
408 current_insn = insn_start;
409
410 while (current_insn < insn_end && frame->pc <= pc) {
411 insn = __raw_readb(current_insn++);
412
413 /*
414 * Firstly, handle the opcodes that embed their operands
415 * in the instructions.
416 */
417 switch (DW_CFA_opcode(insn)) {
418 case DW_CFA_advance_loc:
419 delta = DW_CFA_operand(insn);
420 delta *= cie->code_alignment_factor;
421 frame->pc += delta;
422 continue;
423 /* NOTREACHED */
424 case DW_CFA_offset:
425 reg = DW_CFA_operand(insn);
426 count = dwarf_read_uleb128(current_insn, &offset);
427 current_insn += count;
428 offset *= cie->data_alignment_factor;
429 regp = dwarf_frame_alloc_reg(frame, reg);
430 regp->addr = offset;
431 regp->flags |= DWARF_REG_OFFSET;
432 continue;
433 /* NOTREACHED */
434 case DW_CFA_restore:
435 reg = DW_CFA_operand(insn);
436 continue;
437 /* NOTREACHED */
438 }
439
440 /*
441 * Secondly, handle the opcodes that don't embed their
442 * operands in the instruction.
443 */
444 switch (insn) {
445 case DW_CFA_nop:
446 continue;
447 case DW_CFA_advance_loc1:
448 delta = *current_insn++;
449 frame->pc += delta * cie->code_alignment_factor;
450 break;
451 case DW_CFA_advance_loc2:
452 delta = get_unaligned((u16 *)current_insn);
453 current_insn += 2;
454 frame->pc += delta * cie->code_alignment_factor;
455 break;
456 case DW_CFA_advance_loc4:
457 delta = get_unaligned((u32 *)current_insn);
458 current_insn += 4;
459 frame->pc += delta * cie->code_alignment_factor;
460 break;
461 case DW_CFA_offset_extended:
462 count = dwarf_read_uleb128(current_insn, ®);
463 current_insn += count;
464 count = dwarf_read_uleb128(current_insn, &offset);
465 current_insn += count;
466 offset *= cie->data_alignment_factor;
467 break;
468 case DW_CFA_restore_extended:
469 count = dwarf_read_uleb128(current_insn, ®);
470 current_insn += count;
471 break;
472 case DW_CFA_undefined:
473 count = dwarf_read_uleb128(current_insn, ®);
474 current_insn += count;
475 regp = dwarf_frame_alloc_reg(frame, reg);
476 regp->flags |= DWARF_UNDEFINED;
477 break;
478 case DW_CFA_def_cfa:
479 count = dwarf_read_uleb128(current_insn,
480 &frame->cfa_register);
481 current_insn += count;
482 count = dwarf_read_uleb128(current_insn,
483 &frame->cfa_offset);
484 current_insn += count;
485
486 frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
487 break;
488 case DW_CFA_def_cfa_register:
489 count = dwarf_read_uleb128(current_insn,
490 &frame->cfa_register);
491 current_insn += count;
492 frame->flags |= DWARF_FRAME_CFA_REG_OFFSET;
493 break;
494 case DW_CFA_def_cfa_offset:
495 count = dwarf_read_uleb128(current_insn, &offset);
496 current_insn += count;
497 frame->cfa_offset = offset;
498 break;
499 case DW_CFA_def_cfa_expression:
500 count = dwarf_read_uleb128(current_insn, &expr_len);
501 current_insn += count;
502
503 frame->cfa_expr = current_insn;
504 frame->cfa_expr_len = expr_len;
505 current_insn += expr_len;
506
507 frame->flags |= DWARF_FRAME_CFA_REG_EXP;
508 break;
509 case DW_CFA_offset_extended_sf:
510 count = dwarf_read_uleb128(current_insn, ®);
511 current_insn += count;
512 count = dwarf_read_leb128(current_insn, &offset);
513 current_insn += count;
514 offset *= cie->data_alignment_factor;
515 regp = dwarf_frame_alloc_reg(frame, reg);
516 regp->flags |= DWARF_REG_OFFSET;
517 regp->addr = offset;
518 break;
519 case DW_CFA_val_offset:
520 count = dwarf_read_uleb128(current_insn, ®);
521 current_insn += count;
522 count = dwarf_read_leb128(current_insn, &offset);
523 offset *= cie->data_alignment_factor;
524 regp = dwarf_frame_alloc_reg(frame, reg);
525 regp->flags |= DWARF_VAL_OFFSET;
526 regp->addr = offset;
527 break;
528 case DW_CFA_GNU_args_size:
529 count = dwarf_read_uleb128(current_insn, &offset);
530 current_insn += count;
531 break;
532 case DW_CFA_GNU_negative_offset_extended:
533 count = dwarf_read_uleb128(current_insn, ®);
534 current_insn += count;
535 count = dwarf_read_uleb128(current_insn, &offset);
536 offset *= cie->data_alignment_factor;
537
538 regp = dwarf_frame_alloc_reg(frame, reg);
539 regp->flags |= DWARF_REG_OFFSET;
540 regp->addr = -offset;
541 break;
542 default:
543 pr_debug("unhandled DWARF instruction 0x%x\n", insn);
544 UNWINDER_BUG();
545 break;
546 }
547 }
548
549 return 0;
550 }
551
552 /**
553 * dwarf_free_frame - free the memory allocated for @frame
554 * @frame: the frame to free
555 */
dwarf_free_frame(struct dwarf_frame * frame)556 void dwarf_free_frame(struct dwarf_frame *frame)
557 {
558 dwarf_frame_free_regs(frame);
559 mempool_free(frame, dwarf_frame_pool);
560 }
561
562 extern void ret_from_irq(void);
563
564 /**
565 * dwarf_unwind_stack - unwind the stack
566 *
567 * @pc: address of the function to unwind
568 * @prev: struct dwarf_frame of the previous stackframe on the callstack
569 *
570 * Return a struct dwarf_frame representing the most recent frame
571 * on the callstack. Each of the lower (older) stack frames are
572 * linked via the "prev" member.
573 */
dwarf_unwind_stack(unsigned long pc,struct dwarf_frame * prev)574 struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
575 struct dwarf_frame *prev)
576 {
577 struct dwarf_frame *frame;
578 struct dwarf_cie *cie;
579 struct dwarf_fde *fde;
580 struct dwarf_reg *reg;
581 unsigned long addr;
582
583 /*
584 * If we've been called in to before initialization has
585 * completed, bail out immediately.
586 */
587 if (!dwarf_unwinder_ready)
588 return NULL;
589
590 /*
591 * If we're starting at the top of the stack we need get the
592 * contents of a physical register to get the CFA in order to
593 * begin the virtual unwinding of the stack.
594 *
595 * NOTE: the return address is guaranteed to be setup by the
596 * time this function makes its first function call.
597 */
598 if (!pc || !prev)
599 pc = _THIS_IP_;
600
601 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
602 /*
603 * If our stack has been patched by the function graph tracer
604 * then we might see the address of return_to_handler() where we
605 * expected to find the real return address.
606 */
607 if (pc == (unsigned long)&return_to_handler) {
608 struct ftrace_ret_stack *ret_stack;
609
610 ret_stack = ftrace_graph_get_ret_stack(current, 0);
611 if (ret_stack)
612 pc = ret_stack->ret;
613 /*
614 * We currently have no way of tracking how many
615 * return_to_handler()'s we've seen. If there is more
616 * than one patched return address on our stack,
617 * complain loudly.
618 */
619 WARN_ON(ftrace_graph_get_ret_stack(current, 1));
620 }
621 #endif
622
623 frame = mempool_alloc(dwarf_frame_pool, GFP_ATOMIC);
624 if (!frame) {
625 printk(KERN_ERR "Unable to allocate a dwarf frame\n");
626 UNWINDER_BUG();
627 }
628
629 INIT_LIST_HEAD(&frame->reg_list);
630 frame->flags = 0;
631 frame->prev = prev;
632 frame->return_addr = 0;
633
634 fde = dwarf_lookup_fde(pc);
635 if (!fde) {
636 /*
637 * This is our normal exit path. There are two reasons
638 * why we might exit here,
639 *
640 * a) pc has no asscociated DWARF frame info and so
641 * we don't know how to unwind this frame. This is
642 * usually the case when we're trying to unwind a
643 * frame that was called from some assembly code
644 * that has no DWARF info, e.g. syscalls.
645 *
646 * b) the DEBUG info for pc is bogus. There's
647 * really no way to distinguish this case from the
648 * case above, which sucks because we could print a
649 * warning here.
650 */
651 goto bail;
652 }
653
654 cie = dwarf_lookup_cie(fde->cie_pointer);
655
656 frame->pc = fde->initial_location;
657
658 /* CIE initial instructions */
659 dwarf_cfa_execute_insns(cie->initial_instructions,
660 cie->instructions_end, cie, fde,
661 frame, pc);
662
663 /* FDE instructions */
664 dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
665 fde, frame, pc);
666
667 /* Calculate the CFA */
668 switch (frame->flags) {
669 case DWARF_FRAME_CFA_REG_OFFSET:
670 if (prev) {
671 reg = dwarf_frame_reg(prev, frame->cfa_register);
672 UNWINDER_BUG_ON(!reg);
673 UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
674
675 addr = prev->cfa + reg->addr;
676 frame->cfa = __raw_readl(addr);
677
678 } else {
679 /*
680 * Again, we're starting from the top of the
681 * stack. We need to physically read
682 * the contents of a register in order to get
683 * the Canonical Frame Address for this
684 * function.
685 */
686 frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
687 }
688
689 frame->cfa += frame->cfa_offset;
690 break;
691 default:
692 UNWINDER_BUG();
693 }
694
695 reg = dwarf_frame_reg(frame, DWARF_ARCH_RA_REG);
696
697 /*
698 * If we haven't seen the return address register or the return
699 * address column is undefined then we must assume that this is
700 * the end of the callstack.
701 */
702 if (!reg || reg->flags == DWARF_UNDEFINED)
703 goto bail;
704
705 UNWINDER_BUG_ON(reg->flags != DWARF_REG_OFFSET);
706
707 addr = frame->cfa + reg->addr;
708 frame->return_addr = __raw_readl(addr);
709
710 /*
711 * Ah, the joys of unwinding through interrupts.
712 *
713 * Interrupts are tricky - the DWARF info needs to be _really_
714 * accurate and unfortunately I'm seeing a lot of bogus DWARF
715 * info. For example, I've seen interrupts occur in epilogues
716 * just after the frame pointer (r14) had been restored. The
717 * problem was that the DWARF info claimed that the CFA could be
718 * reached by using the value of the frame pointer before it was
719 * restored.
720 *
721 * So until the compiler can be trusted to produce reliable
722 * DWARF info when it really matters, let's stop unwinding once
723 * we've calculated the function that was interrupted.
724 */
725 if (prev && prev->pc == (unsigned long)ret_from_irq)
726 frame->return_addr = 0;
727
728 return frame;
729
730 bail:
731 dwarf_free_frame(frame);
732 return NULL;
733 }
734
dwarf_parse_cie(void * entry,void * p,unsigned long len,unsigned char * end,struct module * mod)735 static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
736 unsigned char *end, struct module *mod)
737 {
738 struct rb_node **rb_node = &cie_root.rb_node;
739 struct rb_node *parent = *rb_node;
740 struct dwarf_cie *cie;
741 unsigned long flags;
742 int count;
743
744 cie = kzalloc(sizeof(*cie), GFP_KERNEL);
745 if (!cie)
746 return -ENOMEM;
747
748 cie->length = len;
749
750 /*
751 * Record the offset into the .eh_frame section
752 * for this CIE. It allows this CIE to be
753 * quickly and easily looked up from the
754 * corresponding FDE.
755 */
756 cie->cie_pointer = (unsigned long)entry;
757
758 cie->version = *(char *)p++;
759 UNWINDER_BUG_ON(cie->version != 1);
760
761 cie->augmentation = p;
762 p += strlen(cie->augmentation) + 1;
763
764 count = dwarf_read_uleb128(p, &cie->code_alignment_factor);
765 p += count;
766
767 count = dwarf_read_leb128(p, &cie->data_alignment_factor);
768 p += count;
769
770 /*
771 * Which column in the rule table contains the
772 * return address?
773 */
774 if (cie->version == 1) {
775 cie->return_address_reg = __raw_readb(p);
776 p++;
777 } else {
778 count = dwarf_read_uleb128(p, &cie->return_address_reg);
779 p += count;
780 }
781
782 if (cie->augmentation[0] == 'z') {
783 unsigned int length, count;
784 cie->flags |= DWARF_CIE_Z_AUGMENTATION;
785
786 count = dwarf_read_uleb128(p, &length);
787 p += count;
788
789 UNWINDER_BUG_ON((unsigned char *)p > end);
790
791 cie->initial_instructions = p + length;
792 cie->augmentation++;
793 }
794
795 while (*cie->augmentation) {
796 /*
797 * "L" indicates a byte showing how the
798 * LSDA pointer is encoded. Skip it.
799 */
800 if (*cie->augmentation == 'L') {
801 p++;
802 cie->augmentation++;
803 } else if (*cie->augmentation == 'R') {
804 /*
805 * "R" indicates a byte showing
806 * how FDE addresses are
807 * encoded.
808 */
809 cie->encoding = *(char *)p++;
810 cie->augmentation++;
811 } else if (*cie->augmentation == 'P') {
812 /*
813 * "R" indicates a personality
814 * routine in the CIE
815 * augmentation.
816 */
817 UNWINDER_BUG();
818 } else if (*cie->augmentation == 'S') {
819 UNWINDER_BUG();
820 } else {
821 /*
822 * Unknown augmentation. Assume
823 * 'z' augmentation.
824 */
825 p = cie->initial_instructions;
826 UNWINDER_BUG_ON(!p);
827 break;
828 }
829 }
830
831 cie->initial_instructions = p;
832 cie->instructions_end = end;
833
834 /* Add to list */
835 spin_lock_irqsave(&dwarf_cie_lock, flags);
836
837 while (*rb_node) {
838 struct dwarf_cie *cie_tmp;
839
840 cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
841
842 parent = *rb_node;
843
844 if (cie->cie_pointer < cie_tmp->cie_pointer)
845 rb_node = &parent->rb_left;
846 else if (cie->cie_pointer >= cie_tmp->cie_pointer)
847 rb_node = &parent->rb_right;
848 else
849 WARN_ON(1);
850 }
851
852 rb_link_node(&cie->node, parent, rb_node);
853 rb_insert_color(&cie->node, &cie_root);
854
855 #ifdef CONFIG_MODULES
856 if (mod != NULL)
857 list_add_tail(&cie->link, &mod->arch.cie_list);
858 #endif
859
860 spin_unlock_irqrestore(&dwarf_cie_lock, flags);
861
862 return 0;
863 }
864
/*
 * dwarf_parse_fde - parse one FDE and add it to the FDE tree
 * @entry: start of the FDE (its length field)
 * @entry_type: value of the FDE's CIE pointer field
 * @start: first byte after the CIE pointer field
 * @len: value of the FDE's length field
 * @end: first byte past the FDE
 * @mod: module owning the section, or NULL
 *
 * Allocate a struct dwarf_fde, fill it in from the encoded FDE and
 * insert it into fde_root (and, for a module, onto the module's
 * fde_list).
 *
 * Return 0 on success, -ENOMEM if the allocation fails, or -EINVAL if
 * the CIE the FDE refers to has not been parsed. An FDE whose start
 * address falls inside the range of an FDE already in the tree
 * triggers a warning and is dropped; 0 is returned so that the caller
 * carries on with the rest of the section.
 */
static int dwarf_parse_fde(void *entry, u32 entry_type,
			   void *start, unsigned long len,
			   unsigned char *end, struct module *mod)
{
	struct rb_node **rb_node = &fde_root.rb_node;
	struct rb_node *parent = *rb_node;
	struct dwarf_fde *fde;
	struct dwarf_cie *cie;
	unsigned long flags;
	unsigned long fde_start;
	int count;
	void *p = start;

	fde = kzalloc(sizeof(*fde), GFP_KERNEL);
	if (!fde)
		return -ENOMEM;

	fde->length = len;

	/*
	 * In a .eh_frame section the CIE pointer is the delta between
	 * the address of the CIE pointer field within the FDE and the
	 * start of the CIE it refers to.
	 */
	fde->cie_pointer = (unsigned long)(p - entry_type - 4);

	cie = dwarf_lookup_cie(fde->cie_pointer);
	if (!cie) {
		/* Nothing below can be decoded without the CIE. */
		kfree(fde);
		return -EINVAL;
	}
	fde->cie = cie;

	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->initial_location,
						 cie->encoding);
	else
		count = dwarf_read_addr(p, &fde->initial_location);

	p += count;

	if (cie->encoding)
		count = dwarf_read_encoded_value(p, &fde->address_range,
						 cie->encoding & 0x0f);
	else
		count = dwarf_read_addr(p, &fde->address_range);

	p += count;

	if (fde->cie->flags & DWARF_CIE_Z_AUGMENTATION) {
		unsigned int length;

		/* Skip the augmentation data and its length field. */
		count = dwarf_read_uleb128(p, &length);
		p += count + length;
	}

	/* Call frame instructions. */
	fde->instructions = p;
	fde->end = end;

	fde_start = fde->initial_location;

	/* Add to list. */
	spin_lock_irqsave(&dwarf_fde_lock, flags);

	while (*rb_node) {
		struct dwarf_fde *fde_tmp;
		unsigned long tmp_start, tmp_end;

		fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);

		tmp_start = fde_tmp->initial_location;
		tmp_end = fde_tmp->initial_location + fde_tmp->address_range;

		parent = *rb_node;

		if (fde_start < tmp_start) {
			rb_node = &parent->rb_left;
		} else if (fde_start >= tmp_end) {
			rb_node = &parent->rb_right;
		} else {
			/*
			 * The new FDE starts inside an existing one.
			 * Neither child is a valid slot, and staying on
			 * this node would spin here forever with the
			 * lock held, so drop the entry.
			 */
			spin_unlock_irqrestore(&dwarf_fde_lock, flags);
			WARN_ON(1);
			kfree(fde);
			return 0;
		}
	}

	rb_link_node(&fde->node, parent, rb_node);
	rb_insert_color(&fde->node, &fde_root);

#ifdef CONFIG_MODULES
	if (mod != NULL)
		list_add_tail(&fde->link, &mod->arch.fde_list);
#endif

	spin_unlock_irqrestore(&dwarf_fde_lock, flags);

	return 0;
}

dwarf_unwinder_dump(struct task_struct * task,struct pt_regs * regs,unsigned long * sp,const struct stacktrace_ops * ops,void * data)957 static void dwarf_unwinder_dump(struct task_struct *task,
958 struct pt_regs *regs,
959 unsigned long *sp,
960 const struct stacktrace_ops *ops,
961 void *data)
962 {
963 struct dwarf_frame *frame, *_frame;
964 unsigned long return_addr;
965
966 _frame = NULL;
967 return_addr = 0;
968
969 while (1) {
970 frame = dwarf_unwind_stack(return_addr, _frame);
971
972 if (_frame)
973 dwarf_free_frame(_frame);
974
975 _frame = frame;
976
977 if (!frame || !frame->return_addr)
978 break;
979
980 return_addr = frame->return_addr;
981 ops->address(data, return_addr, 1);
982 }
983
984 if (frame)
985 dwarf_free_frame(frame);
986 }
987
988 static struct unwinder dwarf_unwinder = {
989 .name = "dwarf-unwinder",
990 .dump = dwarf_unwinder_dump,
991 .rating = 150,
992 };
993
dwarf_unwinder_cleanup(void)994 static void __init dwarf_unwinder_cleanup(void)
995 {
996 struct dwarf_fde *fde, *next_fde;
997 struct dwarf_cie *cie, *next_cie;
998
999 /*
1000 * Deallocate all the memory allocated for the DWARF unwinder.
1001 * Traverse all the FDE/CIE lists and remove and free all the
1002 * memory associated with those data structures.
1003 */
1004 rbtree_postorder_for_each_entry_safe(fde, next_fde, &fde_root, node)
1005 kfree(fde);
1006
1007 rbtree_postorder_for_each_entry_safe(cie, next_cie, &cie_root, node)
1008 kfree(cie);
1009
1010 mempool_destroy(dwarf_reg_pool);
1011 mempool_destroy(dwarf_frame_pool);
1012 kmem_cache_destroy(dwarf_reg_cachep);
1013 kmem_cache_destroy(dwarf_frame_cachep);
1014 }
1015
1016 /**
1017 * dwarf_parse_section - parse DWARF section
1018 * @eh_frame_start: start address of the .eh_frame section
1019 * @eh_frame_end: end address of the .eh_frame section
1020 * @mod: the kernel module containing the .eh_frame section
1021 *
1022 * Parse the information in a .eh_frame section.
1023 */
dwarf_parse_section(char * eh_frame_start,char * eh_frame_end,struct module * mod)1024 static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
1025 struct module *mod)
1026 {
1027 u32 entry_type;
1028 void *p, *entry;
1029 int count, err = 0;
1030 unsigned long len = 0;
1031 unsigned int c_entries, f_entries;
1032 unsigned char *end;
1033
1034 c_entries = 0;
1035 f_entries = 0;
1036 entry = eh_frame_start;
1037
1038 while ((char *)entry < eh_frame_end) {
1039 p = entry;
1040
1041 count = dwarf_entry_len(p, &len);
1042 if (count == 0) {
1043 /*
1044 * We read a bogus length field value. There is
1045 * nothing we can do here apart from disabling
1046 * the DWARF unwinder. We can't even skip this
1047 * entry and move to the next one because 'len'
1048 * tells us where our next entry is.
1049 */
1050 err = -EINVAL;
1051 goto out;
1052 } else
1053 p += count;
1054
1055 /* initial length does not include itself */
1056 end = p + len;
1057
1058 entry_type = get_unaligned((u32 *)p);
1059 p += 4;
1060
1061 if (entry_type == DW_EH_FRAME_CIE) {
1062 err = dwarf_parse_cie(entry, p, len, end, mod);
1063 if (err < 0)
1064 goto out;
1065 else
1066 c_entries++;
1067 } else {
1068 err = dwarf_parse_fde(entry, entry_type, p, len,
1069 end, mod);
1070 if (err < 0)
1071 goto out;
1072 else
1073 f_entries++;
1074 }
1075
1076 entry = (char *)entry + len + 4;
1077 }
1078
1079 printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
1080 c_entries, f_entries);
1081
1082 return 0;
1083
1084 out:
1085 return err;
1086 }
1087
#ifdef CONFIG_MODULES
/**
 * module_dwarf_finalize - parse the .eh_frame section of a module
 * @hdr: ELF header of the module image
 * @sechdrs: section header table of the module image
 * @me: the module being loaded
 *
 * Look for an allocated section named ".eh_frame" and, if there is
 * one, feed it to dwarf_parse_section() so that its CIEs and FDEs are
 * added to the global trees and to @me's lists.
 *
 * Return 0 on success (including when the module has no .eh_frame
 * section) or the negative error from dwarf_parse_section().
 */
int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
			  struct module *me)
{
	unsigned int i;
	int err;	/* signed: carries negative errno values */
	unsigned long start, end;
	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;

	start = end = 0;

	/*
	 * Initialise the lists unconditionally: module_dwarf_cleanup()
	 * walks them without checking whether a .eh_frame section was
	 * ever found.
	 */
	INIT_LIST_HEAD(&me->arch.cie_list);
	INIT_LIST_HEAD(&me->arch.fde_list);

	for (i = 1; i < hdr->e_shnum; i++) {
		/* Alloc bit cleared means "ignore it." */
		if ((sechdrs[i].sh_flags & SHF_ALLOC)
		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
			start = sechdrs[i].sh_addr;
			end = start + sechdrs[i].sh_size;
			break;
		}
	}

	/* Did we find the .eh_frame section? */
	if (i != hdr->e_shnum) {
		err = dwarf_parse_section((char *)start, (char *)end, me);
		if (err) {
			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
			       me->name);
			return err;
		}
	}

	return 0;
}

/**
 * module_dwarf_cleanup - remove FDE/CIEs associated with @mod
 * @mod: the module that is being unloaded
 *
 * Remove any FDEs and CIEs from the global lists that came from
 * @mod's .eh_frame section because @mod is being unloaded.
 */
void module_dwarf_cleanup(struct module *mod)
{
	struct dwarf_fde *fde, *ftmp;
	struct dwarf_cie *cie, *ctmp;
	unsigned long flags;

	spin_lock_irqsave(&dwarf_cie_lock, flags);

	list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
		/*
		 * dwarf_lookup_cie() dereferences cached_cie, so it
		 * must not be left pointing at a CIE we free here.
		 */
		if (cached_cie == cie)
			cached_cie = NULL;

		list_del(&cie->link);
		rb_erase(&cie->node, &cie_root);
		kfree(cie);
	}

	spin_unlock_irqrestore(&dwarf_cie_lock, flags);

	spin_lock_irqsave(&dwarf_fde_lock, flags);

	list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
		list_del(&fde->link);
		rb_erase(&fde->node, &fde_root);
		kfree(fde);
	}

	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
}
#endif /* CONFIG_MODULES */

1158 /**
1159 * dwarf_unwinder_init - initialise the dwarf unwinder
1160 *
1161 * Build the data structures describing the .dwarf_frame section to
1162 * make it easier to lookup CIE and FDE entries. Because the
1163 * .eh_frame section is packed as tightly as possible it is not
1164 * easy to lookup the FDE for a given PC, so we build a list of FDE
1165 * and CIE entries that make it easier.
1166 */
dwarf_unwinder_init(void)1167 static int __init dwarf_unwinder_init(void)
1168 {
1169 int err = -ENOMEM;
1170
1171 dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
1172 sizeof(struct dwarf_frame), 0,
1173 SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
1174
1175 dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
1176 sizeof(struct dwarf_reg), 0,
1177 SLAB_PANIC | SLAB_HWCACHE_ALIGN, NULL);
1178
1179 dwarf_frame_pool = mempool_create_slab_pool(DWARF_FRAME_MIN_REQ,
1180 dwarf_frame_cachep);
1181 if (!dwarf_frame_pool)
1182 goto out;
1183
1184 dwarf_reg_pool = mempool_create_slab_pool(DWARF_REG_MIN_REQ,
1185 dwarf_reg_cachep);
1186 if (!dwarf_reg_pool)
1187 goto out;
1188
1189 err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
1190 if (err)
1191 goto out;
1192
1193 err = unwinder_register(&dwarf_unwinder);
1194 if (err)
1195 goto out;
1196
1197 dwarf_unwinder_ready = 1;
1198
1199 return 0;
1200
1201 out:
1202 printk(KERN_ERR "Failed to initialise DWARF unwinder: %d\n", err);
1203 dwarf_unwinder_cleanup();
1204 return err;
1205 }
1206 early_initcall(dwarf_unwinder_init);
1207