1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /*
26 * Copyright (c) 2018, Joyent, Inc. All rights reserved.
27 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
28 * Copyright 2025 Oxide Computer Company
29 */
30
31 #include <sys/types.h>
32 #include <sys/types32.h>
33 #include <sys/reg.h>
34 #include <sys/privregs.h>
35 #include <sys/stack.h>
36 #include <sys/frame.h>
37
38 #include <mdb/mdb_isautil.h>
39 #include <mdb/mdb_ia32util.h>
40 #include <mdb/mdb_target_impl.h>
41 #include <mdb/mdb_kreg_impl.h>
42 #include <mdb/mdb_stack.h>
43 #include <mdb/mdb_debug.h>
44 #include <mdb/mdb_modapi.h>
45 #include <mdb/mdb_err.h>
46 #include <mdb/mdb.h>
47
48 #ifndef __amd64
49 /*
50 * We also define an array of register names and their corresponding
51 * array indices. This is used by the getareg and putareg entry points,
52 * and also by our register variable discipline.
53 *
54 * When built into an amd64 mdb this won't be used as it's only a subset of
55 * mdb_amd64_kregs, hence the #ifdef.
56 */
57 const mdb_tgt_regdesc_t mdb_ia32_kregs[] = {
58 { "savfp", KREG_SAVFP, MDB_TGT_R_EXPORT },
59 { "savpc", KREG_SAVPC, MDB_TGT_R_EXPORT },
60 { "eax", KREG_EAX, MDB_TGT_R_EXPORT },
61 { "ax", KREG_EAX, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
62 { "ah", KREG_EAX, MDB_TGT_R_EXPORT | MDB_TGT_R_8H },
63 { "al", KREG_EAX, MDB_TGT_R_EXPORT | MDB_TGT_R_8L },
64 { "ebx", KREG_EBX, MDB_TGT_R_EXPORT },
65 { "bx", KREG_EBX, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
66 { "bh", KREG_EBX, MDB_TGT_R_EXPORT | MDB_TGT_R_8H },
67 { "bl", KREG_EBX, MDB_TGT_R_EXPORT | MDB_TGT_R_8L },
68 { "ecx", KREG_ECX, MDB_TGT_R_EXPORT },
69 { "cx", KREG_ECX, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
70 { "ch", KREG_ECX, MDB_TGT_R_EXPORT | MDB_TGT_R_8H },
71 { "cl", KREG_ECX, MDB_TGT_R_EXPORT | MDB_TGT_R_8L },
72 { "edx", KREG_EDX, MDB_TGT_R_EXPORT },
73 { "dx", KREG_EDX, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
74 { "dh", KREG_EDX, MDB_TGT_R_EXPORT | MDB_TGT_R_8H },
75 { "dl", KREG_EDX, MDB_TGT_R_EXPORT | MDB_TGT_R_8L },
76 { "esi", KREG_ESI, MDB_TGT_R_EXPORT },
77 { "si", KREG_ESI, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
78 { "edi", KREG_EDI, MDB_TGT_R_EXPORT },
79 { "di", EDI, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
80 { "ebp", KREG_EBP, MDB_TGT_R_EXPORT },
81 { "bp", KREG_EBP, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
82 { "esp", KREG_ESP, MDB_TGT_R_EXPORT },
83 { "sp", KREG_ESP, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
84 { "cs", KREG_CS, MDB_TGT_R_EXPORT },
85 { "ds", KREG_DS, MDB_TGT_R_EXPORT },
86 { "ss", KREG_SS, MDB_TGT_R_EXPORT },
87 { "es", KREG_ES, MDB_TGT_R_EXPORT },
88 { "fs", KREG_FS, MDB_TGT_R_EXPORT },
89 { "gs", KREG_GS, MDB_TGT_R_EXPORT },
90 { "eflags", KREG_EFLAGS, MDB_TGT_R_EXPORT },
91 { "eip", KREG_EIP, MDB_TGT_R_EXPORT },
92 { "uesp", KREG_UESP, MDB_TGT_R_EXPORT | MDB_TGT_R_PRIV },
93 { "usp", KREG_UESP, MDB_TGT_R_EXPORT | MDB_TGT_R_16 },
94 { "trapno", KREG_TRAPNO, MDB_TGT_R_EXPORT | MDB_TGT_R_PRIV },
95 { "err", KREG_ERR, MDB_TGT_R_EXPORT | MDB_TGT_R_PRIV },
96 { NULL, 0, 0 }
97 };
98 #endif
99
100 void
mdb_ia32_printregs(const mdb_tgt_gregset_t * gregs)101 mdb_ia32_printregs(const mdb_tgt_gregset_t *gregs)
102 {
103 const kreg_t *kregs = &gregs->kregs[0];
104 kreg_t eflags = kregs[KREG_EFLAGS];
105
106 mdb_printf("%%cs = 0x%04x\t\t%%eax = 0x%08p %A\n",
107 kregs[KREG_CS], kregs[KREG_EAX], kregs[KREG_EAX]);
108
109 mdb_printf("%%ds = 0x%04x\t\t%%ebx = 0x%08p %A\n",
110 kregs[KREG_DS], kregs[KREG_EBX], kregs[KREG_EBX]);
111
112 mdb_printf("%%ss = 0x%04x\t\t%%ecx = 0x%08p %A\n",
113 kregs[KREG_SS], kregs[KREG_ECX], kregs[KREG_ECX]);
114
115 mdb_printf("%%es = 0x%04x\t\t%%edx = 0x%08p %A\n",
116 kregs[KREG_ES], kregs[KREG_EDX], kregs[KREG_EDX]);
117
118 mdb_printf("%%fs = 0x%04x\t\t%%esi = 0x%08p %A\n",
119 kregs[KREG_FS], kregs[KREG_ESI], kregs[KREG_ESI]);
120
121 mdb_printf("%%gs = 0x%04x\t\t%%edi = 0x%08p %A\n\n",
122 kregs[KREG_GS], kregs[KREG_EDI], kregs[KREG_EDI]);
123
124 mdb_printf("%%eip = 0x%08p %A\n", kregs[KREG_EIP], kregs[KREG_EIP]);
125 mdb_printf("%%ebp = 0x%08p\n", kregs[KREG_EBP]);
126 mdb_printf("%%esp = 0x%08p\n\n", kregs[KREG_ESP]);
127 mdb_printf("%%eflags = 0x%08x\n", eflags);
128
129 mdb_printf(" id=%u vip=%u vif=%u ac=%u vm=%u rf=%u nt=%u iopl=0x%x\n",
130 (eflags & KREG_EFLAGS_ID_MASK) >> KREG_EFLAGS_ID_SHIFT,
131 (eflags & KREG_EFLAGS_VIP_MASK) >> KREG_EFLAGS_VIP_SHIFT,
132 (eflags & KREG_EFLAGS_VIF_MASK) >> KREG_EFLAGS_VIF_SHIFT,
133 (eflags & KREG_EFLAGS_AC_MASK) >> KREG_EFLAGS_AC_SHIFT,
134 (eflags & KREG_EFLAGS_VM_MASK) >> KREG_EFLAGS_VM_SHIFT,
135 (eflags & KREG_EFLAGS_RF_MASK) >> KREG_EFLAGS_RF_SHIFT,
136 (eflags & KREG_EFLAGS_NT_MASK) >> KREG_EFLAGS_NT_SHIFT,
137 (eflags & KREG_EFLAGS_IOPL_MASK) >> KREG_EFLAGS_IOPL_SHIFT);
138
139 mdb_printf(" status=<%s,%s,%s,%s,%s,%s,%s,%s,%s>\n\n",
140 (eflags & KREG_EFLAGS_OF_MASK) ? "OF" : "of",
141 (eflags & KREG_EFLAGS_DF_MASK) ? "DF" : "df",
142 (eflags & KREG_EFLAGS_IF_MASK) ? "IF" : "if",
143 (eflags & KREG_EFLAGS_TF_MASK) ? "TF" : "tf",
144 (eflags & KREG_EFLAGS_SF_MASK) ? "SF" : "sf",
145 (eflags & KREG_EFLAGS_ZF_MASK) ? "ZF" : "zf",
146 (eflags & KREG_EFLAGS_AF_MASK) ? "AF" : "af",
147 (eflags & KREG_EFLAGS_PF_MASK) ? "PF" : "pf",
148 (eflags & KREG_EFLAGS_CF_MASK) ? "CF" : "cf");
149
150 #if !defined(__amd64) && !defined(_KMDB)
151 mdb_printf(" %%uesp = 0x%08x\n", kregs[KREG_UESP]);
152 #endif
153 mdb_printf("%%trapno = 0x%x\n", kregs[KREG_TRAPNO]);
154 mdb_printf(" %%err = 0x%x\n", kregs[KREG_ERR]);
155 }
156
157 /*
158 * Given a return address (%eip), determine the likely number of arguments
159 * that were pushed on the stack prior to its execution. We do this by
160 * expecting that a typical call sequence consists of pushing arguments on
161 * the stack, executing a call instruction, and then performing an add
162 * on %esp to restore it to the value prior to pushing the arguments for
163 * the call. We attempt to detect such an add, and divide the addend
164 * by the size of a word to determine the number of pushed arguments.
165 */
166 static uint_t
kvm_argcount(mdb_tgt_t * t,uintptr_t eip,ssize_t size)167 kvm_argcount(mdb_tgt_t *t, uintptr_t eip, ssize_t size)
168 {
169 uint8_t ins[6];
170 ulong_t n;
171
172 enum {
173 M_MODRM_ESP = 0xc4, /* Mod/RM byte indicates %esp */
174 M_ADD_IMM32 = 0x81, /* ADD imm32 to r/m32 */
175 M_ADD_IMM8 = 0x83 /* ADD imm8 to r/m32 */
176 };
177
178 if (mdb_tgt_aread(t, MDB_TGT_AS_VIRT_I, ins, sizeof (ins), eip) !=
179 sizeof (ins))
180 return (0);
181
182 if (ins[1] != M_MODRM_ESP)
183 return (0);
184
185 switch (ins[0]) {
186 case M_ADD_IMM32:
187 n = ins[2] + (ins[3] << 8) + (ins[4] << 16) + (ins[5] << 24);
188 break;
189
190 case M_ADD_IMM8:
191 n = ins[2];
192 break;
193
194 default:
195 n = 0;
196 }
197
198 return (MIN((ssize_t)n, size) / sizeof (uint32_t));
199 }
200
201 int
mdb_ia32_kvm_stack_iter(mdb_tgt_t * t,const mdb_tgt_gregset_t * gsp,mdb_tgt_stack_f * func,void * arg)202 mdb_ia32_kvm_stack_iter(mdb_tgt_t *t, const mdb_tgt_gregset_t *gsp,
203 mdb_tgt_stack_f *func, void *arg)
204 {
205 mdb_tgt_gregset_t gregs;
206 kreg_t *kregs = &gregs.kregs[0];
207 int got_pc = (gsp->kregs[KREG_EIP] != 0);
208 int err;
209
210 struct fr {
211 uintptr32_t fr_savfp;
212 uintptr32_t fr_savpc;
213 uint32_t fr_argv[32];
214 } fr;
215
216 uintptr_t fp = gsp->kregs[KREG_EBP];
217 uintptr_t pc = gsp->kregs[KREG_EIP];
218 uintptr_t lastfp = 0;
219
220 ssize_t size;
221 uint_t argc;
222 int detect_exception_frames = 0;
223 int advance_tortoise = 1;
224 uintptr_t tortoise_fp = 0;
225 #ifndef _KMDB
226 int xp;
227
228 if ((mdb_readsym(&xp, sizeof (xp), "xpv_panicking") != -1) && (xp > 0))
229 detect_exception_frames = 1;
230 #endif
231
232 bcopy(gsp, &gregs, sizeof (gregs));
233
234 while (fp != 0) {
235 if (fp & (STACK_ALIGN - 1)) {
236 err = EMDB_STKALIGN;
237 goto badfp;
238 }
239 if ((size = mdb_tgt_aread(t, MDB_TGT_AS_VIRT_S, &fr,
240 sizeof (fr), fp)) >= (ssize_t)(2 * sizeof (uintptr32_t))) {
241 size -= (ssize_t)(2 * sizeof (uintptr32_t));
242 argc = kvm_argcount(t, fr.fr_savpc, size);
243 } else {
244 err = EMDB_NOMAP;
245 goto badfp;
246 }
247
248 if (tortoise_fp == 0) {
249 tortoise_fp = fp;
250 } else {
251 /*
252 * Advance tortoise_fp every other frame, so we detect
253 * cycles with Floyd's tortoise/hare.
254 */
255 if (advance_tortoise != 0) {
256 struct fr tfr;
257
258 if (mdb_tgt_aread(t, MDB_TGT_AS_VIRT_S, &tfr,
259 sizeof (tfr), tortoise_fp) !=
260 sizeof (tfr)) {
261 err = EMDB_NOMAP;
262 goto badfp;
263 }
264
265 tortoise_fp = tfr.fr_savfp;
266 }
267
268 if (fp == tortoise_fp) {
269 err = EMDB_STKFRAME;
270 goto badfp;
271 }
272 }
273
274 advance_tortoise = !advance_tortoise;
275
276 if (got_pc &&
277 func(arg, pc, argc, (const long *)fr.fr_argv, &gregs) != 0)
278 break;
279
280 kregs[KREG_ESP] = kregs[KREG_EBP];
281
282 lastfp = fp;
283 fp = fr.fr_savfp;
284 /*
285 * The Xen hypervisor marks a stack frame as belonging to
286 * an exception by inverting the bits of the pointer to
287 * that frame. We attempt to identify these frames by
288 * inverting the pointer and seeing if it is within 0xfff
289 * bytes of the last frame.
290 */
291 if (detect_exception_frames)
292 if ((fp != 0) && (fp < lastfp) &&
293 ((lastfp ^ ~fp) < 0xfff))
294 fp = ~fp;
295
296 kregs[KREG_EBP] = fp;
297 kregs[KREG_EIP] = pc = fr.fr_savpc;
298
299 got_pc = (pc != 0);
300 }
301
302 return (0);
303
304 badfp:
305 mdb_printf("%p [%s]", fp, mdb_strerror(err));
306 return (set_errno(err));
307 }
308
309 #ifndef __amd64
310 /*
311 * The functions mdb_ia32_step_out and mdb_ia32_next haven't yet been adapted
312 * to work when built for an amd64 mdb. They are unused by the amd64-only bhyve
313 * target, hence the #ifdef.
314 */
315 /*
316 * Determine the return address for the current frame. Typically this is the
317 * fr_savpc value from the current frame, but we also perform some special
318 * handling to see if we are stopped on one of the first two instructions of a
319 * typical function prologue, in which case %ebp will not be set up yet.
320 */
321 int
mdb_ia32_step_out(mdb_tgt_t * t,uintptr_t * p,kreg_t pc,kreg_t fp,kreg_t sp,mdb_instr_t curinstr)322 mdb_ia32_step_out(mdb_tgt_t *t, uintptr_t *p, kreg_t pc, kreg_t fp, kreg_t sp,
323 mdb_instr_t curinstr)
324 {
325 struct frame fr;
326 GElf_Sym s;
327 char buf[1];
328
329 enum {
330 M_PUSHL_EBP = 0x55, /* pushl %ebp */
331 M_MOVL_EBP = 0x8b /* movl %esp, %ebp */
332 };
333
334 if (mdb_tgt_lookup_by_addr(t, pc, MDB_TGT_SYM_FUZZY,
335 buf, 0, &s, NULL) == 0) {
336 if (pc == s.st_value && curinstr == M_PUSHL_EBP)
337 fp = sp - 4;
338 else if (pc == s.st_value + 1 && curinstr == M_MOVL_EBP)
339 fp = sp;
340 }
341
342 if (mdb_tgt_aread(t, MDB_TGT_AS_VIRT_S, &fr, sizeof (fr), fp) ==
343 sizeof (fr)) {
344 *p = fr.fr_savpc;
345 return (0);
346 }
347
348 return (-1); /* errno is set for us */
349 }
350
351 /*
352 * Return the address of the next instruction following a call, or return -1
353 * and set errno to EAGAIN if the target should just single-step. We perform
354 * a bit of disassembly on the current instruction in order to determine if it
355 * is a call and how many bytes should be skipped, depending on the exact form
356 * of the call instruction that is being used.
357 */
358 int
mdb_ia32_next(mdb_tgt_t * t,uintptr_t * p,kreg_t pc,mdb_instr_t curinstr)359 mdb_ia32_next(mdb_tgt_t *t, uintptr_t *p, kreg_t pc, mdb_instr_t curinstr)
360 {
361 uint8_t m;
362
363 enum {
364 M_CALL_REL = 0xe8, /* call near with relative displacement */
365 M_CALL_REG = 0xff, /* call near indirect or call far register */
366
367 M_MODRM_MD = 0xc0, /* mask for Mod/RM byte Mod field */
368 M_MODRM_OP = 0x38, /* mask for Mod/RM byte opcode field */
369 M_MODRM_RM = 0x07, /* mask for Mod/RM byte R/M field */
370
371 M_MD_IND = 0x00, /* Mod code for [REG] */
372 M_MD_DSP8 = 0x40, /* Mod code for disp8[REG] */
373 M_MD_DSP32 = 0x80, /* Mod code for disp32[REG] */
374 M_MD_REG = 0xc0, /* Mod code for REG */
375
376 M_OP_IND = 0x10, /* Opcode for call near indirect */
377 M_RM_DSP32 = 0x05 /* R/M code for disp32 */
378 };
379
380 /*
381 * If the opcode is a near call with relative displacement, assume the
382 * displacement is a rel32 from the next instruction.
383 */
384 if (curinstr == M_CALL_REL) {
385 *p = pc + sizeof (mdb_instr_t) + sizeof (uint32_t);
386 return (0);
387 }
388
389 /*
390 * If the opcode is a call near indirect or call far register opcode,
391 * read the subsequent Mod/RM byte to perform additional decoding.
392 */
393 if (curinstr == M_CALL_REG) {
394 if (mdb_tgt_aread(t, MDB_TGT_AS_VIRT_I, &m, sizeof (m), pc + 1)
395 != sizeof (m))
396 return (-1); /* errno is set for us */
397
398 /*
399 * If the Mod/RM opcode extension indicates a near indirect
400 * call, then skip the appropriate number of additional
401 * bytes depending on the addressing form that is used.
402 */
403 if ((m & M_MODRM_OP) == M_OP_IND) {
404 switch (m & M_MODRM_MD) {
405 case M_MD_DSP8:
406 *p = pc + 3; /* skip pr_instr, m, disp8 */
407 break;
408 case M_MD_DSP32:
409 *p = pc + 6; /* skip pr_instr, m, disp32 */
410 break;
411 case M_MD_IND:
412 if ((m & M_MODRM_RM) == M_RM_DSP32) {
413 *p = pc + 6;
414 break; /* skip pr_instr, m, disp32 */
415 }
416 /* FALLTHRU */
417 case M_MD_REG:
418 *p = pc + 2; /* skip pr_instr, m */
419 break;
420 }
421 return (0);
422 }
423 }
424
425 return (set_errno(EAGAIN));
426 }
427 #endif
428
429 int
mdb_ia32_kvm_frame(void * argp,uintptr_t pc,uint_t argc,const long * argv,const mdb_tgt_gregset_t * gregs)430 mdb_ia32_kvm_frame(void *argp, uintptr_t pc, uint_t argc, const long *argv,
431 const mdb_tgt_gregset_t *gregs)
432 {
433 mdb_stack_frame_hdl_t *hdl = argp;
434 uint64_t bp;
435
436 bp = gregs->kregs[KREG_EBP];
437 mdb_stack_frame(hdl, pc, bp, argc, argv);
438 return (0);
439 }
440
441 /*
442 * Check if the instruction immediately before the given program counter (pcp)
443 * is a CALL instruction in IA-32 (x86 32-bit) mode. Since x86 instructions are
444 * variable-length, we read the 8 bytes preceding the PC and look for specific
445 * call encodings at known offsets that would align with common call
446 * instruction lengths. Although x86 instructions can be up to 15 bytes long,
447 * for a CALL to reach that length would require a long sequence of prefixes.
448 * Of those, only the address-size prefix would affect where we need to look
449 * for the instruction, and such prefixes are extremely rare in real-world
450 * code.
451 */
452 boolean_t
mdb_ia32_prev_callcheck(uintptr_t pcp)453 mdb_ia32_prev_callcheck(uintptr_t pcp)
454 {
455 uint8_t buf[8];
456
457 /*
458 * Ensure we can read 8 bytes before the PC. This accommodates the
459 * largest call encoding we care about (far calls).
460 */
461 if (pcp < 8 || mdb_vread(buf, sizeof (buf), pcp - 8) != sizeof (buf))
462 return (B_FALSE);
463
464 /*
465 * Direct near call: CALL rel32
466 * Opcode: E8, followed by 4-byte PC-relative offset.
467 */
468 if (buf[3] == 0xe8)
469 return (B_TRUE);
470
471 /*
472 * Indirect near call: CALL r/m32
473 * Opcode: FF /2 (i.e., reg field of ModR/M is 010).
474 *
475 * We're expecting the instruction to be exactly 2 bytes: FF 14,
476 * with opcode at buf[5] and ModR/M at buf[6].
477 *
478 * buf[6] == 0x14 means:
479 * - mod = 00 (no displacement)
480 * - reg = 010 (CALL)
481 * - r/m = 100 (SIB follows — typically [esp])
482 *
483 * This form is common in PLT stubs like: CALL DWORD PTR [ESP]
484 *
485 * Other encodings of FF /2 are less plausible here:
486 * - mod = 01 - 8-bit displacement - unlikely for noreturn functions
487 * - mod = 10 - 32-bit displacement - would overlap with PC; invalid
488 * - mod = 00 with r/m != 100 - e.g., CALL EAX - would return to
489 * buf[7], not pcp
490 */
491 if (buf[5] == 0xff && buf[6] == 0x14)
492 return (B_TRUE);
493
494 /*
495 * Indirect absolute call: CALL DWORD PTR [disp32]
496 * Encoding: FF 15 xx xx xx xx
497 * Instruction is 6 bytes long; opcode at buf[2], ModR/M at buf[3].
498 * Used to call through global function pointers.
499 */
500 if (buf[2] == 0xff && buf[3] == 0x15)
501 return (B_TRUE);
502
503 /*
504 * Far call (segment-based): CALL FAR ptr16:32
505 * Opcode: 9A, followed by 6-byte far pointer.
506 * Instruction is 7 bytes; opcode at buf[0].
507 */
508 if (buf[0] == 0x9a)
509 return (B_TRUE);
510
511 return (B_FALSE);
512 }
513