xref: /illumos-gate/usr/src/uts/intel/kdi/kdi_idt.c (revision 0cfdb6036e046270988a17ac442e4d717d426a44)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Management of KMDB's IDT, which is installed upon KMDB activation.
30  *
31  * Debugger activation has two flavors, which cover the cases where KMDB is
32  * loaded at boot, and when it is loaded after boot.  In brief, in both cases,
33  * the KDI needs to interpose upon several handlers in the IDT.  When
34  * mod-loaded KMDB is deactivated, we undo the IDT interposition, restoring the
35  * handlers to what they were before we started.
36  *
37  * We also take over the entirety of IDT (except the double-fault handler) on
38  * the active CPU when we're in kmdb so we can handle things like page faults
39  * sensibly.
40  *
41  * Boot-loaded KMDB
42  *
43  * When we're first activated, we're running on boot's IDT.  We need to be able
44  * to function in this world, so we'll install our handlers into boot's IDT.
45  * This is a little complicated: we're using the fake cpu_t set up by
46  * boot_kdi_tmpinit(), so we can't access cpu_idt directly.  Instead,
47  * kdi_idt_write() notices that cpu_idt is NULL, and works around this problem.
48  *
49  * Later, when we're about to switch to the kernel's IDT, it'll call us via
50  * kdi_idt_sync(), allowing us to add our handlers to the new IDT.  While
51  * boot-loaded KMDB can't be unloaded, we still need to save the descriptors we
52  * replace so we can pass traps back to the kernel as necessary.
53  *
54  * The last phase of boot-loaded KMDB activation occurs at non-boot CPU
55  * startup.  We will be called on each non-boot CPU, thus allowing us to set up
56  * any watchpoints that may have been configured on the boot CPU and interpose
57  * on the given CPU's IDT.  We don't save the interposed descriptors in this
58  * case -- see kdi_cpu_init() for details.
59  *
60  * Mod-loaded KMDB
61  *
62  * This style of activation is much simpler, as the CPUs are already running,
63  * and are using their own copy of the kernel's IDT.  We simply interpose upon
64  * each CPU's IDT.  We save the handlers we replace, both for deactivation and
65  * for passing traps back to the kernel.  Note that for the hypervisors'
66  * benefit, we need to xcall to the other CPUs to do this, since we need to
67  * actively set the trap entries in its virtual IDT from that vcpu's context
68  * rather than just modifying the IDT table from the CPU running kdi_activate().
69  */
70 
71 #include <sys/types.h>
72 #include <sys/segments.h>
73 #include <sys/trap.h>
74 #include <sys/cpuvar.h>
75 #include <sys/reboot.h>
76 #include <sys/sunddi.h>
77 #include <sys/archsystm.h>
78 #include <sys/kdi_impl.h>
79 #include <sys/x_call.h>
80 #include <ia32/sys/psw.h>
81 
82 #define	KDI_GATE_NVECS	3
83 
84 #define	KDI_IDT_NOSAVE	0
85 #define	KDI_IDT_SAVE	1
86 
87 #define	KDI_IDT_DTYPE_KERNEL	0
88 #define	KDI_IDT_DTYPE_BOOT	1
89 
90 kdi_cpusave_t *kdi_cpusave;
91 int kdi_ncpusave;
92 
93 static kdi_main_t kdi_kmdb_main;
94 
95 kdi_drreg_t kdi_drreg;
96 
97 #ifndef __amd64
98 /* Used to track the current set of valid kernel selectors. */
99 uint32_t	kdi_cs;
100 uint32_t	kdi_ds;
101 uint32_t	kdi_fs;
102 uint32_t	kdi_gs;
103 #endif
104 
105 uint_t		kdi_msr_wrexit_msr;
106 uint64_t	*kdi_msr_wrexit_valp;
107 
108 uintptr_t	kdi_kernel_handler;
109 
110 int		kdi_trap_switch;
111 
112 #define	KDI_MEMRANGES_MAX	2
113 
114 kdi_memrange_t	kdi_memranges[KDI_MEMRANGES_MAX];
115 int		kdi_nmemranges;
116 
117 typedef void idt_hdlr_f(void);
118 
119 extern idt_hdlr_f kdi_trap0, kdi_trap1, kdi_int2, kdi_trap3, kdi_trap4;
120 extern idt_hdlr_f kdi_trap5, kdi_trap6, kdi_trap7, kdi_trap9;
121 extern idt_hdlr_f kdi_traperr10, kdi_traperr11, kdi_traperr12;
122 extern idt_hdlr_f kdi_traperr13, kdi_traperr14, kdi_trap16, kdi_trap17;
123 extern idt_hdlr_f kdi_trap18, kdi_trap19, kdi_trap20, kdi_ivct32;
124 extern idt_hdlr_f kdi_invaltrap;
125 extern size_t kdi_ivct_size;
126 extern char kdi_slave_entry_patch;
127 
128 typedef struct kdi_gate_spec {
129 	uint_t kgs_vec;
130 	uint_t kgs_dpl;
131 } kdi_gate_spec_t;
132 
133 /*
134  * Beware: kdi_pass_to_kernel() has unpleasant knowledge of this list.
135  */
136 static const kdi_gate_spec_t kdi_gate_specs[KDI_GATE_NVECS] = {
137 	{ T_SGLSTP, TRP_KPL },
138 	{ T_BPTFLT, TRP_UPL },
139 	{ T_DBGENTR, TRP_KPL }
140 };
141 
142 static gate_desc_t kdi_kgates[KDI_GATE_NVECS];
143 
144 gate_desc_t kdi_idt[NIDT];
145 
146 struct idt_description {
147 	uint_t id_low;
148 	uint_t id_high;
149 	idt_hdlr_f *id_basehdlr;
150 	size_t *id_incrp;
151 } idt_description[] = {
152 	{ T_ZERODIV, 0,		kdi_trap0, NULL },
153 	{ T_SGLSTP, 0,		kdi_trap1, NULL },
154 	{ T_NMIFLT, 0,		kdi_int2, NULL },
155 	{ T_BPTFLT, 0,		kdi_trap3, NULL },
156 	{ T_OVFLW, 0,		kdi_trap4, NULL },
157 	{ T_BOUNDFLT, 0,	kdi_trap5, NULL },
158 	{ T_ILLINST, 0,		kdi_trap6, NULL },
159 	{ T_NOEXTFLT, 0,	kdi_trap7, NULL },
160 #if !defined(__xpv)
161 	{ T_DBLFLT, 0,		syserrtrap, NULL },
162 #endif
163 	{ T_EXTOVRFLT, 0,	kdi_trap9, NULL },
164 	{ T_TSSFLT, 0,		kdi_traperr10, NULL },
165 	{ T_SEGFLT, 0,		kdi_traperr11, NULL },
166 	{ T_STKFLT, 0,		kdi_traperr12, NULL },
167 	{ T_GPFLT, 0,		kdi_traperr13, NULL },
168 	{ T_PGFLT, 0,		kdi_traperr14, NULL },
169 	{ 15, 0,		kdi_invaltrap, NULL },
170 	{ T_EXTERRFLT, 0, 	kdi_trap16, NULL },
171 	{ T_ALIGNMENT, 0, 	kdi_trap17, NULL },
172 	{ T_MCE, 0,		kdi_trap18, NULL },
173 	{ T_SIMDFPE, 0,		kdi_trap19, NULL },
174 	{ T_DBGENTR, 0,		kdi_trap20, NULL },
175 	{ 21, 31,		kdi_invaltrap, NULL },
176 	{ 32, 255,		kdi_ivct32, &kdi_ivct_size },
177 	{ 0, 0, NULL },
178 };
179 
180 void
181 kdi_idt_init(selector_t sel)
182 {
183 	struct idt_description *id;
184 	int i;
185 
186 	for (id = idt_description; id->id_basehdlr != NULL; id++) {
187 		uint_t high = id->id_high != 0 ? id->id_high : id->id_low;
188 		size_t incr = id->id_incrp != NULL ? *id->id_incrp : 0;
189 
190 		for (i = id->id_low; i <= high; i++) {
191 			caddr_t hdlr = (caddr_t)id->id_basehdlr +
192 			    incr * (i - id->id_low);
193 			set_gatesegd(&kdi_idt[i], (void (*)())hdlr, sel,
194 			    SDT_SYSIGT, TRP_KPL);
195 		}
196 	}
197 }
198 
199 /*
200  * Patch caller-provided code into the debugger's IDT handlers.  This code is
201  * used to save MSRs that must be saved before the first branch.  All handlers
202  * are essentially the same, and end with a branch to kdi_cmnint.  To save the
203  * MSR, we need to patch in before the branch.  The handlers have the following
204  * structure: KDI_MSR_PATCHOFF bytes of code, KDI_MSR_PATCHSZ bytes of
205  * patchable space, followed by more code.
206  */
207 void
208 kdi_idt_patch(caddr_t code, size_t sz)
209 {
210 	int i;
211 
212 	ASSERT(sz <= KDI_MSR_PATCHSZ);
213 
214 	for (i = 0; i < sizeof (kdi_idt) / sizeof (struct gate_desc); i++) {
215 		gate_desc_t *gd;
216 		uchar_t *patch;
217 
218 		if (i == T_DBLFLT)
219 			continue;	/* uses kernel's handler */
220 
221 		gd = &kdi_idt[i];
222 		patch = (uchar_t *)GATESEG_GETOFFSET(gd) + KDI_MSR_PATCHOFF;
223 
224 		/*
225 		 * We can't ASSERT that there's a nop here, because this may be
226 		 * a debugger restart.  In that case, we're copying the new
227 		 * patch point over the old one.
228 		 */
229 		/* FIXME: dtrace fbt ... */
230 		bcopy(code, patch, sz);
231 
232 		/* Fill the rest with nops to be sure */
233 		while (sz < KDI_MSR_PATCHSZ)
234 			patch[sz++] = 0x90; /* nop */
235 	}
236 }
237 
238 static void
239 kdi_idt_gates_install(selector_t sel, int saveold)
240 {
241 	gate_desc_t gates[KDI_GATE_NVECS];
242 	int i;
243 
244 	bzero(gates, sizeof (*gates));
245 
246 	for (i = 0; i < KDI_GATE_NVECS; i++) {
247 		const kdi_gate_spec_t *gs = &kdi_gate_specs[i];
248 		uintptr_t func = GATESEG_GETOFFSET(&kdi_idt[gs->kgs_vec]);
249 		set_gatesegd(&gates[i], (void (*)())func, sel, SDT_SYSIGT,
250 		    gs->kgs_dpl);
251 	}
252 
253 	for (i = 0; i < KDI_GATE_NVECS; i++) {
254 		uint_t vec = kdi_gate_specs[i].kgs_vec;
255 
256 		if (saveold)
257 			kdi_kgates[i] = CPU->cpu_m.mcpu_idt[vec];
258 
259 		kdi_idt_write(&gates[i], vec);
260 	}
261 }
262 
263 static void
264 kdi_idt_gates_restore(void)
265 {
266 	int i;
267 
268 	for (i = 0; i < KDI_GATE_NVECS; i++)
269 		kdi_idt_write(&kdi_kgates[i], kdi_gate_specs[i].kgs_vec);
270 }
271 
272 /*
273  * Called when we switch to the kernel's IDT.  We need to interpose on the
274  * kernel's IDT entries and stop using KMDBCODE_SEL.
275  */
276 void
277 kdi_idt_sync(void)
278 {
279 	kdi_idt_init(KCS_SEL);
280 	kdi_idt_gates_install(KCS_SEL, KDI_IDT_SAVE);
281 }
282 
283 /*
284  * On some processors, we'll need to clear a certain MSR before proceeding into
285  * the debugger.  Complicating matters, this MSR must be cleared before we take
286  * any branches.  We have patch points in every trap handler, which will cover
287  * all entry paths for master CPUs.  We also have a patch point in the slave
288  * entry code.
289  */
290 static void
291 kdi_msr_add_clrentry(uint_t msr)
292 {
293 #ifdef __amd64
294 	uchar_t code[] = {
295 		0x51, 0x50, 0x52,		/* pushq %rcx, %rax, %rdx */
296 		0xb9, 0x00, 0x00, 0x00, 0x00,	/* movl $MSRNUM, %ecx */
297 		0x31, 0xc0,			/* clr %eax */
298 		0x31, 0xd2,			/* clr %edx */
299 		0x0f, 0x30,			/* wrmsr */
300 		0x5a, 0x58, 0x59		/* popq %rdx, %rax, %rcx */
301 	};
302 	uchar_t *patch = &code[4];
303 #else
304 	uchar_t code[] = {
305 		0x60,				/* pushal */
306 		0xb9, 0x00, 0x00, 0x00, 0x00,	/* movl $MSRNUM, %ecx */
307 		0x31, 0xc0,			/* clr %eax */
308 		0x31, 0xd2,			/* clr %edx */
309 		0x0f, 0x30,			/* wrmsr */
310 		0x61				/* popal */
311 	};
312 	uchar_t *patch = &code[2];
313 #endif
314 
315 	bcopy(&msr, patch, sizeof (uint32_t));
316 
317 	kdi_idt_patch((caddr_t)code, sizeof (code));
318 
319 	bcopy(code, &kdi_slave_entry_patch, sizeof (code));
320 }
321 
322 static void
323 kdi_msr_add_wrexit(uint_t msr, uint64_t *valp)
324 {
325 	kdi_msr_wrexit_msr = msr;
326 	kdi_msr_wrexit_valp = valp;
327 }
328 
329 void
330 kdi_set_debug_msrs(kdi_msr_t *msrs)
331 {
332 	int nmsrs, i;
333 
334 	ASSERT(kdi_cpusave[0].krs_msr == NULL);
335 
336 	/* Look in CPU0's MSRs for any special MSRs. */
337 	for (nmsrs = 0; msrs[nmsrs].msr_num != 0; nmsrs++) {
338 		switch (msrs[nmsrs].msr_type) {
339 		case KDI_MSR_CLEARENTRY:
340 			kdi_msr_add_clrentry(msrs[nmsrs].msr_num);
341 			break;
342 
343 		case KDI_MSR_WRITEDELAY:
344 			kdi_msr_add_wrexit(msrs[nmsrs].msr_num,
345 			    msrs[nmsrs].kdi_msr_valp);
346 			break;
347 		}
348 	}
349 
350 	nmsrs++;
351 
352 	for (i = 0; i < kdi_ncpusave; i++)
353 		kdi_cpusave[i].krs_msr = &msrs[nmsrs * i];
354 }
355 
356 void
357 kdi_update_drreg(kdi_drreg_t *drreg)
358 {
359 	kdi_drreg = *drreg;
360 }
361 
362 void
363 kdi_memrange_add(caddr_t base, size_t len)
364 {
365 	kdi_memrange_t *mr = &kdi_memranges[kdi_nmemranges];
366 
367 	ASSERT(kdi_nmemranges != KDI_MEMRANGES_MAX);
368 
369 	mr->mr_base = base;
370 	mr->mr_lim = base + len - 1;
371 	kdi_nmemranges++;
372 }
373 
374 void
375 kdi_idt_switch(kdi_cpusave_t *cpusave)
376 {
377 	if (cpusave == NULL)
378 		kdi_idtr_set(kdi_idt, sizeof (kdi_idt) - 1);
379 	else
380 		kdi_idtr_set(cpusave->krs_idt, (sizeof (*idt0) * NIDT) - 1);
381 }
382 
383 /*
384  * Activation for CPUs other than the boot CPU, called from that CPU's
385  * mp_startup().  We saved the kernel's descriptors when we initialized the
386  * boot CPU, so we don't want to do it again.  Saving the handlers from this
387  * CPU's IDT would actually be dangerous with the CPU initialization method in
388  * use at the time of this writing.  With that method, the startup code creates
389  * the IDTs for slave CPUs by copying the one used by the boot CPU, which has
390  * already been interposed upon by KMDB.  Were we to interpose again, we'd
391  * replace the kernel's descriptors with our own in the save area.  By not
392  * saving, but still overwriting, we'll work in the current world, and in any
393  * future world where the IDT is generated from scratch.
394  */
395 void
396 kdi_cpu_init(void)
397 {
398 	kdi_idt_gates_install(KCS_SEL, KDI_IDT_NOSAVE);
399 	/* Load the debug registers and MSRs */
400 	kdi_cpu_debug_init(&kdi_cpusave[CPU->cpu_id]);
401 }
402 
403 /*
404  * Activation for all CPUs for mod-loaded kmdb, i.e. a kmdb that wasn't
405  * loaded at boot.
406  */
407 static int
408 kdi_cpu_activate(void)
409 {
410 	kdi_idt_gates_install(KCS_SEL, KDI_IDT_SAVE);
411 	return (0);
412 }
413 
414 void
415 kdi_activate(kdi_main_t main, kdi_cpusave_t *cpusave, uint_t ncpusave)
416 {
417 	int i;
418 	cpuset_t cpuset;
419 
420 	CPUSET_ALL(cpuset);
421 
422 	kdi_cpusave = cpusave;
423 	kdi_ncpusave = ncpusave;
424 
425 	kdi_kmdb_main = main;
426 
427 	for (i = 0; i < kdi_ncpusave; i++) {
428 		kdi_cpusave[i].krs_cpu_id = i;
429 
430 		kdi_cpusave[i].krs_curcrumb =
431 		    &kdi_cpusave[i].krs_crumbs[KDI_NCRUMBS - 1];
432 		kdi_cpusave[i].krs_curcrumbidx = KDI_NCRUMBS - 1;
433 	}
434 
435 	if (boothowto & RB_KMDB)
436 		kdi_idt_init(KMDBCODE_SEL);
437 	else
438 		kdi_idt_init(KCS_SEL);
439 
440 	/* The initial selector set.  Updated by the debugger-entry code */
441 #ifndef __amd64
442 	kdi_cs = B32CODE_SEL;
443 	kdi_ds = kdi_fs = kdi_gs = B32DATA_SEL;
444 #endif
445 
446 	kdi_memranges[0].mr_base = kdi_segdebugbase;
447 	kdi_memranges[0].mr_lim = kdi_segdebugbase + kdi_segdebugsize - 1;
448 	kdi_nmemranges = 1;
449 
450 	kdi_drreg.dr_ctl = KDIREG_DRCTL_RESERVED;
451 	kdi_drreg.dr_stat = KDIREG_DRSTAT_RESERVED;
452 
453 	kdi_msr_wrexit_msr = 0;
454 	kdi_msr_wrexit_valp = NULL;
455 
456 	if (boothowto & RB_KMDB) {
457 		kdi_idt_gates_install(KMDBCODE_SEL, KDI_IDT_NOSAVE);
458 	} else {
459 		xc_call(0, 0, 0, X_CALL_HIPRI, cpuset,
460 		    (xc_func_t)kdi_cpu_activate);
461 	}
462 }
463 
/*
 * Per-CPU deactivation, dispatched by kdi_deactivate() via xc_call().  Puts
 * the saved kernel descriptors back in place.  Always returns 0.
 */
static int
kdi_cpu_deactivate(void)
{
	kdi_idt_gates_restore();

	return (0);
}
470 
471 void
472 kdi_deactivate(void)
473 {
474 	cpuset_t cpuset;
475 	CPUSET_ALL(cpuset);
476 
477 	xc_call(0, 0, 0, X_CALL_HIPRI, cpuset, (xc_func_t)kdi_cpu_deactivate);
478 	kdi_nmemranges = 0;
479 }
480 
481 /*
482  * We receive all breakpoints and single step traps.  Some of them,
483  * including those from userland and those induced by DTrace providers,
484  * are intended for the kernel, and must be processed there.  We adopt
485  * this ours-until-proven-otherwise position due to the painful
486  * consequences of sending the kernel an unexpected breakpoint or
487  * single step.  Unless someone can prove to us that the kernel is
488  * prepared to handle the trap, we'll assume there's a problem and will
489  * give the user a chance to debug it.
490  */
491 int
492 kdi_trap_pass(kdi_cpusave_t *cpusave)
493 {
494 	greg_t tt = cpusave->krs_gregs[KDIREG_TRAPNO];
495 	greg_t pc = cpusave->krs_gregs[KDIREG_PC];
496 	greg_t cs = cpusave->krs_gregs[KDIREG_CS];
497 
498 	if (USERMODE(cs))
499 		return (1);
500 
501 	if (tt != T_BPTFLT && tt != T_SGLSTP)
502 		return (0);
503 
504 	if (tt == T_BPTFLT && kdi_dtrace_get_state() ==
505 	    KDI_DTSTATE_DTRACE_ACTIVE)
506 		return (1);
507 
508 	/*
509 	 * See the comments in the kernel's T_SGLSTP handler for why we need to
510 	 * do this.
511 	 */
512 	if (tt == T_SGLSTP &&
513 	    (pc == (greg_t)sys_sysenter || pc == (greg_t)brand_sys_sysenter))
514 		return (1);
515 
516 	return (0);
517 }
518 
519 /*
520  * State has been saved, and all CPUs are on the CPU-specific stacks.  All
521  * CPUs enter here, and head off into the debugger proper.
522  */
523 void
524 kdi_debugger_entry(kdi_cpusave_t *cpusave)
525 {
526 	/*
527 	 * BPTFLT gives us control with %eip set to the instruction *after*
528 	 * the int 3.  Back it off, so we're looking at the instruction that
529 	 * triggered the fault.
530 	 */
531 	if (cpusave->krs_gregs[KDIREG_TRAPNO] == T_BPTFLT)
532 		cpusave->krs_gregs[KDIREG_PC]--;
533 
534 	kdi_kmdb_main(cpusave);
535 }
536