xref: /linux/arch/x86/include/asm/desc.h (revision e58e871becec2d3b04ed91c0c16fe8deac9c9dfa)
1 #ifndef _ASM_X86_DESC_H
2 #define _ASM_X86_DESC_H
3 
4 #include <asm/desc_defs.h>
5 #include <asm/ldt.h>
6 #include <asm/mmu.h>
7 #include <asm/fixmap.h>
8 
9 #include <linux/smp.h>
10 #include <linux/percpu.h>
11 
12 static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
13 {
14 	desc->limit0		= info->limit & 0x0ffff;
15 
16 	desc->base0		= (info->base_addr & 0x0000ffff);
17 	desc->base1		= (info->base_addr & 0x00ff0000) >> 16;
18 
19 	desc->type		= (info->read_exec_only ^ 1) << 1;
20 	desc->type	       |= info->contents << 2;
21 
22 	desc->s			= 1;
23 	desc->dpl		= 0x3;
24 	desc->p			= info->seg_not_present ^ 1;
25 	desc->limit		= (info->limit & 0xf0000) >> 16;
26 	desc->avl		= info->useable;
27 	desc->d			= info->seg_32bit;
28 	desc->g			= info->limit_in_pages;
29 
30 	desc->base2		= (info->base_addr & 0xff000000) >> 24;
31 	/*
32 	 * Don't allow setting of the lm bit. It would confuse
33 	 * user_64bit_mode and would get overridden by sysret anyway.
34 	 */
35 	desc->l			= 0;
36 }
37 
/*
 * The regular and debug interrupt descriptor tables, together with the
 * descriptor-pointer structures that are handed to load_idt().  All of
 * them are defined elsewhere.
 */
extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
extern const struct desc_ptr debug_idt_descr;
extern gate_desc debug_idt_table[];
42 
/* Container for one GDT of GDT_ENTRIES descriptors, aligned to PAGE_SIZE. */
struct gdt_page {
	struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));

/* Per-CPU instance of struct gdt_page; the accessors below hand out its gdt[]. */
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
48 
49 /* Provide the original GDT */
50 static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
51 {
52 	return per_cpu(gdt_page, cpu).gdt;
53 }
54 
55 /* Provide the current original GDT */
56 static inline struct desc_struct *get_current_gdt_rw(void)
57 {
58 	return this_cpu_ptr(&gdt_page)->gdt;
59 }
60 
61 /* Get the fixmap index for a specific processor */
62 static inline unsigned int get_cpu_gdt_ro_index(int cpu)
63 {
64 	return FIX_GDT_REMAP_BEGIN + cpu;
65 }
66 
/* Return the fixmap virtual address at which @cpu's GDT is remapped. */
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
{
	return (struct desc_struct *)__fix_to_virt(get_cpu_gdt_ro_index(cpu));
}
73 
/* Return the fixmap (read-only) GDT address for the executing CPU. */
static inline struct desc_struct *get_current_gdt_ro(void)
{
	int cpu = smp_processor_id();

	return get_cpu_gdt_ro(cpu);
}
79 
80 /* Provide the physical address of the GDT page. */
81 static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
82 {
83 	return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
84 }
85 
86 #ifdef CONFIG_X86_64
87 
88 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
89 			     unsigned dpl, unsigned ist, unsigned seg)
90 {
91 	gate->offset_low	= PTR_LOW(func);
92 	gate->segment		= __KERNEL_CS;
93 	gate->ist		= ist;
94 	gate->p			= 1;
95 	gate->dpl		= dpl;
96 	gate->zero0		= 0;
97 	gate->zero1		= 0;
98 	gate->type		= type;
99 	gate->offset_middle	= PTR_MIDDLE(func);
100 	gate->offset_high	= PTR_HIGH(func);
101 }
102 
103 #else
104 static inline void pack_gate(gate_desc *gate, unsigned char type,
105 			     unsigned long base, unsigned dpl, unsigned flags,
106 			     unsigned short seg)
107 {
108 	gate->a = (seg << 16) | (base & 0xffff);
109 	gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8);
110 }
111 
112 #endif
113 
114 static inline int desc_empty(const void *ptr)
115 {
116 	const u32 *desc = ptr;
117 
118 	return !(desc[0] | desc[1]);
119 }
120 
121 #ifdef CONFIG_PARAVIRT
122 #include <asm/paravirt.h>
123 #else
/*
 * Without CONFIG_PARAVIRT the descriptor-table operations map directly
 * onto the native_* helpers or onto the corresponding inline asm.
 */
#define load_TR_desc()				native_load_tr_desc()
#define load_gdt(dtr)				native_load_gdt(dtr)
#define load_idt(dtr)				native_load_idt(dtr)
#define load_tr(tr)				asm volatile("ltr %0"::"m" (tr))
#define load_ldt(ldt)				asm volatile("lldt %0"::"m" (ldt))

#define store_gdt(dtr)				native_store_gdt(dtr)
#define store_idt(dtr)				native_store_idt(dtr)
#define store_tr(tr)				(tr = native_store_tr())

#define load_TLS(t, cpu)			native_load_tls(t, cpu)
#define set_ldt					native_set_ldt

#define write_ldt_entry(dt, entry, desc)	native_write_ldt_entry(dt, entry, desc)
#define write_gdt_entry(dt, entry, desc, type)	native_write_gdt_entry(dt, entry, desc, type)
#define write_idt_entry(dt, entry, g)		native_write_idt_entry(dt, entry, g)
140 
/* Intentionally empty when CONFIG_PARAVIRT is disabled. */
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
}

/* Intentionally empty when CONFIG_PARAVIRT is disabled. */
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
}
148 #endif	/* CONFIG_PARAVIRT */
149 
/* Execute SLDT with @ldt as the memory output operand. */
#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
151 
152 static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
153 {
154 	memcpy(&idt[entry], gate, sizeof(*gate));
155 }
156 
157 static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, const void *desc)
158 {
159 	memcpy(&ldt[entry], desc, 8);
160 }
161 
162 static inline void
163 native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type)
164 {
165 	unsigned int size;
166 
167 	switch (type) {
168 	case DESC_TSS:	size = sizeof(tss_desc);	break;
169 	case DESC_LDT:	size = sizeof(ldt_desc);	break;
170 	default:	size = sizeof(*gdt);		break;
171 	}
172 
173 	memcpy(&gdt[entry], desc, size);
174 }
175 
176 static inline void pack_descriptor(struct desc_struct *desc, unsigned long base,
177 				   unsigned long limit, unsigned char type,
178 				   unsigned char flags)
179 {
180 	desc->a = ((base & 0xffff) << 16) | (limit & 0xffff);
181 	desc->b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
182 		(limit & 0x000f0000) | ((type & 0xff) << 8) |
183 		((flags & 0xf) << 20);
184 	desc->p = 1;
185 }
186 
187 
/*
 * Build a TSS/LDT descriptor at @d for the object at @addr.  @size is
 * stored as the descriptor limit and @type as its type.
 *
 * On 64-bit the target is a struct ldttss_desc64, which is cleared before
 * the fields are filled in; on 32-bit the work is done by pack_descriptor().
 */
static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
	struct ldttss_desc64 *desc = d;

	memset(desc, 0, sizeof(*desc));

	/* Base address, split over four fields. */
	desc->base0		= PTR_LOW(addr);
	desc->base1		= PTR_MIDDLE(addr) & 0xFF;
	desc->base2		= (PTR_MIDDLE(addr) >> 8) & 0xFF;
	desc->base3		= PTR_HIGH(addr);

	/* Limit: low 16 bits and the next 4 bits. */
	desc->limit0		= size & 0xFFFF;
	desc->limit1		= (size >> 16) & 0xF;

	desc->type		= type;
	desc->p			= 1;
#else
	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
}
207 
208 static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
209 {
210 	struct desc_struct *d = get_cpu_gdt_rw(cpu);
211 	tss_desc tss;
212 
213 	set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
214 			      __KERNEL_TSS_LIMIT);
215 	write_gdt_entry(d, entry, &tss, DESC_TSS);
216 }
217 
218 #define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
219 
220 static inline void native_set_ldt(const void *addr, unsigned int entries)
221 {
222 	if (likely(entries == 0))
223 		asm volatile("lldt %w0"::"q" (0));
224 	else {
225 		unsigned cpu = smp_processor_id();
226 		ldt_desc ldt;
227 
228 		set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
229 				      entries * LDT_ENTRY_SIZE - 1);
230 		write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
231 				&ldt, DESC_LDT);
232 		asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
233 	}
234 }
235 
236 static inline void native_load_gdt(const struct desc_ptr *dtr)
237 {
238 	asm volatile("lgdt %0"::"m" (*dtr));
239 }
240 
241 static inline void native_load_idt(const struct desc_ptr *dtr)
242 {
243 	asm volatile("lidt %0"::"m" (*dtr));
244 }
245 
246 static inline void native_store_gdt(struct desc_ptr *dtr)
247 {
248 	asm volatile("sgdt %0":"=m" (*dtr));
249 }
250 
251 static inline void native_store_idt(struct desc_ptr *dtr)
252 {
253 	asm volatile("sidt %0":"=m" (*dtr));
254 }
255 
256 /*
257  * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
258  * a read-only remapping. To prevent a page fault, the GDT is switched to the
259  * original writeable version when needed.
260  */
261 #ifdef CONFIG_X86_64
262 static inline void native_load_tr_desc(void)
263 {
264 	struct desc_ptr gdt;
265 	int cpu = raw_smp_processor_id();
266 	bool restore = 0;
267 	struct desc_struct *fixmap_gdt;
268 
269 	native_store_gdt(&gdt);
270 	fixmap_gdt = get_cpu_gdt_ro(cpu);
271 
272 	/*
273 	 * If the current GDT is the read-only fixmap, swap to the original
274 	 * writeable version. Swap back at the end.
275 	 */
276 	if (gdt.address == (unsigned long)fixmap_gdt) {
277 		load_direct_gdt(cpu);
278 		restore = 1;
279 	}
280 	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
281 	if (restore)
282 		load_fixmap_gdt(cpu);
283 }
284 #else
285 static inline void native_load_tr_desc(void)
286 {
287 	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
288 }
289 #endif
290 
291 static inline unsigned long native_store_tr(void)
292 {
293 	unsigned long tr;
294 
295 	asm volatile("str %0":"=r" (tr));
296 
297 	return tr;
298 }
299 
300 static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
301 {
302 	struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
303 	unsigned int i;
304 
305 	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
306 		gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
307 }
308 
/*
 * Per-CPU flag: set by invalidate_tss_limit(), tested by
 * refresh_tss_limit() and cleared by force_reload_TR().
 */
DECLARE_PER_CPU(bool, __tss_limit_invalid);
310 
311 static inline void force_reload_TR(void)
312 {
313 	struct desc_struct *d = get_current_gdt_rw();
314 	tss_desc tss;
315 
316 	memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
317 
318 	/*
319 	 * LTR requires an available TSS, and the TSS is currently
320 	 * busy.  Make it be available so that LTR will work.
321 	 */
322 	tss.type = DESC_TSS;
323 	write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
324 
325 	load_TR_desc();
326 	this_cpu_write(__tss_limit_invalid, false);
327 }
328 
329 /*
330  * Call this if you need the TSS limit to be correct, which should be the case
331  * if and only if you have TIF_IO_BITMAP set or you're switching to a task
332  * with TIF_IO_BITMAP set.
333  */
334 static inline void refresh_tss_limit(void)
335 {
336 	DEBUG_LOCKS_WARN_ON(preemptible());
337 
338 	if (unlikely(this_cpu_read(__tss_limit_invalid)))
339 		force_reload_TR();
340 }
341 
342 /*
343  * If you do something evil that corrupts the cached TSS limit (I'm looking
344  * at you, VMX exits), call this function.
345  *
346  * The optimization here is that the TSS limit only matters for Linux if the
347  * IO bitmap is in use.  If the TSS limit gets forced to its minimum value,
348  * everything works except that IO bitmap will be ignored and all CPL 3 IO
349  * instructions will #GP, which is exactly what we want for normal tasks.
350  */
351 static inline void invalidate_tss_limit(void)
352 {
353 	DEBUG_LOCKS_WARN_ON(preemptible());
354 
355 	if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
356 		force_reload_TR();
357 	else
358 		this_cpu_write(__tss_limit_invalid, true);
359 }
360 
/*
 * True when @info has read_exec_only and seg_not_present both equal to 1
 * and every other tested field equal to zero.
 *
 * This intentionally ignores lm, since 32-bit apps don't have that field.
 * Note that @info is evaluated once per tested field.
 */
#define LDT_empty(info)					\
	(!(info)->base_addr			&&	\
	 !(info)->limit				&&	\
	 !(info)->contents			&&	\
	 (info)->read_exec_only		== 1	&&	\
	 !(info)->seg_32bit			&&	\
	 !(info)->limit_in_pages		&&	\
	 (info)->seg_not_present	== 1	&&	\
	 !(info)->useable)
371 
372 /* Lots of programs expect an all-zero user_desc to mean "no segment at all". */
373 static inline bool LDT_zero(const struct user_desc *info)
374 {
375 	return (info->base_addr		== 0 &&
376 		info->limit		== 0 &&
377 		info->contents		== 0 &&
378 		info->read_exec_only	== 0 &&
379 		info->seg_32bit		== 0 &&
380 		info->limit_in_pages	== 0 &&
381 		info->seg_not_present	== 0 &&
382 		info->useable		== 0);
383 }
384 
/* Call set_ldt() with a NULL table and zero entries. */
static inline void clear_LDT(void)
{
	set_ldt(NULL, 0);
}
389 
390 static inline unsigned long get_desc_base(const struct desc_struct *desc)
391 {
392 	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
393 }
394 
395 static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
396 {
397 	desc->base0 = base & 0xffff;
398 	desc->base1 = (base >> 16) & 0xff;
399 	desc->base2 = (base >> 24) & 0xff;
400 }
401 
402 static inline unsigned long get_desc_limit(const struct desc_struct *desc)
403 {
404 	return desc->limit0 | (desc->limit << 16);
405 }
406 
407 static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
408 {
409 	desc->limit0 = limit & 0xffff;
410 	desc->limit = (limit >> 16) & 0xf;
411 }
412 
413 #ifdef CONFIG_X86_64
414 static inline void set_nmi_gate(int gate, void *addr)
415 {
416 	gate_desc s;
417 
418 	pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
419 	write_idt_entry(debug_idt_table, gate, &s);
420 }
421 #endif
422 
423 #ifdef CONFIG_TRACING
/* The tracing IDT and its descriptor pointer, defined elsewhere. */
extern struct desc_ptr trace_idt_descr;
extern gate_desc trace_idt_table[];

/* Write @gate into slot @entry of trace_idt_table. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
	write_idt_entry(trace_idt_table, entry, gate);
}
430 
431 static inline void _trace_set_gate(int gate, unsigned type, void *addr,
432 				   unsigned dpl, unsigned ist, unsigned seg)
433 {
434 	gate_desc s;
435 
436 	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
437 	/*
438 	 * does not need to be atomic because it is only done once at
439 	 * setup time
440 	 */
441 	write_trace_idt_entry(gate, &s);
442 }
443 #else
/* Without CONFIG_TRACING there is no tracing IDT: intentionally empty. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
}

/* Without CONFIG_TRACING this expands to nothing. */
#define _trace_set_gate(gate, type, addr, dpl, ist, seg)
449 #endif
450 
451 static inline void _set_gate(int gate, unsigned type, void *addr,
452 			     unsigned dpl, unsigned ist, unsigned seg)
453 {
454 	gate_desc s;
455 
456 	pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg);
457 	/*
458 	 * does not need to be atomic because it is only done once at
459 	 * setup time
460 	 */
461 	write_idt_entry(idt_table, gate, &s);
462 	write_trace_idt_entry(gate, &s);
463 }
464 
/*
 * Install @addr as a DPL-0 interrupt gate (IST 0, __KERNEL_CS) for vector
 * @n; BUG if @n is above 0xFF.
 *
 * This needs to use 'idt_table' rather than 'idt', and
 * thus use the _nonmapped_ version of the IDT, as the
 * Pentium F0 0F bugfix can have resulted in the mapped
 * IDT being write-protected.
 *
 * Both arguments are parenthesized in the expansion so that expressions
 * such as "cond ? a : b" are range-checked and cast as a whole.  @n is
 * evaluated twice, so it must not have side effects.
 */
#define set_intr_gate_notrace(n, addr)					\
	do {								\
		BUG_ON((unsigned)(n) > 0xFF);				\
		_set_gate((n), GATE_INTERRUPT, (void *)(addr), 0, 0,	\
			  __KERNEL_CS);					\
	} while (0)
477 
/*
 * Install @addr for vector @n via set_intr_gate_notrace(), then install
 * trace_<addr> for the same vector via _trace_set_gate().  @addr must be a
 * plain identifier because it is token-pasted onto "trace_".
 */
#define set_intr_gate(n, addr)						\
	do {								\
		set_intr_gate_notrace(n, addr);				\
		_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
				0, 0, __KERNEL_CS);			\
	} while (0)
484 
485 extern int first_system_vector;
/* used_vectors is a bitmap of the vectors that are not managed by the percpu vector_irq */
487 extern unsigned long used_vectors[];
488 
489 static inline void alloc_system_vector(int vector)
490 {
491 	if (!test_bit(vector, used_vectors)) {
492 		set_bit(vector, used_vectors);
493 		if (first_system_vector > vector)
494 			first_system_vector = vector;
495 	} else {
496 		BUG();
497 	}
498 }
499 
/* Reserve vector @n with alloc_system_vector(), then install @addr via set_intr_gate(). */
#define alloc_intr_gate(n, addr)				\
	do {							\
		alloc_system_vector(n);				\
		set_intr_gate(n, addr);				\
	} while (0)
505 
506 /*
507  * This routine sets up an interrupt gate at directory privilege level 3.
508  */
509 static inline void set_system_intr_gate(unsigned int n, void *addr)
510 {
511 	BUG_ON((unsigned)n > 0xFF);
512 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
513 }
514 
515 static inline void set_system_trap_gate(unsigned int n, void *addr)
516 {
517 	BUG_ON((unsigned)n > 0xFF);
518 	_set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
519 }
520 
521 static inline void set_trap_gate(unsigned int n, void *addr)
522 {
523 	BUG_ON((unsigned)n > 0xFF);
524 	_set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
525 }
526 
527 static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
528 {
529 	BUG_ON((unsigned)n > 0xFF);
530 	_set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry<<3));
531 }
532 
533 static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
534 {
535 	BUG_ON((unsigned)n > 0xFF);
536 	_set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
537 }
538 
539 static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
540 {
541 	BUG_ON((unsigned)n > 0xFF);
542 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
543 }
544 
545 #ifdef CONFIG_X86_64
546 DECLARE_PER_CPU(u32, debug_idt_ctr);
547 static inline bool is_debug_idt_enabled(void)
548 {
549 	if (this_cpu_read(debug_idt_ctr))
550 		return true;
551 
552 	return false;
553 }
554 
555 static inline void load_debug_idt(void)
556 {
557 	load_idt((const struct desc_ptr *)&debug_idt_descr);
558 }
559 #else
/* Without CONFIG_X86_64 the debug IDT is never reported as enabled. */
static inline bool is_debug_idt_enabled(void)
{
	return false;
}

/* Without CONFIG_X86_64 this is intentionally empty. */
static inline void load_debug_idt(void)
{
}
568 #endif
569 
570 #ifdef CONFIG_TRACING
571 extern atomic_t trace_idt_ctr;
572 static inline bool is_trace_idt_enabled(void)
573 {
574 	if (atomic_read(&trace_idt_ctr))
575 		return true;
576 
577 	return false;
578 }
579 
580 static inline void load_trace_idt(void)
581 {
582 	load_idt((const struct desc_ptr *)&trace_idt_descr);
583 }
584 #else
/* Without CONFIG_TRACING the tracing IDT is never reported as enabled. */
static inline bool is_trace_idt_enabled(void)
{
	return false;
}

/* Without CONFIG_TRACING this is intentionally empty. */
static inline void load_trace_idt(void)
{
}
593 #endif
594 
595 /*
596  * The load_current_idt() must be called with interrupts disabled
597  * to avoid races. That way the IDT will always be set back to the expected
598  * descriptor. It's also called when a CPU is being initialized, and
599  * that doesn't need to disable interrupts, as nothing should be
600  * bothering the CPU then.
601  */
602 static inline void load_current_idt(void)
603 {
604 	if (is_debug_idt_enabled())
605 		load_debug_idt();
606 	else if (is_trace_idt_enabled())
607 		load_trace_idt();
608 	else
609 		load_idt((const struct desc_ptr *)&idt_descr);
610 }
611 #endif /* _ASM_X86_DESC_H */
612