xref: /freebsd/sys/amd64/include/cpufunc.h (revision 2357939bc239bd5334a169b62313806178dd8f30)
1 /*-
2  * Copyright (c) 2003 Peter Wemm.
3  * Copyright (c) 1993 The Regents of the University of California.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 4. Neither the name of the University nor the names of its contributors
15  *    may be used to endorse or promote products derived from this software
16  *    without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  */
32 
/*
 * Functions to provide access to special i386 instructions.
 * This is included in sys/systm.h, and that file should be
 * used in preference to this.
 */
38 
39 #ifndef _MACHINE_CPUFUNC_H_
40 #define	_MACHINE_CPUFUNC_H_
41 
42 struct region_descriptor;
43 
/*
 * Fixed-width memory accessors.  Each macro casts `va' to a pointer to
 * a volatile 8-, 16-, 32- or 64-bit unsigned object and reads it
 * (read*) or stores `d' into it (write*).
 */
#define	readb(va)	(*(volatile u_int8_t *)(va))
#define	readw(va)	(*(volatile u_int16_t *)(va))
#define	readl(va)	(*(volatile u_int32_t *)(va))
#define	readq(va)	(*(volatile u_int64_t *)(va))

#define	writeb(va, d)	(*(volatile u_int8_t *)(va) = (d))
#define	writew(va, d)	(*(volatile u_int16_t *)(va) = (d))
#define	writel(va, d)	(*(volatile u_int32_t *)(va) = (d))
#define	writeq(va, d)	(*(volatile u_int64_t *)(va) = (d))
53 
54 #ifdef	__GNUC__
55 
/*
 * Raise a breakpoint trap by executing "int $3".
 */
static __inline void
breakpoint(void)
{

	__asm __volatile("int $3");
}
61 
/*
 * Bit scan forward on a 32-bit value: run BSFL on `mask' and return
 * the destination register.  For a non-zero mask this is the index of
 * the lowest set bit.  ffs-style wrappers in this file test for a zero
 * mask before calling the bit-scan helpers.
 */
static __inline u_int
bsfl(u_int mask)
{
	u_int	bitpos;

	__asm __volatile("bsfl %1,%0" : "=r" (bitpos) : "rm" (mask));
	return (bitpos);
}
70 
/*
 * Bit scan forward on a 64-bit value: run BSFQ on `mask' and return
 * the destination register.  For a non-zero mask this is the index of
 * the lowest set bit.
 */
static __inline u_long
bsfq(u_long mask)
{
	u_long	bitpos;

	__asm __volatile("bsfq %1,%0" : "=r" (bitpos) : "rm" (mask));
	return (bitpos);
}
79 
/*
 * Bit scan reverse on a 32-bit value: run BSRL on `mask' and return
 * the destination register.  For a non-zero mask this is the index of
 * the highest set bit.
 */
static __inline u_int
bsrl(u_int mask)
{
	u_int	bitpos;

	__asm __volatile("bsrl %1,%0" : "=r" (bitpos) : "rm" (mask));
	return (bitpos);
}
88 
/*
 * Bit scan reverse on a 64-bit value: run BSRQ on `mask' and return
 * the destination register.  For a non-zero mask this is the index of
 * the highest set bit.
 */
static __inline u_long
bsrq(u_long mask)
{
	u_long	bitpos;

	__asm __volatile("bsrq %1,%0" : "=r" (bitpos) : "rm" (mask));
	return (bitpos);
}
97 
/*
 * Execute CLI.  The "memory" clobber also makes the statement a
 * compiler-level memory barrier.
 */
static __inline void
disable_intr(void)
{

	__asm __volatile("cli" : : : "memory");
}
103 
/*
 * Execute CPUID for leaf `ax' and store the resulting EAX, EBX, ECX and
 * EDX values in p[0], p[1], p[2] and p[3] respectively.  `p' must point
 * to at least four u_ints.
 *
 * ECX is loaded with 0 before the instruction runs.  CPUID leaves that
 * take a sub-leaf index in ECX would otherwise see whatever value the
 * register happened to hold, making the result unpredictable.
 */
static __inline void
do_cpuid(u_int ax, u_int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 :  "0" (ax), "2" (0U));
}
111 
/*
 * Execute STI.
 *
 * The "memory" clobber mirrors disable_intr(): without it the compiler
 * is free to move ordinary memory accesses from a preceding critical
 * section to after the point where interrupts are re-enabled.
 */
static __inline void
enable_intr(void)
{
	__asm __volatile("sti" : : : "memory");
}
117 
118 #ifdef _KERNEL
119 
#define	HAVE_INLINE_FFS

/*
 * Find first set bit of an int via the compiler builtin.  An older
 * hand-written variant built on bsfl() was already compiled out in
 * favour of the builtin.
 */
static __inline int
ffs(int mask)
{
	int	pos;

	pos = __builtin_ffs(mask);
	return (pos);
}
138 
139 #define	HAVE_INLINE_FFSL
140 
141 static __inline int
142 ffsl(long mask)
143 {
144 	return (mask == 0 ? mask : (int)bsfq((u_long)mask) + 1);
145 }
146 
147 #define	HAVE_INLINE_FLS
148 
149 static __inline int
150 fls(int mask)
151 {
152 	return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
153 }
154 
155 #define	HAVE_INLINE_FLSL
156 
157 static __inline int
158 flsl(long mask)
159 {
160 	return (mask == 0 ? mask : (int)bsrq((u_long)mask) + 1);
161 }
162 
163 #endif /* _KERNEL */
164 
/*
 * Execute HLT.
 */
static __inline void
halt(void)
{

	__asm __volatile("hlt");
}
170 
171 #if __GNUC__ < 2
172 
/* Compilers older than gcc 2: always use the variable-port routines. */
#define	inb(port)		inbv(port)
#define	outb(port, data)	outbv(port, data)
175 
#else /* __GNUC__ >= 2 */
177 
/*
 * gcc has no constraint letter for the range 0..255, so inb() and
 * outb() pick between a constant-port routine (inbc/outbc) and a
 * variable-port routine (inbv/outbv) at compile time.  The constant
 * routines still list "d" in their constraint because "i" is not valid
 * when the port is not constant; this only matters at -O0, since
 * otherwise the branch that cannot work is optimized away.
 *
 * inb() is an expression-statement rather than a conditional
 * expression because gcc-2.6.0 would promote the operands of the
 * conditional and generate poor code for
 * "if ((inb(var) & const1) == const2)".
 *
 * The otherwise unnecessary test `(port) < 0x10000' is there to
 * provoke a warning when `port' has type u_short or smaller, since
 * such types are pessimal.  It actually only works for signed types.
 * The range check is written carefully so that it does not itself
 * generate warnings.
 */
#define	inb(port) __extension__ ({					\
	u_char	_inb_val;						\
	if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100	\
	    && (port) < 0x10000)					\
		_inb_val = inbc(port);					\
	else								\
		_inb_val = inbv(port);					\
	_inb_val; })
202 
/*
 * Write one byte to an I/O port: outbc() when `port' is a compile-time
 * constant whose low 16 bits are below 0x100 and which is itself below
 * 0x10000, outbv() otherwise.
 */
#define	outb(port, data)						\
	((__builtin_constant_p(port) &&					\
	    ((port) & 0xffff) < 0x100 && (port) < 0x10000) ?		\
	    outbc(port, data) : outbv(port, data))
207 
208 static __inline u_char
209 inbc(u_int port)
210 {
211 	u_char	data;
212 
213 	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
214 	return (data);
215 }
216 
217 static __inline void
218 outbc(u_int port, u_char data)
219 {
220 	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
221 }
222 
#endif /* __GNUC__ >= 2 */
224 
225 static __inline u_char
226 inbv(u_int port)
227 {
228 	u_char	data;
229 	/*
230 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
231 	 * %edx, while gcc generates inferior code (movw instead of movl)
232 	 * if we tell it to load (u_short) port.
233 	 */
234 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
235 	return (data);
236 }
237 
238 static __inline u_int
239 inl(u_int port)
240 {
241 	u_int	data;
242 
243 	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
244 	return (data);
245 }
246 
247 static __inline void
248 insb(u_int port, void *addr, size_t cnt)
249 {
250 	__asm __volatile("cld; rep; insb"
251 			 : "+D" (addr), "+c" (cnt)
252 			 : "d" (port)
253 			 : "memory");
254 }
255 
256 static __inline void
257 insw(u_int port, void *addr, size_t cnt)
258 {
259 	__asm __volatile("cld; rep; insw"
260 			 : "+D" (addr), "+c" (cnt)
261 			 : "d" (port)
262 			 : "memory");
263 }
264 
265 static __inline void
266 insl(u_int port, void *addr, size_t cnt)
267 {
268 	__asm __volatile("cld; rep; insl"
269 			 : "+D" (addr), "+c" (cnt)
270 			 : "d" (port)
271 			 : "memory");
272 }
273 
/*
 * Execute INVD.
 */
static __inline void
invd(void)
{

	__asm __volatile("invd");
}
279 
280 static __inline u_short
281 inw(u_int port)
282 {
283 	u_short	data;
284 
285 	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
286 	return (data);
287 }
288 
289 static __inline void
290 outbv(u_int port, u_char data)
291 {
292 	u_char	al;
293 	/*
294 	 * Use an unnecessary assignment to help gcc's register allocator.
295 	 * This make a large difference for gcc-1.40 and a tiny difference
296 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
297 	 * best results.  gcc-2.6.0 can't handle this.
298 	 */
299 	al = data;
300 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
301 }
302 
303 static __inline void
304 outl(u_int port, u_int data)
305 {
306 	/*
307 	 * outl() and outw() aren't used much so we haven't looked at
308 	 * possible micro-optimizations such as the unnecessary
309 	 * assignment for them.
310 	 */
311 	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
312 }
313 
314 static __inline void
315 outsb(u_int port, const void *addr, size_t cnt)
316 {
317 	__asm __volatile("cld; rep; outsb"
318 			 : "+S" (addr), "+c" (cnt)
319 			 : "d" (port));
320 }
321 
322 static __inline void
323 outsw(u_int port, const void *addr, size_t cnt)
324 {
325 	__asm __volatile("cld; rep; outsw"
326 			 : "+S" (addr), "+c" (cnt)
327 			 : "d" (port));
328 }
329 
330 static __inline void
331 outsl(u_int port, const void *addr, size_t cnt)
332 {
333 	__asm __volatile("cld; rep; outsl"
334 			 : "+S" (addr), "+c" (cnt)
335 			 : "d" (port));
336 }
337 
338 static __inline void
339 outw(u_int port, u_short data)
340 {
341 	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
342 }
343 
/*
 * Execute PAUSE.
 */
static __inline void
ia32_pause(void)
{

	__asm __volatile("pause");
}
349 
/*
 * Return the current RFLAGS value, obtained by pushing it on the stack
 * and popping it into a register.
 */
static __inline u_long
read_rflags(void)
{
	u_long	flags;

	__asm __volatile("pushfq; popq %0" : "=r" (flags));
	return (flags);
}
358 
359 static __inline u_int64_t
360 rdmsr(u_int msr)
361 {
362 	u_int32_t low, high;
363 
364 	__asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr));
365 	return (low | ((u_int64_t)high << 32));
366 }
367 
368 static __inline u_int64_t
369 rdpmc(u_int pmc)
370 {
371 	u_int32_t low, high;
372 
373 	__asm __volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (pmc));
374 	return (low | ((u_int64_t)high << 32));
375 }
376 
/*
 * Read the time-stamp counter.  RDTSC returns the value split across
 * %edx (upper half) and %eax (lower half); the halves are combined
 * into one 64-bit result.
 */
static __inline u_int64_t
rdtsc(void)
{
	u_int32_t lo, hi;
	u_int64_t val;

	__asm __volatile("rdtsc" : "=a" (lo), "=d" (hi));
	val = (u_int64_t)hi << 32;
	val |= lo;
	return (val);
}
385 
/*
 * Execute WBINVD.
 */
static __inline void
wbinvd(void)
{

	__asm __volatile("wbinvd");
}
391 
392 static __inline void
393 write_rflags(u_long rf)
394 {
395 	__asm __volatile("pushq %0;  popfq" : : "r" (rf));
396 }
397 
398 static __inline void
399 wrmsr(u_int msr, u_int64_t newval)
400 {
401 	u_int32_t low, high;
402 
403 	low = newval;
404 	high = newval >> 32;
405 	__asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr));
406 }
407 
408 static __inline void
409 load_cr0(u_long data)
410 {
411 
412 	__asm __volatile("movq %0,%%cr0" : : "r" (data));
413 }
414 
415 static __inline u_long
416 rcr0(void)
417 {
418 	u_long	data;
419 
420 	__asm __volatile("movq %%cr0,%0" : "=r" (data));
421 	return (data);
422 }
423 
424 static __inline u_long
425 rcr2(void)
426 {
427 	u_long	data;
428 
429 	__asm __volatile("movq %%cr2,%0" : "=r" (data));
430 	return (data);
431 }
432 
433 static __inline void
434 load_cr3(u_long data)
435 {
436 
437 	__asm __volatile("movq %0,%%cr3" : : "r" (data) : "memory");
438 }
439 
440 static __inline u_long
441 rcr3(void)
442 {
443 	u_long	data;
444 
445 	__asm __volatile("movq %%cr3,%0" : "=r" (data));
446 	return (data);
447 }
448 
449 static __inline void
450 load_cr4(u_long data)
451 {
452 	__asm __volatile("movq %0,%%cr4" : : "r" (data));
453 }
454 
455 static __inline u_long
456 rcr4(void)
457 {
458 	u_long	data;
459 
460 	__asm __volatile("movq %%cr4,%0" : "=r" (data));
461 	return (data);
462 }
463 
464 /*
465  * Global TLB flush (except for thise for pages marked PG_G)
466  */
467 static __inline void
468 invltlb(void)
469 {
470 
471 	load_cr3(rcr3());
472 }
473 
474 /*
475  * TLB flush for an individual page (even if it has PG_G).
476  * Only works on 486+ CPUs (i386 does not have PG_G).
477  */
478 static __inline void
479 invlpg(u_long addr)
480 {
481 
482 	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
483 }
484 
/*
 * Return the selector currently loaded in %fs.
 *
 * The destination is restricted to a general-purpose register.  When a
 * segment register is stored to memory only 16 bits are written, so a
 * memory destination would leave the upper half of the u_int result
 * uninitialized.
 */
static __inline u_int
rfs(void)
{
	u_int sel;

	__asm __volatile("movl %%fs,%0" : "=r" (sel));
	return (sel);
}
492 
/*
 * Return the selector currently loaded in %gs.
 *
 * The destination is restricted to a general-purpose register.  When a
 * segment register is stored to memory only 16 bits are written, so a
 * memory destination would leave the upper half of the u_int result
 * uninitialized.
 */
static __inline u_int
rgs(void)
{
	u_int sel;

	__asm __volatile("movl %%gs,%0" : "=r" (sel));
	return (sel);
}
500 
/*
 * Return the selector currently loaded in %ss.
 *
 * The destination is restricted to a general-purpose register.  When a
 * segment register is stored to memory only 16 bits are written, so a
 * memory destination would leave the upper half of the u_int result
 * uninitialized.
 */
static __inline u_int
rss(void)
{
	u_int sel;

	__asm __volatile("movl %%ss,%0" : "=r" (sel));
	return (sel);
}
508 
509 static __inline void
510 load_ds(u_int sel)
511 {
512 	__asm __volatile("movl %0,%%ds" : : "rm" (sel));
513 }
514 
515 static __inline void
516 load_es(u_int sel)
517 {
518 	__asm __volatile("movl %0,%%es" : : "rm" (sel));
519 }
520 
521 #ifdef _KERNEL
522 /* This is defined in <machine/specialreg.h> but is too painful to get to */
523 #ifndef	MSR_FSBASE
524 #define	MSR_FSBASE	0xc0000100
525 #endif
526 static __inline void
527 load_fs(u_int sel)
528 {
529 	register u_int32_t fsbase __asm("ecx");
530 
531 	/* Preserve the fsbase value across the selector load */
532 	fsbase = MSR_FSBASE;
533         __asm __volatile("rdmsr; movl %0,%%fs; wrmsr"
534             : : "rm" (sel), "c" (fsbase) : "eax", "edx");
535 }
536 
537 #ifndef	MSR_GSBASE
538 #define	MSR_GSBASE	0xc0000101
539 #endif
540 static __inline void
541 load_gs(u_int sel)
542 {
543 	register u_int32_t gsbase __asm("ecx");
544 
545 	/*
546 	 * Preserve the gsbase value across the selector load.
547 	 * Note that we have to disable interrupts because the gsbase
548 	 * being trashed happens to be the kernel gsbase at the time.
549 	 */
550 	gsbase = MSR_GSBASE;
551         __asm __volatile("pushfq; cli; rdmsr; movl %0,%%gs; wrmsr; popfq"
552             : : "rm" (sel), "c" (gsbase) : "eax", "edx");
553 }
554 #else
555 /* Usable by userland */
556 static __inline void
557 load_fs(u_int sel)
558 {
559 	__asm __volatile("movl %0,%%fs" : : "rm" (sel));
560 }
561 
562 static __inline void
563 load_gs(u_int sel)
564 {
565 	__asm __volatile("movl %0,%%gs" : : "rm" (sel));
566 }
567 #endif
568 
/*
 * Load the interrupt descriptor table register from the region
 * descriptor at `addr'.
 *
 * The instruction reads the descriptor through a bare register
 * pointer, so the compiler cannot see the memory access.  The "memory"
 * clobber makes sure stores that fill in the descriptor are completed
 * before LIDT executes.
 */
static __inline void
lidt(struct region_descriptor *addr)
{
	__asm __volatile("lidt (%0)" : : "r" (addr) : "memory");
}
574 
575 static __inline void
576 lldt(u_short sel)
577 {
578 	__asm __volatile("lldt %0" : : "r" (sel));
579 }
580 
581 static __inline void
582 ltr(u_short sel)
583 {
584 	__asm __volatile("ltr %0" : : "r" (sel));
585 }
586 
/*
 * Return the contents of debug register %dr0.
 */
static __inline u_int64_t
rdr0(void)
{
	u_int64_t val;

	__asm __volatile("movq %%dr0,%0" : "=r" (val));
	return (val);
}
594 
/*
 * Load debug register %dr0 with `dr0'.
 */
static __inline void
load_dr0(u_int64_t dr0)
{

	__asm __volatile("movq %0,%%dr0" : : "r" (dr0));
}
600 
/*
 * Return the contents of debug register %dr1.
 */
static __inline u_int64_t
rdr1(void)
{
	u_int64_t val;

	__asm __volatile("movq %%dr1,%0" : "=r" (val));
	return (val);
}
608 
/*
 * Load debug register %dr1 with `dr1'.
 */
static __inline void
load_dr1(u_int64_t dr1)
{

	__asm __volatile("movq %0,%%dr1" : : "r" (dr1));
}
614 
/*
 * Return the contents of debug register %dr2.
 */
static __inline u_int64_t
rdr2(void)
{
	u_int64_t val;

	__asm __volatile("movq %%dr2,%0" : "=r" (val));
	return (val);
}
622 
/*
 * Load debug register %dr2 with `dr2'.
 */
static __inline void
load_dr2(u_int64_t dr2)
{

	__asm __volatile("movq %0,%%dr2" : : "r" (dr2));
}
628 
/*
 * Return the contents of debug register %dr3.
 */
static __inline u_int64_t
rdr3(void)
{
	u_int64_t val;

	__asm __volatile("movq %%dr3,%0" : "=r" (val));
	return (val);
}
636 
/*
 * Load debug register %dr3 with `dr3'.
 */
static __inline void
load_dr3(u_int64_t dr3)
{

	__asm __volatile("movq %0,%%dr3" : : "r" (dr3));
}
642 
/*
 * Return the contents of debug register %dr4.
 */
static __inline u_int64_t
rdr4(void)
{
	u_int64_t val;

	__asm __volatile("movq %%dr4,%0" : "=r" (val));
	return (val);
}
650 
/*
 * Load debug register %dr4 with `dr4'.
 */
static __inline void
load_dr4(u_int64_t dr4)
{

	__asm __volatile("movq %0,%%dr4" : : "r" (dr4));
}
656 
/*
 * Return the contents of debug register %dr5.
 */
static __inline u_int64_t
rdr5(void)
{
	u_int64_t val;

	__asm __volatile("movq %%dr5,%0" : "=r" (val));
	return (val);
}
664 
/*
 * Load debug register %dr5 with `dr5'.
 */
static __inline void
load_dr5(u_int64_t dr5)
{

	__asm __volatile("movq %0,%%dr5" : : "r" (dr5));
}
670 
/*
 * Return the contents of debug register %dr6.
 */
static __inline u_int64_t
rdr6(void)
{
	u_int64_t val;

	__asm __volatile("movq %%dr6,%0" : "=r" (val));
	return (val);
}
678 
/*
 * Load debug register %dr6 with `dr6'.
 */
static __inline void
load_dr6(u_int64_t dr6)
{

	__asm __volatile("movq %0,%%dr6" : : "r" (dr6));
}
684 
/*
 * Return the contents of debug register %dr7.
 */
static __inline u_int64_t
rdr7(void)
{
	u_int64_t val;

	__asm __volatile("movq %%dr7,%0" : "=r" (val));
	return (val);
}
692 
/*
 * Load debug register %dr7 with `dr7'.
 */
static __inline void
load_dr7(u_int64_t dr7)
{

	__asm __volatile("movq %0,%%dr7" : : "r" (dr7));
}
698 
/*
 * Snapshot RFLAGS, then disable interrupts.  Returns the snapshot,
 * which can be passed to intr_restore().
 */
static __inline register_t
intr_disable(void)
{
	register_t saved;

	saved = read_rflags();
	disable_intr();
	return (saved);
}
708 
/*
 * Write `rflags' back into the flags register via write_rflags().
 */
static __inline void
intr_restore(register_t rflags)
{

	write_rflags(rflags);
}
714 
715 #else /* !__GNUC__ */
716 
/*
 * Out-of-line prototypes for compilers without GNU inline assembly.
 * The types match the inline definitions used under __GNUC__:
 * breakpoint() returns void, invlpg() takes a full-width u_long
 * address, and the rflags accessors use u_long.  bsfq(), bsrq(),
 * load_ds(), load_es() and rss() are declared as well so both
 * branches expose the same set of routines.
 */
void	breakpoint(void);
u_int	bsfl(u_int mask);
u_long	bsfq(u_long mask);
u_int	bsrl(u_int mask);
u_long	bsrq(u_long mask);
void	disable_intr(void);
void	do_cpuid(u_int ax, u_int *p);
void	enable_intr(void);
void	halt(void);
void	ia32_pause(void);
u_char	inb(u_int port);
u_int	inl(u_int port);
void	insb(u_int port, void *addr, size_t cnt);
void	insl(u_int port, void *addr, size_t cnt);
void	insw(u_int port, void *addr, size_t cnt);
register_t	intr_disable(void);
void	intr_restore(register_t rf);
void	invd(void);
void	invlpg(u_long addr);
void	invltlb(void);
u_short	inw(u_int port);
void	lidt(struct region_descriptor *addr);
void	lldt(u_short sel);
void	load_cr0(u_long cr0);
void	load_cr3(u_long cr3);
void	load_cr4(u_long cr4);
void	load_dr0(u_int64_t dr0);
void	load_dr1(u_int64_t dr1);
void	load_dr2(u_int64_t dr2);
void	load_dr3(u_int64_t dr3);
void	load_dr4(u_int64_t dr4);
void	load_dr5(u_int64_t dr5);
void	load_dr6(u_int64_t dr6);
void	load_dr7(u_int64_t dr7);
void	load_ds(u_int sel);
void	load_es(u_int sel);
void	load_fs(u_int sel);
void	load_gs(u_int sel);
void	ltr(u_short sel);
void	outb(u_int port, u_char data);
void	outl(u_int port, u_int data);
void	outsb(u_int port, const void *addr, size_t cnt);
void	outsl(u_int port, const void *addr, size_t cnt);
void	outsw(u_int port, const void *addr, size_t cnt);
void	outw(u_int port, u_short data);
u_long	rcr0(void);
u_long	rcr2(void);
u_long	rcr3(void);
u_long	rcr4(void);
u_int64_t rdmsr(u_int msr);
u_int64_t rdpmc(u_int pmc);
u_int64_t rdr0(void);
u_int64_t rdr1(void);
u_int64_t rdr2(void);
u_int64_t rdr3(void);
u_int64_t rdr4(void);
u_int64_t rdr5(void);
u_int64_t rdr6(void);
u_int64_t rdr7(void);
u_int64_t rdtsc(void);
u_long	read_rflags(void);
u_int	rfs(void);
u_int	rgs(void);
u_int	rss(void);
void	wbinvd(void);
void	write_rflags(u_long rf);
void	wrmsr(u_int msr, u_int64_t newval);
779 
780 #endif	/* __GNUC__ */
781 
/* Declared for every compiler; the definition is not in this header. */
void	reset_dbregs(void);
783 
784 #endif /* !_MACHINE_CPUFUNC_H_ */
785