xref: /freebsd/sys/amd64/include/cpufunc.h (revision 11afcc8f9f96d657b8e6f7547c02c1957331fc96)
1 /*-
2  * Copyright (c) 1993 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	$Id: cpufunc.h,v 1.79 1998/05/17 18:53:08 tegge Exp $
34  */
35 
36 /*
37  * Functions to provide access to special i386 instructions.
38  */
39 
40 #ifndef _MACHINE_CPUFUNC_H_
41 #define	_MACHINE_CPUFUNC_H_
42 
43 #include <sys/cdefs.h>
44 #include <sys/types.h>
45 
46 #include <machine/lock.h>
47 
48 #if defined(SWTCH_OPTIM_STATS)
49 extern int tlb_flush_count;
50 #endif
51 
52 #ifdef	__GNUC__
53 
/*
 * Execute the one-instruction `int $3' breakpoint trap.
 */
static __inline void
breakpoint(void)
{
	__asm __volatile(
	    "int $3"
	);
}
59 
/*
 * Execute `cli' and then invoke MPINTR_LOCK() (a macro that is not
 * defined in this header).  The "memory" clobber stops the compiler from
 * moving memory accesses across the `cli'.
 */
static __inline void
disable_intr(void)
{
	__asm __volatile(
	    "cli"
	    : /* no outputs */
	    : /* no inputs */
	    : "memory");
	MPINTR_LOCK();
}
66 
/*
 * Invoke MPINTR_UNLOCK() (a macro that is not defined in this header) and
 * then execute `sti'.
 *
 * The "memory" clobber matches the one on `cli' in disable_intr().
 * Without it the compiler is free to sink loads and stores that belong
 * to the protected region below the `sti'.
 */
static __inline void
enable_intr(void)
{
	MPINTR_UNLOCK();
	__asm __volatile("sti" : : : "memory");
}
73 
#define	HAVE_INLINE_FFS

/*
 * Find first set: return the 1-based position of the least significant
 * set bit of `mask', or 0 when no bit is set.
 *
 * bsfl turns out to be not all that slow on 486's.  It can be beaten
 * using a binary search to reduce to 4 bits and then a table lookup,
 * but only if the code is inlined and in the cache, and the code
 * is quite large so inlining it probably busts the cache.
 *
 * Note that gcc-2's builtin ffs would be used if we didn't declare
 * this inline or turn off the builtin.  The builtin is faster but
 * broken in gcc-2.4.5 and slower but working in gcc-2.5 and 2.6.
 */
static __inline int
ffs(int mask)
{
	int	bit;

	/* bsfl is only issued for a non-zero operand. */
	if (mask == 0)
		return (0);
	__asm __volatile("bsfl %1,%0" : "=r" (bit) : "r" (mask));
	return (bit + 1);
}
94 
#define	HAVE_INLINE_FLS

/*
 * Find last set: return the 1-based position of the most significant
 * set bit of `mask', or 0 when no bit is set.
 */
static __inline int
fls(int mask)
{
	int	bit;

	/* bsrl is only issued for a non-zero operand. */
	if (mask == 0)
		return (0);
	__asm __volatile("bsrl %1,%0" : "=r" (bit) : "r" (mask));
	return (bit + 1);
}
105 
106 #if __GNUC__ < 2
107 
/* Before gcc 2, byte port I/O always goes through the variable-port versions. */
#define	inb(p)			inbv(p)
#define	outb(p, d)		outbv(p, d)
110 
#else /* __GNUC__ >= 2 */
112 
113 /*
114  * The following complications are to get around gcc not having a
115  * constraint letter for the range 0..255.  We still put "d" in the
116  * constraint because "i" isn't a valid constraint when the port
117  * isn't constant.  This only matters for -O0 because otherwise
118  * the non-working version gets optimized away.
119  *
120  * Use an expression-statement instead of a conditional expression
121  * because gcc-2.6.0 would promote the operands of the conditional
122  * and produce poor code for "if ((inb(var) & const1) == const2)".
123  *
124  * The unnecessary test `(port) < 0x10000' is to generate a warning if
125  * the `port' has type u_short or smaller.  Such types are pessimal.
126  * This actually only works for signed types.  The range check is
127  * careful to avoid generating warnings.
128  */
/*
 * Read a byte from `port': inbc() when the port is a compile-time
 * constant below 0x100, inbv() for everything else.
 */
#define	inb(port) __extension__ ({					\
	u_char	_byte;							\
	if (!__builtin_constant_p(port) || ((port) & 0xffff) >= 0x100	\
	    || (port) >= 0x10000)					\
		_byte = inbv(port);					\
	else								\
		_byte = inbc(port);					\
	_byte; })
137 
/*
 * Write the byte `data' to `port': outbc() when the port is a
 * compile-time constant below 0x100, outbv() for everything else.
 */
#define	outb(port, data) (						\
	!__builtin_constant_p(port) || ((port) & 0xffff) >= 0x100	\
	|| (port) >= 0x10000						\
	? outbv(port, data) : outbc(port, data))
142 
143 static __inline u_char
144 inbc(u_int port)
145 {
146 	u_char	data;
147 
148 	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
149 	return (data);
150 }
151 
152 static __inline void
153 outbc(u_int port, u_char data)
154 {
155 	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
156 }
157 
#endif /* __GNUC__ >= 2 */
159 
160 static __inline u_char
161 inbv(u_int port)
162 {
163 	u_char	data;
164 	/*
165 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
166 	 * %edx, while gcc generates inferior code (movw instead of movl)
167 	 * if we tell it to load (u_short) port.
168 	 */
169 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
170 	return (data);
171 }
172 
173 static __inline u_int
174 inl(u_int port)
175 {
176 	u_int	data;
177 
178 	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
179 	return (data);
180 }
181 
/*
 * Read `cnt' bytes from the I/O port `port' into the buffer at `addr'
 * using `rep; insb' (after `cld', so the buffer address ascends).
 *
 * The instruction advances the destination register and counts the
 * count register down, so both are declared as outputs tied to their
 * inputs ("0"/"1").  Listing them as inputs and as clobbers at the same
 * time, as was done before, is rejected by gcc.  The "memory" clobber
 * tells the compiler that the buffer is written.
 */
static __inline void
insb(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insb"
			 : "=D" (addr), "=c" (cnt)
			 : "d" (port), "0" (addr), "1" (cnt)
			 : "memory");
}
189 
/*
 * Read `cnt' 16-bit words from the I/O port `port' into the buffer at
 * `addr' using `rep; insw' (after `cld', so the buffer address ascends).
 *
 * The instruction advances the destination register and counts the
 * count register down, so both are declared as outputs tied to their
 * inputs ("0"/"1").  Listing them as inputs and as clobbers at the same
 * time, as was done before, is rejected by gcc.  The "memory" clobber
 * tells the compiler that the buffer is written.
 */
static __inline void
insw(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insw"
			 : "=D" (addr), "=c" (cnt)
			 : "d" (port), "0" (addr), "1" (cnt)
			 : "memory");
}
197 
/*
 * Read `cnt' 32-bit values from the I/O port `port' into the buffer at
 * `addr' using `rep; insl' (after `cld', so the buffer address ascends).
 *
 * The instruction advances the destination register and counts the
 * count register down, so both are declared as outputs tied to their
 * inputs ("0"/"1").  Listing them as inputs and as clobbers at the same
 * time, as was done before, is rejected by gcc.  The "memory" clobber
 * tells the compiler that the buffer is written.
 */
static __inline void
insl(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insl"
			 : "=D" (addr), "=c" (cnt)
			 : "d" (port), "0" (addr), "1" (cnt)
			 : "memory");
}
205 
/*
 * Execute the `invd' instruction.
 */
static __inline void
invd(void)
{
	__asm __volatile(
	    "invd"
	);
}
211 
212 #ifdef KERNEL
213 #ifdef SMP
214 
215 /*
216  * When using APIC IPI's, the inlining cost is prohibitive since the call
217  * executes into the IPI transmission system.
218  */
219 void	invlpg		__P((u_int addr));
220 void	invltlb		__P((void));
221 
/*
 * Execute `invlpg' on the byte at `addr'.  The "memory" clobber keeps
 * the compiler from moving memory accesses across the instruction.
 */
static __inline void
cpu_invlpg(void *addr)
{
	__asm __volatile("invlpg %0"
			 : /* no outputs */
			 : "m" (*(char *)addr)
			 : "memory");
}
227 
228 static __inline void
229 cpu_invltlb(void)
230 {
231 	u_int	temp;
232 	/*
233 	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
234 	 * is inlined.
235 	 */
236 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
237 			 : : "memory");
238 #if defined(SWTCH_OPTIM_STATS)
239 	++tlb_flush_count;
240 #endif
241 }
242 #else  /* !SMP */
243 
244 static __inline void
245 invlpg(u_int addr)
246 {
247 	__asm   __volatile("invlpg %0"::"m"(*(char *)addr):"memory");
248 }
249 
250 
251 static __inline void
252 invltlb(void)
253 {
254 	u_int	temp;
255 	/*
256 	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
257 	 * is inlined.
258 	 */
259 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
260 			 : : "memory");
261 #if defined(SWTCH_OPTIM_STATS)
262 	++tlb_flush_count;
263 #endif
264 }
265 
266 #endif	/* SMP */
267 #endif  /* KERNEL */
268 
269 static __inline u_short
270 inw(u_int port)
271 {
272 	u_short	data;
273 
274 	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
275 	return (data);
276 }
277 
/*
 * Fetch the word at `addr' and leave zero in its place; the old value is
 * returned.  The swap is a single `xchgl' with a zeroed register.
 *
 * *addr is listed as an output as well as an input because the xchgl
 * writes it; declared as an input only, the compiler could assume the
 * word is unchanged and reuse a stale copy.
 */
static __inline u_int
loadandclear(u_int *addr)
{
	u_int	result;

	__asm __volatile("xorl %0,%0; xchgl %1,%0"
			 : "=&r" (result), "=m" (*addr)
			 : "m" (*addr));
	return (result);
}
287 
288 static __inline void
289 outbv(u_int port, u_char data)
290 {
291 	u_char	al;
292 	/*
293 	 * Use an unnecessary assignment to help gcc's register allocator.
294 	 * This make a large difference for gcc-1.40 and a tiny difference
295 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
296 	 * best results.  gcc-2.6.0 can't handle this.
297 	 */
298 	al = data;
299 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
300 }
301 
302 static __inline void
303 outl(u_int port, u_int data)
304 {
305 	/*
306 	 * outl() and outw() aren't used much so we haven't looked at
307 	 * possible micro-optimizations such as the unnecessary
308 	 * assignment for them.
309 	 */
310 	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
311 }
312 
/*
 * Write `cnt' bytes from the buffer at `addr' to the I/O port `port'
 * using `rep; outsb' (after `cld', so the buffer address ascends).
 *
 * The instruction advances the source register and counts the count
 * register down, so both are declared as outputs tied to their inputs
 * ("0"/"1").  Listing them as inputs and as clobbers at the same time,
 * as was done before, is rejected by gcc.  The "memory" clobber makes
 * the compiler complete pending stores to the buffer before the asm
 * reads it.
 */
static __inline void
outsb(u_int port, const void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; outsb"
			 : "=S" (addr), "=c" (cnt)
			 : "d" (port), "0" (addr), "1" (cnt)
			 : "memory");
}
320 
/*
 * Write `cnt' 16-bit words from the buffer at `addr' to the I/O port
 * `port' using `rep; outsw' (after `cld', so the buffer address ascends).
 *
 * The instruction advances the source register and counts the count
 * register down, so both are declared as outputs tied to their inputs
 * ("0"/"1").  Listing them as inputs and as clobbers at the same time,
 * as was done before, is rejected by gcc.  The "memory" clobber makes
 * the compiler complete pending stores to the buffer before the asm
 * reads it.
 */
static __inline void
outsw(u_int port, const void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; outsw"
			 : "=S" (addr), "=c" (cnt)
			 : "d" (port), "0" (addr), "1" (cnt)
			 : "memory");
}
328 
/*
 * Write `cnt' 32-bit values from the buffer at `addr' to the I/O port
 * `port' using `rep; outsl' (after `cld', so the buffer address ascends).
 *
 * The instruction advances the source register and counts the count
 * register down, so both are declared as outputs tied to their inputs
 * ("0"/"1").  Listing them as inputs and as clobbers at the same time,
 * as was done before, is rejected by gcc.  The "memory" clobber makes
 * the compiler complete pending stores to the buffer before the asm
 * reads it.
 */
static __inline void
outsl(u_int port, const void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; outsl"
			 : "=S" (addr), "=c" (cnt)
			 : "d" (port), "0" (addr), "1" (cnt)
			 : "memory");
}
336 
337 static __inline void
338 outw(u_int port, u_short data)
339 {
340 	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
341 }
342 
343 static __inline u_int
344 rcr2(void)
345 {
346 	u_int	data;
347 
348 	__asm __volatile("movl %%cr2,%0" : "=r" (data));
349 	return (data);
350 }
351 
352 static __inline u_int
353 read_eflags(void)
354 {
355 	u_int	ef;
356 
357 	__asm __volatile("pushfl; popl %0" : "=r" (ef));
358 	return (ef);
359 }
360 
361 static __inline u_int64_t
362 rdmsr(u_int msr)
363 {
364 	u_int64_t rv;
365 
366 	__asm __volatile(".byte 0x0f, 0x32" : "=A" (rv) : "c" (msr));
367 	return (rv);
368 }
369 
370 static __inline u_int64_t
371 rdpmc(u_int pmc)
372 {
373 	u_int64_t rv;
374 
375 	__asm __volatile(".byte 0x0f, 0x33" : "=A" (rv) : "c" (pmc));
376 	return (rv);
377 }
378 
/*
 * Return the 64-bit value produced by the instruction encoded as the raw
 * opcode bytes 0x0f 0x31 (rdtsc, going by this function's name).
 */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t value;

	__asm __volatile(".byte 0x0f, 0x31"
			 : "=A" (value));
	return (value);
}
387 
/*
 * OR `bits' into the word at `addr' with a single `orl'; under SMP the
 * instruction carries a `lock' prefix.
 *
 * orl reads the word as well as writing it, so *addr is passed as an
 * "m" input in addition to the "=m" output.  With only the output
 * declared, the compiler could treat the previous contents as dead.
 */
static __inline void
setbits(volatile unsigned *addr, u_int bits)
{
	__asm __volatile(
#ifdef SMP
			 "lock; "
#endif
			 "orl %1,%0"
			 : "=m" (*addr)
			 : "ir" (bits), "m" (*addr));
}
397 
/*
 * Execute the `wbinvd' instruction.
 */
static __inline void
wbinvd(void)
{
	__asm __volatile(
	    "wbinvd"
	);
}
403 
404 static __inline void
405 write_eflags(u_int ef)
406 {
407 	__asm __volatile("pushl %0; popfl" : : "r" (ef));
408 }
409 
410 static __inline void
411 wrmsr(u_int msr, u_int64_t newval)
412 {
413 	__asm __volatile(".byte 0x0f, 0x30" : : "A" (newval), "c" (msr));
414 }
415 
416 #else /* !__GNUC__ */
417 
418 int	breakpoint	__P((void));
419 void	disable_intr	__P((void));
420 void	enable_intr	__P((void));
421 u_char	inb		__P((u_int port));
422 u_int	inl		__P((u_int port));
423 void	insb		__P((u_int port, void *addr, size_t cnt));
424 void	insl		__P((u_int port, void *addr, size_t cnt));
425 void	insw		__P((u_int port, void *addr, size_t cnt));
426 void	invd		__P((void));
427 void	invlpg		__P((u_int addr));
428 void	invltlb		__P((void));
429 u_short	inw		__P((u_int port));
430 u_int	loadandclear	__P((u_int *addr));
431 void	outb		__P((u_int port, u_char data));
432 void	outl		__P((u_int port, u_int data));
433 void	outsb		__P((u_int port, void *addr, size_t cnt));
434 void	outsl		__P((u_int port, void *addr, size_t cnt));
435 void	outsw		__P((u_int port, void *addr, size_t cnt));
436 void	outw		__P((u_int port, u_short data));
437 u_int	rcr2		__P((void));
438 u_int64_t rdmsr		__P((u_int msr));
439 u_int64_t rdpmc		__P((u_int pmc));
440 u_int64_t rdtsc		__P((void));
441 u_int	read_eflags	__P((void));
442 void	setbits		__P((volatile unsigned *addr, u_int bits));
443 void	wbinvd		__P((void));
444 void	write_eflags	__P((u_int ef));
445 void	wrmsr		__P((u_int msr, u_int64_t newval));
446 
447 #endif	/* __GNUC__ */
448 
449 void	load_cr0	__P((u_int cr0));
450 void	load_cr3	__P((u_int cr3));
451 void	load_cr4	__P((u_int cr4));
452 void	ltr		__P((u_short sel));
453 u_int	rcr0		__P((void));
454 u_int	rcr3		__P((void));
455 u_int	rcr4		__P((void));
456 void	i686_pagezero	__P((void *addr));
457 
458 #endif /* !_MACHINE_CPUFUNC_H_ */
459