xref: /freebsd/sys/i386/include/cpufunc.h (revision 380a989b3223d455375b4fae70fd0b9bdd43bafb)
1 /*-
2  * Copyright (c) 1993 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	$Id: cpufunc.h,v 1.84 1999/01/08 19:51:02 bde Exp $
34  */
35 
36 /*
37  * Functions to provide access to special i386 instructions.
38  */
39 
40 #ifndef _MACHINE_CPUFUNC_H_
41 #define	_MACHINE_CPUFUNC_H_
42 
/*
 * Memory-mapped access helpers.  Each macro dereferences `va' through a
 * volatile pointer of the named width: b = 8, w = 16, l = 32 bits.
 * The write forms evaluate to the assigned value.
 */
#define readb(va)	(*(volatile u_int8_t *) (va))
#define writeb(va, d)	(*(volatile u_int8_t *) (va) = (d))

#define readw(va)	(*(volatile u_int16_t *) (va))
#define writew(va, d)	(*(volatile u_int16_t *) (va) = (d))

#define readl(va)	(*(volatile u_int32_t *) (va))
#define writel(va, d)	(*(volatile u_int32_t *) (va) = (d))
50 
51 #ifdef	__GNUC__
52 
53 #ifdef SMP
54 #include <machine/lock.h>		/* XXX */
55 #endif
56 
#ifdef SWTCH_OPTIM_STATS
/* Incremented by the inline TLB flush routines in this header. */
extern	int	tlb_flush_count;	/* XXX */
#endif
60 
/*
 * Execute the "int $3" breakpoint instruction.
 */
static __inline void
breakpoint(void)
{
	__asm __volatile("int $3");
}
66 
/*
 * Disable interrupts with "cli".  The "memory" clobber stops gcc from
 * moving memory accesses across the instruction.  On SMP kernels
 * MPINTR_LOCK() is taken afterwards, once interrupts are already off.
 */
static __inline void
disable_intr(void)
{
	__asm __volatile("cli" : : : "memory");
#ifdef SMP
	MPINTR_LOCK();
#endif
}
75 
/*
 * Re-enable interrupts with "sti".  On SMP kernels MPINTR_UNLOCK() is
 * called first, while interrupts are still off.
 *
 * The "memory" clobber mirrors disable_intr(): without it gcc is free to
 * sink memory accesses that belong to the interrupts-off section past the
 * "sti", because a volatile asm alone does not order ordinary loads and
 * stores.
 */
static __inline void
enable_intr(void)
{
#ifdef SMP
	MPINTR_UNLOCK();
#endif
	__asm __volatile("sti" : : : "memory");
}
84 
#define	HAVE_INLINE_FFS

/*
 * Find first set: return the 1-based position of the least significant
 * set bit in `mask', or 0 when `mask' is 0.
 *
 * bsfl turns out to be not all that slow on 486's.  It can be beaten
 * using a binary search to reduce to 4 bits and then a table lookup,
 * but only if the code is inlined and in the cache, and the code
 * is quite large so inlining it probably busts the cache.
 *
 * Note that gcc-2's builtin ffs would be used if we didn't declare
 * this inline or turn off the builtin.  The builtin is faster but
 * broken in gcc-2.4.5 and slower but working in gcc-2.5 and 2.6.
 */
static __inline int
ffs(int mask)
{
	int	bit;

	/*
	 * Input and output share one register ("0" constraint).  A zero
	 * mask jumps over bsfl/incl, so the register still holds 0.
	 */
	__asm __volatile("testl %0,%0; je 1f; bsfl %0,%0; incl %0; 1:"
			 : "=r" (bit) : "0" (mask));
	return (bit);
}
105 
#define	HAVE_INLINE_FLS

/*
 * Find last set: return the 1-based position of the most significant
 * set bit in `mask', or 0 when `mask' is 0.
 */
static __inline int
fls(int mask)
{
	int	bit;

	/*
	 * Input and output share one register ("0" constraint).  A zero
	 * mask jumps over bsrl/incl, so the register still holds 0.
	 */
	__asm __volatile("testl %0,%0; je 1f; bsrl %0,%0; incl %0; 1:"
			 : "=r" (bit) : "0" (mask));
	return (bit);
}
116 
117 #if __GNUC__ < 2
118 
/* Before gcc 2 there is no constant-port variant; use the %dx forms. */
#define	inb(port)		inbv(port)
#define	outb(port, data)	outbv(port, data)
121 
#else /* __GNUC__ >= 2 */
123 
124 /*
125  * The following complications are to get around gcc not having a
126  * constraint letter for the range 0..255.  We still put "d" in the
127  * constraint because "i" isn't a valid constraint when the port
128  * isn't constant.  This only matters for -O0 because otherwise
129  * the non-working version gets optimized away.
130  *
131  * Use an expression-statement instead of a conditional expression
132  * because gcc-2.6.0 would promote the operands of the conditional
133  * and produce poor code for "if ((inb(var) & const1) == const2)".
134  *
135  * The unnecessary test `(port) < 0x10000' is to generate a warning if
136  * the `port' has type u_short or smaller.  Such types are pessimal.
137  * This actually only works for signed types.  The range check is
138  * careful to avoid generating warnings.
139  */
/*
 * Read a byte from `port'.  Compile-time constant ports below 0x100 go
 * through inbc(); everything else goes through inbv().
 */
#define	inb(port) __extension__ ({					\
	u_char	_data;							\
	if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100	\
	    && (port) < 0x10000) {					\
		_data = inbc(port);					\
	} else {							\
		_data = inbv(port);					\
	}								\
	_data; })
148 
/*
 * Write byte `data' to `port'.  Compile-time constant ports below 0x100
 * go through outbc(); everything else goes through outbv().
 */
#define	outb(port, data) (						\
	(__builtin_constant_p(port) && ((port) & 0xffff) < 0x100	\
	 && (port) < 0x10000)						\
	    ? outbc(port, data)						\
	    : outbv(port, data))
153 
154 static __inline u_char
155 inbc(u_int port)
156 {
157 	u_char	data;
158 
159 	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
160 	return (data);
161 }
162 
163 static __inline void
164 outbc(u_int port, u_char data)
165 {
166 	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
167 }
168 
#endif /* __GNUC__ >= 2 */
170 
171 static __inline u_char
172 inbv(u_int port)
173 {
174 	u_char	data;
175 	/*
176 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
177 	 * %edx, while gcc generates inferior code (movw instead of movl)
178 	 * if we tell it to load (u_short) port.
179 	 */
180 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
181 	return (data);
182 }
183 
184 static __inline u_int
185 inl(u_int port)
186 {
187 	u_int	data;
188 
189 	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
190 	return (data);
191 }
192 
193 static __inline void
194 insb(u_int port, void *addr, size_t cnt)
195 {
196 	__asm __volatile("cld; rep; insb"
197 			 : "=D" (addr), "=c" (cnt)
198 			 :  "0" (addr),  "1" (cnt), "d" (port)
199 			 : "memory");
200 }
201 
202 static __inline void
203 insw(u_int port, void *addr, size_t cnt)
204 {
205 	__asm __volatile("cld; rep; insw"
206 			 : "=D" (addr), "=c" (cnt)
207 			 :  "0" (addr),  "1" (cnt), "d" (port)
208 			 : "memory");
209 }
210 
211 static __inline void
212 insl(u_int port, void *addr, size_t cnt)
213 {
214 	__asm __volatile("cld; rep; insl"
215 			 : "=D" (addr), "=c" (cnt)
216 			 :  "0" (addr),  "1" (cnt), "d" (port)
217 			 : "memory");
218 }
219 
/*
 * Execute the "invd" instruction.
 */
static __inline void
invd(void)
{
	__asm __volatile("invd");
}
225 
226 #if defined(SMP) && defined(KERNEL)
227 
228 /*
229  * When using APIC IPI's, invlpg() is not simply the invlpg instruction
230  * (this is a bug) and the inlining cost is prohibitive since the call
231  * executes into the IPI transmission system.
232  */
233 void	invlpg		__P((u_int addr));
234 void	invltlb		__P((void));
235 
/*
 * Execute "invlpg" on the byte at `addr'.  The "memory" clobber keeps gcc
 * from moving memory accesses across the instruction.
 */
static __inline void
cpu_invlpg(void *addr)
{
	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
241 
242 static __inline void
243 cpu_invltlb(void)
244 {
245 	u_int	temp;
246 	/*
247 	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
248 	 * is inlined.
249 	 */
250 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
251 			 : : "memory");
252 #if defined(SWTCH_OPTIM_STATS)
253 	++tlb_flush_count;
254 #endif
255 }
256 
257 #else /* !(SMP && KERNEL) */
258 
259 static __inline void
260 invlpg(u_int addr)
261 {
262 	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
263 }
264 
265 static __inline void
266 invltlb(void)
267 {
268 	u_int	temp;
269 	/*
270 	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
271 	 * is inlined.
272 	 */
273 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
274 			 : : "memory");
275 #ifdef SWTCH_OPTIM_STATS
276 	++tlb_flush_count;
277 #endif
278 }
279 
280 #endif /* SMP && KERNEL */
281 
282 static __inline u_short
283 inw(u_int port)
284 {
285 	u_short	data;
286 
287 	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
288 	return (data);
289 }
290 
291 static __inline u_int
292 loadandclear(u_int *addr)
293 {
294 	u_int	result;
295 
296 	__asm __volatile("xorl %0,%0; xchgl %1,%0"
297 			 : "=&r" (result) : "m" (*addr));
298 	return (result);
299 }
300 
301 static __inline void
302 outbv(u_int port, u_char data)
303 {
304 	u_char	al;
305 	/*
306 	 * Use an unnecessary assignment to help gcc's register allocator.
307 	 * This make a large difference for gcc-1.40 and a tiny difference
308 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
309 	 * best results.  gcc-2.6.0 can't handle this.
310 	 */
311 	al = data;
312 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
313 }
314 
315 static __inline void
316 outl(u_int port, u_int data)
317 {
318 	/*
319 	 * outl() and outw() aren't used much so we haven't looked at
320 	 * possible micro-optimizations such as the unnecessary
321 	 * assignment for them.
322 	 */
323 	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
324 }
325 
326 static __inline void
327 outsb(u_int port, const void *addr, size_t cnt)
328 {
329 	__asm __volatile("cld; rep; outsb"
330 			 : "=S" (addr), "=c" (cnt)
331 			 :  "0" (addr),  "1" (cnt), "d" (port));
332 }
333 
334 static __inline void
335 outsw(u_int port, const void *addr, size_t cnt)
336 {
337 	__asm __volatile("cld; rep; outsw"
338 			 : "=S" (addr), "=c" (cnt)
339 			 :  "0" (addr),  "1" (cnt), "d" (port));
340 }
341 
342 static __inline void
343 outsl(u_int port, const void *addr, size_t cnt)
344 {
345 	__asm __volatile("cld; rep; outsl"
346 			 : "=S" (addr), "=c" (cnt)
347 			 :  "0" (addr),  "1" (cnt), "d" (port));
348 }
349 
350 static __inline void
351 outw(u_int port, u_short data)
352 {
353 	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
354 }
355 
356 static __inline u_int
357 rcr2(void)
358 {
359 	u_int	data;
360 
361 	__asm __volatile("movl %%cr2,%0" : "=r" (data));
362 	return (data);
363 }
364 
365 static __inline u_int
366 read_eflags(void)
367 {
368 	u_int	ef;
369 
370 	__asm __volatile("pushfl; popl %0" : "=r" (ef));
371 	return (ef);
372 }
373 
374 static __inline u_int64_t
375 rdmsr(u_int msr)
376 {
377 	u_int64_t rv;
378 
379 	__asm __volatile(".byte 0x0f, 0x32" : "=A" (rv) : "c" (msr));
380 	return (rv);
381 }
382 
383 static __inline u_int64_t
384 rdpmc(u_int pmc)
385 {
386 	u_int64_t rv;
387 
388 	__asm __volatile(".byte 0x0f, 0x33" : "=A" (rv) : "c" (pmc));
389 	return (rv);
390 }
391 
/*
 * Read the time stamp counter.  The instruction is emitted as raw opcode
 * bytes 0x0f 0x31; the "A" constraint collects the 64-bit result from
 * %edx:%eax.
 */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t tsc;

	__asm __volatile(".byte 0x0f, 0x31" : "=A" (tsc));
	return (tsc);
}
400 
401 static __inline void
402 setbits(volatile u_int *addr, u_int bits)
403 {
404 	__asm __volatile(
405 #ifdef SMP
406 			 "lock; "
407 #endif
408 			 "orl %1,%0" : "=m" (*addr) : "ir" (bits));
409 }
410 
/*
 * Execute the "wbinvd" instruction.
 */
static __inline void
wbinvd(void)
{
	__asm __volatile("wbinvd");
}
416 
417 static __inline void
418 write_eflags(u_int ef)
419 {
420 	__asm __volatile("pushl %0; popfl" : : "r" (ef));
421 }
422 
423 static __inline void
424 wrmsr(u_int msr, u_int64_t newval)
425 {
426 	__asm __volatile(".byte 0x0f, 0x30" : : "A" (newval), "c" (msr));
427 }
428 
429 #else /* !__GNUC__ */
430 
431 int	breakpoint	__P((void));
432 void	disable_intr	__P((void));
433 void	enable_intr	__P((void));
434 u_char	inb		__P((u_int port));
435 u_int	inl		__P((u_int port));
436 void	insb		__P((u_int port, void *addr, size_t cnt));
437 void	insl		__P((u_int port, void *addr, size_t cnt));
438 void	insw		__P((u_int port, void *addr, size_t cnt));
439 void	invd		__P((void));
440 void	invlpg		__P((u_int addr));
441 void	invltlb		__P((void));
442 u_short	inw		__P((u_int port));
443 u_int	loadandclear	__P((u_int *addr));
444 void	outb		__P((u_int port, u_char data));
445 void	outl		__P((u_int port, u_int data));
446 void	outsb		__P((u_int port, void *addr, size_t cnt));
447 void	outsl		__P((u_int port, void *addr, size_t cnt));
448 void	outsw		__P((u_int port, void *addr, size_t cnt));
449 void	outw		__P((u_int port, u_short data));
450 u_int	rcr2		__P((void));
451 u_int64_t rdmsr		__P((u_int msr));
452 u_int64_t rdpmc		__P((u_int pmc));
453 u_int64_t rdtsc		__P((void));
454 u_int	read_eflags	__P((void));
455 void	setbits		__P((volatile u_int *addr, u_int bits));
456 void	wbinvd		__P((void));
457 void	write_eflags	__P((u_int ef));
458 void	wrmsr		__P((u_int msr, u_int64_t newval));
459 
460 #endif	/* __GNUC__ */
461 
462 void	load_cr0	__P((u_int cr0));
463 void	load_cr3	__P((u_int cr3));
464 void	load_cr4	__P((u_int cr4));
465 void	ltr		__P((u_short sel));
466 u_int	rcr0		__P((void));
467 u_int	rcr3		__P((void));
468 u_int	rcr4		__P((void));
469 
470 #endif /* !_MACHINE_CPUFUNC_H_ */
471