xref: /freebsd/sys/i386/include/cpufunc.h (revision a5941fc2aae12cb42e24117b2979147c9c7c5571)
1 /*-
2  * Copyright (c) 1993 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	$Id: cpufunc.h,v 1.75 1998/01/25 17:02:00 kato Exp $
34  */
35 
36 /*
37  * Functions to provide access to special i386 instructions.
38  */
39 
40 #ifndef _MACHINE_CPUFUNC_H_
41 #define	_MACHINE_CPUFUNC_H_
42 
43 #include <sys/cdefs.h>
44 #include <sys/types.h>
45 
46 #include <machine/lock.h>
47 
/*
 * Counter incremented by the inline invltlb() in this header; it is only
 * declared when SWTCH_OPTIM_STATS is defined.
 */
#ifdef SWTCH_OPTIM_STATS
extern int	tlb_flush_count;
#endif /* SWTCH_OPTIM_STATS */
51 
52 #ifdef	__GNUC__
53 
/*
 * Execute an "int $3" instruction, i.e. raise the i386 breakpoint trap
 * at the point of the call.
 */
static __inline void
breakpoint(void)
{
	__asm __volatile(
	    "int $3"
	);
}
59 
/*
 * Execute "cli" and then run MPINTR_LOCK() (a macro defined outside this
 * header).  The "memory" clobber tells the compiler not to cache memory
 * values in registers across the cli or move memory accesses over it.
 */
static __inline void
disable_intr(void)
{
	__asm __volatile("cli"
			 : /* no outputs */
			 : /* no inputs */
			 : "memory");
	MPINTR_LOCK();
}
66 
/*
 * Run MPINTR_UNLOCK() (a macro defined outside this header) and then
 * execute "sti".
 *
 * The "memory" clobber mirrors the one on the cli in disable_intr():
 * without it the compiler is free to sink loads and stores that belong
 * to the interrupts-disabled region below the sti, where they would no
 * longer be protected.
 */
static __inline void
enable_intr(void)
{
	MPINTR_UNLOCK();
	__asm __volatile("sti" : : : "memory");
}
73 
#define	HAVE_INLINE_FFS

/*
 * Find first set: return the 1-based position of the least significant
 * set bit of mask, or 0 if mask is 0.
 *
 * bsfl turns out to be not all that slow on 486's.  It can be beaten
 * using a binary search to reduce to 4 bits and then a table lookup,
 * but only if the code is inlined and in the cache, and the code
 * is quite large so inlining it probably busts the cache.
 *
 * Note that gcc-2's builtin ffs would be used if we didn't declare
 * this inline or turn off the builtin.  The builtin is faster but
 * broken in gcc-2.4.5 and slower but working in gcc-2.5 and 2.6.
 */
static __inline int
ffs(int mask)
{
	int	bit = mask;

	/* A zero mask skips the bsfl/incl and is returned unchanged. */
	__asm __volatile("testl %0,%0; je 1f; bsfl %0,%0; incl %0; 1:"
			 : "+r" (bit));
	return (bit);
}
94 
#define	HAVE_INLINE_FLS

/*
 * Find last set: return the 1-based position of the most significant
 * set bit of mask, or 0 if mask is 0.
 */
static __inline int
fls(int mask)
{
	int	bit = mask;

	/* A zero mask skips the bsrl/incl and is returned unchanged. */
	__asm __volatile("testl %0,%0; je 1f; bsrl %0,%0; incl %0; 1:"
			 : "+r" (bit));
	return (bit);
}
105 
106 #if __GNUC__ < 2
107 
108 #define	inb(port)		inbv(port)
109 #define	outb(port, data)	outbv(port, data)
110 
#else /* __GNUC__ >= 2 */
112 
/*
 * gcc has no constraint letter for the range 0..255, so inb() and outb()
 * choose here between the helpers that may encode the port as an
 * immediate (inbc/outbc) and the ones that always go through %dx
 * (inbv/outbv).  The immediate helpers still put "d" in the constraint
 * because "i" isn't a valid constraint when the port isn't constant.
 * This only matters for -O0 because otherwise the non-working version
 * gets optimized away.
 *
 * inb() uses an expression-statement instead of a conditional expression
 * because gcc-2.6.0 would promote the operands of the conditional
 * and produce poor code for "if ((inb(var) & const1) == const2)".
 *
 * The unnecessary test `(port) >= 0x10000' is to generate a warning if
 * the `port' has type u_short or smaller.  Such types are pessimal.
 * This actually only works for signed types.  The range check is
 * careful to avoid generating warnings.
 */
#define	inb(port) __extension__ ({					\
	u_char	_data;							\
	if (!__builtin_constant_p(port) || ((port) & 0xffff) >= 0x100	\
	    || (port) >= 0x10000)					\
		_data = inbv(port);					\
	else								\
		_data = inbc(port);					\
	_data; })

#define	outb(port, data) (						\
	!__builtin_constant_p(port) || ((port) & 0xffff) >= 0x100	\
	|| (port) >= 0x10000						\
	? outbv(port, data) : outbc(port, data))
142 
143 static __inline u_char
144 inbc(u_int port)
145 {
146 	u_char	data;
147 
148 	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
149 	return (data);
150 }
151 
152 static __inline void
153 outbc(u_int port, u_char data)
154 {
155 	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
156 }
157 
#endif /* __GNUC__ >= 2 */
159 
160 static __inline u_char
161 inbv(u_int port)
162 {
163 	u_char	data;
164 	/*
165 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
166 	 * %edx, while gcc generates inferior code (movw instead of movl)
167 	 * if we tell it to load (u_short) port.
168 	 */
169 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
170 	return (data);
171 }
172 
173 static __inline u_long
174 inl(u_int port)
175 {
176 	u_long	data;
177 
178 	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
179 	return (data);
180 }
181 
182 static __inline void
183 insb(u_int port, void *addr, size_t cnt)
184 {
185 	__asm __volatile("cld; rep; insb"
186 			 : : "d" (port), "D" (addr), "c" (cnt)
187 			 : "di", "cx", "memory");
188 }
189 
190 static __inline void
191 insw(u_int port, void *addr, size_t cnt)
192 {
193 	__asm __volatile("cld; rep; insw"
194 			 : : "d" (port), "D" (addr), "c" (cnt)
195 			 : "di", "cx", "memory");
196 }
197 
198 static __inline void
199 insl(u_int port, void *addr, size_t cnt)
200 {
201 	__asm __volatile("cld; rep; insl"
202 			 : : "d" (port), "D" (addr), "c" (cnt)
203 			 : "di", "cx", "memory");
204 }
205 
/*
 * Execute the "invd" instruction (invalidate the caches without writing
 * modified lines back; compare wbinvd()).
 */
static __inline void
invd(void)
{
	__asm __volatile(
	    "invd"
	);
}
211 
212 #ifdef KERNEL
213 #ifdef SMP
214 
215 /*
216  * When using APIC IPI's, the inlining cost is prohibitive since the call
217  * executes into the IPI transmission system.
218  */
219 void	invlpg		__P((u_int addr));
220 void	invltlb		__P((void));
221 
222 #else  /* !SMP */
223 
224 static __inline void
225 invlpg(u_int addr)
226 {
227 	__asm __volatile("invlpg (%0)" : : "r" (addr) : "memory");
228 }
229 
230 static __inline void
231 invltlb(void)
232 {
233 	u_long	temp;
234 	/*
235 	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
236 	 * is inlined.
237 	 */
238 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
239 			 : : "memory");
240 #if defined(SWTCH_OPTIM_STATS)
241 	++tlb_flush_count;
242 #endif
243 }
244 
245 #endif	/* SMP */
246 #endif  /* KERNEL */
247 
248 static __inline u_short
249 inw(u_int port)
250 {
251 	u_short	data;
252 
253 	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
254 	return (data);
255 }
256 
/*
 * Exchange *addr with zero and return the value it held before.  A
 * single xchgl with a memory operand does the swap, so the read and the
 * clear cannot be separated.
 *
 * *addr is a read-write ("+m") operand because xchgl stores to it.
 * Declaring it as a plain input told the compiler the word was
 * unchanged, allowing it to keep using a stale copy afterwards.
 */
static __inline u_int
loadandclear(u_int *addr)
{
	u_int	result;

	/* "=&r": result is zeroed before *addr is read, so keep it distinct. */
	__asm __volatile("xorl %0,%0; xchgl %1,%0"
			 : "=&r" (result), "+m" (*addr));
	return (result);
}
266 
267 static __inline void
268 outbv(u_int port, u_char data)
269 {
270 	u_char	al;
271 	/*
272 	 * Use an unnecessary assignment to help gcc's register allocator.
273 	 * This make a large difference for gcc-1.40 and a tiny difference
274 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
275 	 * best results.  gcc-2.6.0 can't handle this.
276 	 */
277 	al = data;
278 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
279 }
280 
281 static __inline void
282 outl(u_int port, u_long data)
283 {
284 	/*
285 	 * outl() and outw() aren't used much so we haven't looked at
286 	 * possible micro-optimizations such as the unnecessary
287 	 * assignment for them.
288 	 */
289 	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
290 }
291 
292 static __inline void
293 outsb(u_int port, const void *addr, size_t cnt)
294 {
295 	__asm __volatile("cld; rep; outsb"
296 			 : : "d" (port), "S" (addr), "c" (cnt)
297 			 : "si", "cx");
298 }
299 
300 static __inline void
301 outsw(u_int port, const void *addr, size_t cnt)
302 {
303 	__asm __volatile("cld; rep; outsw"
304 			 : : "d" (port), "S" (addr), "c" (cnt)
305 			 : "si", "cx");
306 }
307 
308 static __inline void
309 outsl(u_int port, const void *addr, size_t cnt)
310 {
311 	__asm __volatile("cld; rep; outsl"
312 			 : : "d" (port), "S" (addr), "c" (cnt)
313 			 : "si", "cx");
314 }
315 
316 static __inline void
317 outw(u_int port, u_short data)
318 {
319 	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
320 }
321 
322 static __inline u_long
323 rcr2(void)
324 {
325 	u_long	data;
326 
327 	__asm __volatile("movl %%cr2,%0" : "=r" (data));
328 	return (data);
329 }
330 
331 static __inline u_long
332 read_eflags(void)
333 {
334 	u_long	ef;
335 
336 	__asm __volatile("pushfl; popl %0" : "=r" (ef));
337 	return (ef);
338 }
339 
340 static __inline quad_t
341 rdmsr(u_int msr)
342 {
343 	quad_t rv;
344 
345 	__asm __volatile(".byte 0x0f, 0x32" : "=A" (rv) : "c" (msr));
346 	return (rv);
347 }
348 
349 static __inline quad_t
350 rdpmc(u_int pmc)
351 {
352 	quad_t rv;
353 
354 	__asm __volatile(".byte 0x0f, 0x33" : "=A" (rv) : "c" (pmc));
355 	return (rv);
356 }
357 
358 static __inline quad_t
359 rdtsc(void)
360 {
361 	quad_t rv;
362 
363 	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
364 	return (rv);
365 }
366 
/*
 * OR `bits' into the word at `addr' with a single orl instruction; on
 * SMP kernels the instruction carries a lock prefix.
 *
 * *addr is a read-write ("+m") operand because orl reads the old value
 * before storing the result.  Declaring it write-only ("=m") told the
 * compiler the previous contents were irrelevant.
 */
static __inline void
setbits(volatile unsigned *addr, u_int bits)
{
	__asm __volatile(
#ifdef SMP
			 "lock; "
#endif
			 "orl %1,%0" : "+m" (*addr) : "ir" (bits));
}
376 
/*
 * Execute the "wbinvd" instruction (write back and invalidate the
 * caches).
 */
static __inline void
wbinvd(void)
{
	__asm __volatile(
	    "wbinvd"
	);
}
382 
383 static __inline void
384 write_eflags(u_long ef)
385 {
386 	__asm __volatile("pushl %0; popfl" : : "r" (ef));
387 }
388 
389 static __inline void
390 wrmsr(u_int msr, quad_t newval)
391 {
392 	__asm __volatile(".byte 0x0f, 0x30" : : "A" (newval), "c" (msr));
393 }
394 
395 #else /* !__GNUC__ */
396 
397 int	breakpoint	__P((void));
398 void	disable_intr	__P((void));
399 void	enable_intr	__P((void));
400 u_char	inb		__P((u_int port));
401 u_long	inl		__P((u_int port));
402 void	insb		__P((u_int port, void *addr, size_t cnt));
403 void	insl		__P((u_int port, void *addr, size_t cnt));
404 void	insw		__P((u_int port, void *addr, size_t cnt));
405 void	invd		__P((void));
406 void	invlpg		__P((u_int addr));
407 void	invltlb		__P((void));
408 u_short	inw		__P((u_int port));
409 u_int	loadandclear	__P((u_int *addr));
410 void	outb		__P((u_int port, u_char data));
411 void	outl		__P((u_int port, u_long data));
412 void	outsb		__P((u_int port, void *addr, size_t cnt));
413 void	outsl		__P((u_int port, void *addr, size_t cnt));
414 void	outsw		__P((u_int port, void *addr, size_t cnt));
415 void	outw		__P((u_int port, u_short data));
416 u_long	rcr2		__P((void));
417 quad_t	rdmsr		__P((u_int msr));
418 quad_t	rdpmc		__P((u_int pmc));
419 quad_t	rdtsc		__P((void));
420 u_long	read_eflags	__P((void));
421 void	setbits		__P((volatile unsigned *addr, u_int bits));
422 void	wbinvd		__P((void));
423 void	write_eflags	__P((u_long ef));
424 void	wrmsr		__P((u_int msr, quad_t newval));
425 
426 #endif	/* __GNUC__ */
427 
428 void	load_cr0	__P((u_long cr0));
429 void	load_cr3	__P((u_long cr3));
430 void	load_cr4	__P((u_long cr4));
431 void	ltr		__P((u_short sel));
432 u_int	rcr0		__P((void));
433 u_long	rcr3		__P((void));
434 u_long	rcr4		__P((void));
435 
436 #endif /* !_MACHINE_CPUFUNC_H_ */
437