xref: /freebsd/sys/amd64/include/cpufunc.h (revision 1de995bb1f2e662f557b0e8448ff7f1d19a48e51)
1 /*-
2  * Copyright (c) 1993 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	$Id: cpufunc.h,v 1.2 1997/09/01 07:37:58 smp Exp smp $
34  */
35 
36 /*
37  * Functions to provide access to special i386 instructions.
38  */
39 
40 #ifndef _MACHINE_CPUFUNC_H_
41 #define	_MACHINE_CPUFUNC_H_
42 
43 #include <sys/cdefs.h>
44 #include <sys/types.h>
45 
46 #include <machine/lock.h>
47 
48 
49 #ifdef	__GNUC__
50 
/*
 * Execute an "int $3" instruction.
 */
static __inline void
breakpoint(void)
{

	__asm __volatile("int $3");
}
56 
/*
 * Execute "cli".  The "memory" clobber stops the compiler from moving
 * memory accesses across the instruction.  SMP kernels then acquire
 * mpintr_lock with s_lock().
 */
static __inline void
disable_intr(void)
{

	__asm __volatile("cli"
			 : /* no outputs */
			 : /* no inputs */
			 : "memory");
#ifdef SMP
	s_lock(&mpintr_lock);
#endif
}
65 
/*
 * Counterpart of disable_intr(): SMP kernels first release mpintr_lock
 * with s_unlock(), then "sti" is executed.
 *
 * The "memory" clobber mirrors the one on "cli" in disable_intr().  Without
 * it a non-SMP build gives the compiler no reason to finish the memory
 * accesses of the protected region before the "sti".
 */
static __inline void
enable_intr(void)
{
#ifdef SMP
	s_unlock(&mpintr_lock);
#endif
	__asm __volatile("sti" : : : "memory");
}
74 
#define	HAVE_INLINE_FFS

/*
 * Find first set: return the 1-based position of the least significant
 * set bit of `mask', or 0 when `mask' is 0.
 *
 * bsfl turns out to be not all that slow on 486's.  It can be beaten
 * using a binary search to reduce to 4 bits and then a table lookup,
 * but only if the code is inlined and in the cache, and the code
 * is quite large so inlining it probably busts the cache.
 *
 * Note that gcc-2's builtin ffs would be used if we didn't declare
 * this inline or turn off the builtin.  The builtin is faster but
 * broken in gcc-2.4.5 and slower but working in gcc-2.5 and 2.6.
 */
static __inline int
ffs(int mask)
{
	int	bit;

	/* bsfl leaves its destination undefined for a zero source. */
	if (mask == 0)
		return (0);
	__asm __volatile("bsfl %1,%0" : "=r" (bit) : "rm" (mask));
	return (bit + 1);
}
95 
#define	HAVE_INLINE_FLS

/*
 * Find last set: return the 1-based position of the most significant
 * set bit of `mask', or 0 when `mask' is 0.
 */
static __inline int
fls(int mask)
{
	int	bit;

	/* bsrl leaves its destination undefined for a zero source. */
	if (mask == 0)
		return (0);
	__asm __volatile("bsrl %1,%0" : "=r" (bit) : "rm" (mask));
	return (bit + 1);
}
106 
107 #if __GNUC__ < 2
108 
/*
 * Compilers older than gcc-2 always use the variable-port versions,
 * inbv() and outbv().
 */
#define	inb(port)		inbv(port)
#define	outb(port, data)	outbv(port, data)
111 
#else /* __GNUC__ >= 2 */
113 
/*
 * inb() and outb() pick between two implementations: inbc()/outbc() when
 * the port is a compile-time constant in the range 0..255, and
 * inbv()/outbv() otherwise.
 *
 * gcc has no constraint letter for the range 0..255, hence the explicit
 * range test.  The constant versions still list "d" in their constraint
 * because "i" alone isn't valid when the port isn't constant.  This only
 * matters for -O0; otherwise the non-working version gets optimized away.
 *
 * inb() is an expression-statement rather than a conditional expression
 * because gcc-2.6.0 would promote the operands of the conditional and
 * produce poor code for "if ((inb(var) & const1) == const2)".
 *
 * The unnecessary test `(port) < 0x10000' is there to generate a warning
 * if `port' has type u_short or smaller.  Such types are pessimal.  This
 * actually only works for signed types.  The range check is careful to
 * avoid generating warnings.
 */
#define	inb(port) __extension__ ({					\
	u_char	_data;							\
									\
	if (__builtin_constant_p(port) &&				\
	    ((port) & 0xffff) < 0x100 && (port) < 0x10000)		\
		_data = inbc(port);					\
	else								\
		_data = inbv(port);					\
	_data;								\
})
138 
/*
 * Write `data' to i/o port `port': outbc() for a compile-time constant
 * port below 0x100, outbv() for everything else.
 */
#define	outb(port, data)						\
	((__builtin_constant_p(port) &&					\
	    ((port) & 0xffff) < 0x100 && (port) < 0x10000) ?		\
	    outbc(port, data) : outbv(port, data))
143 
144 static __inline u_char
145 inbc(u_int port)
146 {
147 	u_char	data;
148 
149 	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
150 	return (data);
151 }
152 
153 static __inline void
154 outbc(u_int port, u_char data)
155 {
156 	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
157 }
158 
#endif /* __GNUC__ < 2 */
160 
161 static __inline u_char
162 inbv(u_int port)
163 {
164 	u_char	data;
165 	/*
166 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
167 	 * %edx, while gcc generates inferior code (movw instead of movl)
168 	 * if we tell it to load (u_short) port.
169 	 */
170 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
171 	return (data);
172 }
173 
174 static __inline u_long
175 inl(u_int port)
176 {
177 	u_long	data;
178 
179 	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
180 	return (data);
181 }
182 
183 static __inline void
184 insb(u_int port, void *addr, size_t cnt)
185 {
186 	__asm __volatile("cld; rep; insb"
187 			 : : "d" (port), "D" (addr), "c" (cnt)
188 			 : "di", "cx", "memory");
189 }
190 
191 static __inline void
192 insw(u_int port, void *addr, size_t cnt)
193 {
194 	__asm __volatile("cld; rep; insw"
195 			 : : "d" (port), "D" (addr), "c" (cnt)
196 			 : "di", "cx", "memory");
197 }
198 
199 static __inline void
200 insl(u_int port, void *addr, size_t cnt)
201 {
202 	__asm __volatile("cld; rep; insl"
203 			 : : "d" (port), "D" (addr), "c" (cnt)
204 			 : "di", "cx", "memory");
205 }
206 
/*
 * Execute the "invd" instruction.
 */
static __inline void
invd(void)
{

	__asm __volatile("invd");
}
212 
213 #ifdef KERNEL
214 #ifdef SMP
215 
216 /*
217  * When using APIC IPI's, the inlining cost is prohibitive since the call
218  * executes into the IPI transmission system.
219  */
220 void	invlpg		__P((u_int addr));
221 void	invltlb		__P((void));
222 
223 #else  /* !SMP */
224 
225 static __inline void
226 invlpg(u_int addr)
227 {
228 	__asm __volatile("invlpg (%0)" : : "r" (addr) : "memory");
229 }
230 
231 static __inline void
232 invltlb(void)
233 {
234 	u_long	temp;
235 	/*
236 	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
237 	 * is inlined.
238 	 */
239 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
240 			 : : "memory");
241 }
242 
243 #endif	/* SMP */
244 #endif  /* KERNEL */
245 
246 static __inline u_short
247 inw(u_int port)
248 {
249 	u_short	data;
250 
251 	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
252 	return (data);
253 }
254 
/*
 * Exchange the word at `addr' with zero and return its previous value.
 *
 * xchgl stores into *addr, so the location is listed as an output as
 * well as an input; otherwise the compiler is never told that the asm
 * modifies it.  `result' is early-clobber because it is zeroed before
 * the memory operand is used.
 */
static __inline u_int
loadandclear(u_int *addr)
{
	u_int	result;

	__asm __volatile("xorl %0,%0; xchgl %1,%0"
			 : "=&r" (result), "=m" (*addr)
			 : "m" (*addr));
	return (result);
}
264 
265 static __inline void
266 outbv(u_int port, u_char data)
267 {
268 	u_char	al;
269 	/*
270 	 * Use an unnecessary assignment to help gcc's register allocator.
271 	 * This make a large difference for gcc-1.40 and a tiny difference
272 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
273 	 * best results.  gcc-2.6.0 can't handle this.
274 	 */
275 	al = data;
276 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
277 }
278 
279 static __inline void
280 outl(u_int port, u_long data)
281 {
282 	/*
283 	 * outl() and outw() aren't used much so we haven't looked at
284 	 * possible micro-optimizations such as the unnecessary
285 	 * assignment for them.
286 	 */
287 	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
288 }
289 
290 static __inline void
291 outsb(u_int port, void *addr, size_t cnt)
292 {
293 	__asm __volatile("cld; rep; outsb"
294 			 : : "d" (port), "S" (addr), "c" (cnt)
295 			 : "si", "cx");
296 }
297 
298 static __inline void
299 outsw(u_int port, void *addr, size_t cnt)
300 {
301 	__asm __volatile("cld; rep; outsw"
302 			 : : "d" (port), "S" (addr), "c" (cnt)
303 			 : "si", "cx");
304 }
305 
306 static __inline void
307 outsl(u_int port, void *addr, size_t cnt)
308 {
309 	__asm __volatile("cld; rep; outsl"
310 			 : : "d" (port), "S" (addr), "c" (cnt)
311 			 : "si", "cx");
312 }
313 
314 static __inline void
315 outw(u_int port, u_short data)
316 {
317 	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
318 }
319 
320 static __inline u_long
321 rcr2(void)
322 {
323 	u_long	data;
324 
325 	__asm __volatile("movl %%cr2,%0" : "=r" (data));
326 	return (data);
327 }
328 
329 static __inline u_long
330 read_eflags(void)
331 {
332 	u_long	ef;
333 
334 	__asm __volatile("pushfl; popl %0" : "=r" (ef));
335 	return (ef);
336 }
337 
338 static __inline quad_t
339 rdmsr(u_int msr)
340 {
341 	quad_t rv;
342 
343 	__asm __volatile(".byte 0x0f, 0x32" : "=A" (rv) : "c" (msr));
344 	return (rv);
345 }
346 
347 static __inline quad_t
348 rdpmc(u_int pmc)
349 {
350 	quad_t rv;
351 
352 	__asm __volatile(".byte 0x0f, 0x33" : "=A" (rv) : "c" (pmc));
353 	return (rv);
354 }
355 
356 static __inline quad_t
357 rdtsc(void)
358 {
359 	quad_t rv;
360 
361 	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
362 	return (rv);
363 }
364 
/*
 * OR `bits' into the word at `addr' with a single "orl" instruction,
 * prefixed with "lock" on SMP kernels.
 *
 * orl reads the old value of *addr as well as writing the new one, so
 * the location is listed as both an output and an input.  Declaring it
 * output-only would tell the compiler that the previous contents are
 * dead.
 */
static __inline void
setbits(volatile unsigned *addr, u_int bits)
{
	__asm __volatile(
#ifdef SMP
			 "lock; "
#endif
			 "orl %1,%0"
			 : "=m" (*addr)
			 : "ir" (bits), "m" (*addr));
}
374 
/*
 * Execute the "wbinvd" instruction.
 */
static __inline void
wbinvd(void)
{

	__asm __volatile("wbinvd");
}
380 
381 static __inline void
382 write_eflags(u_long ef)
383 {
384 	__asm __volatile("pushl %0; popfl" : : "r" (ef));
385 }
386 
387 static __inline void
388 wrmsr(u_int msr, quad_t newval)
389 {
390 	__asm __volatile(".byte 0x0f, 0x30" : : "A" (newval), "c" (msr));
391 }
392 
393 #else /* !__GNUC__ */
394 
395 int	breakpoint	__P((void));
396 void	disable_intr	__P((void));
397 void	enable_intr	__P((void));
398 u_char	inb		__P((u_int port));
399 u_long	inl		__P((u_int port));
400 void	insb		__P((u_int port, void *addr, size_t cnt));
401 void	insl		__P((u_int port, void *addr, size_t cnt));
402 void	insw		__P((u_int port, void *addr, size_t cnt));
403 void	invd		__P((void));
404 void	invlpg		__P((u_int addr));
405 void	invltlb		__P((void));
406 u_short	inw		__P((u_int port));
407 u_int	loadandclear	__P((u_int *addr));
408 void	outb		__P((u_int port, u_char data));
409 void	outl		__P((u_int port, u_long data));
410 void	outsb		__P((u_int port, void *addr, size_t cnt));
411 void	outsl		__P((u_int port, void *addr, size_t cnt));
412 void	outsw		__P((u_int port, void *addr, size_t cnt));
413 void	outw		__P((u_int port, u_short data));
414 u_long	rcr2		__P((void));
415 quad_t	rdmsr		__P((u_int msr));
416 quad_t	rdpmc		__P((u_int pmc));
417 quad_t	rdtsc		__P((void));
418 u_long	read_eflags	__P((void));
419 void	setbits		__P((volatile unsigned *addr, u_int bits));
420 void	wbinvd		__P((void));
421 void	write_eflags	__P((u_long ef));
422 void	wrmsr		__P((u_int msr, quad_t newval));
423 
424 #endif	/* __GNUC__ */
425 
426 void	load_cr0	__P((u_long cr0));
427 void	load_cr3	__P((u_long cr3));
428 void	load_cr4	__P((u_long cr4));
429 void	ltr		__P((u_short sel));
430 u_int	rcr0		__P((void));
431 u_long	rcr3		__P((void));
432 u_long	rcr4		__P((void));
433 
434 #endif /* !_MACHINE_CPUFUNC_H_ */
435