1 /*- 2 * Copyright (c) 1993 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * $Id: cpufunc.h,v 1.80 1998/07/11 04:58:25 bde Exp $ 34 */ 35 36 /* 37 * Functions to provide access to special i386 instructions. 38 */ 39 40 #ifndef _MACHINE_CPUFUNC_H_ 41 #define _MACHINE_CPUFUNC_H_ 42 43 #include <sys/cdefs.h> 44 #include <sys/types.h> 45 46 #include <machine/lock.h> 47 48 #if defined(SWTCH_OPTIM_STATS) 49 extern int tlb_flush_count; 50 #endif 51 52 #define readb(va) (*(volatile u_int8_t *) (va)) 53 #define readw(va) (*(volatile u_int16_t *) (va)) 54 #define readl(va) (*(volatile u_int32_t *) (va)) 55 56 #define writeb(va, d) (*(volatile u_int8_t *) (va) = (d)) 57 #define writew(va, d) (*(volatile u_int16_t *) (va) = (d)) 58 #define writel(va, d) (*(volatile u_int32_t *) (va) = (d)) 59 60 #ifdef __GNUC__ 61 62 static __inline void 63 breakpoint(void) 64 { 65 __asm __volatile("int $3"); 66 } 67 68 static __inline void 69 disable_intr(void) 70 { 71 __asm __volatile("cli" : : : "memory"); 72 MPINTR_LOCK(); 73 } 74 75 static __inline void 76 enable_intr(void) 77 { 78 MPINTR_UNLOCK(); 79 __asm __volatile("sti"); 80 } 81 82 #define HAVE_INLINE_FFS 83 84 static __inline int 85 ffs(int mask) 86 { 87 int result; 88 /* 89 * bsfl turns out to be not all that slow on 486's. It can beaten 90 * using a binary search to reduce to 4 bits and then a table lookup, 91 * but only if the code is inlined and in the cache, and the code 92 * is quite large so inlining it probably busts the cache. 93 * 94 * Note that gcc-2's builtin ffs would be used if we didn't declare 95 * this inline or turn off the builtin. The builtin is faster but 96 * broken in gcc-2.4.5 and slower but working in gcc-2.5 and 2.6. 97 */ 98 __asm __volatile("testl %0,%0; je 1f; bsfl %0,%0; incl %0; 1:" 99 : "=r" (result) : "0" (mask)); 100 return (result); 101 } 102 103 #define HAVE_INLINE_FLS 104 105 static __inline int 106 fls(int mask) 107 { 108 int result; 109 __asm __volatile("testl %0,%0; je 1f; bsrl %0,%0; incl %0; 1:" 110 : "=r" (result) : "0" (mask)); 111 return (result); 112 } 113 114 #if __GNUC__ < 2 115 116 #define inb(port) inbv(port) 117 #define outb(port, data) outbv(port, data) 118 119 #else /* __GNUC >= 2 */ 120 121 /* 122 * The following complications are to get around gcc not having a 123 * constraint letter for the range 0..255. We still put "d" in the 124 * constraint because "i" isn't a valid constraint when the port 125 * isn't constant. This only matters for -O0 because otherwise 126 * the non-working version gets optimized away. 127 * 128 * Use an expression-statement instead of a conditional expression 129 * because gcc-2.6.0 would promote the operands of the conditional 130 * and produce poor code for "if ((inb(var) & const1) == const2)". 131 * 132 * The unnecessary test `(port) < 0x10000' is to generate a warning if 133 * the `port' has type u_short or smaller. Such types are pessimal. 134 * This actually only works for signed types. The range check is 135 * careful to avoid generating warnings. 136 */ 137 #define inb(port) __extension__ ({ \ 138 u_char _data; \ 139 if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ 140 && (port) < 0x10000) \ 141 _data = inbc(port); \ 142 else \ 143 _data = inbv(port); \ 144 _data; }) 145 146 #define outb(port, data) ( \ 147 __builtin_constant_p(port) && ((port) & 0xffff) < 0x100 \ 148 && (port) < 0x10000 \ 149 ? outbc(port, data) : outbv(port, data)) 150 151 static __inline u_char 152 inbc(u_int port) 153 { 154 u_char data; 155 156 __asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port))); 157 return (data); 158 } 159 160 static __inline void 161 outbc(u_int port, u_char data) 162 { 163 __asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port))); 164 } 165 166 #endif /* __GNUC <= 2 */ 167 168 static __inline u_char 169 inbv(u_int port) 170 { 171 u_char data; 172 /* 173 * We use %%dx and not %1 here because i/o is done at %dx and not at 174 * %edx, while gcc generates inferior code (movw instead of movl) 175 * if we tell it to load (u_short) port. 176 */ 177 __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); 178 return (data); 179 } 180 181 static __inline u_int 182 inl(u_int port) 183 { 184 u_int data; 185 186 __asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port)); 187 return (data); 188 } 189 190 static __inline void 191 insb(u_int port, void *addr, size_t cnt) 192 { 193 __asm __volatile("cld; rep; insb" 194 : : "d" (port), "D" (addr), "c" (cnt) 195 : "di", "cx", "memory"); 196 } 197 198 static __inline void 199 insw(u_int port, void *addr, size_t cnt) 200 { 201 __asm __volatile("cld; rep; insw" 202 : : "d" (port), "D" (addr), "c" (cnt) 203 : "di", "cx", "memory"); 204 } 205 206 static __inline void 207 insl(u_int port, void *addr, size_t cnt) 208 { 209 __asm __volatile("cld; rep; insl" 210 : : "d" (port), "D" (addr), "c" (cnt) 211 : "di", "cx", "memory"); 212 } 213 214 static __inline void 215 invd(void) 216 { 217 __asm __volatile("invd"); 218 } 219 220 #ifdef KERNEL 221 #ifdef SMP 222 223 /* 224 * When using APIC IPI's, the inlining cost is prohibitive since the call 225 * executes into the IPI transmission system. 226 */ 227 void invlpg __P((u_int addr)); 228 void invltlb __P((void)); 229 230 static __inline void 231 cpu_invlpg(void *addr) 232 { 233 __asm __volatile("invlpg %0"::"m"(*(char *)addr):"memory"); 234 } 235 236 static __inline void 237 cpu_invltlb(void) 238 { 239 u_int temp; 240 /* 241 * This should be implemented as load_cr3(rcr3()) when load_cr3() 242 * is inlined. 243 */ 244 __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) 245 : : "memory"); 246 #if defined(SWTCH_OPTIM_STATS) 247 ++tlb_flush_count; 248 #endif 249 } 250 #else /* !SMP */ 251 252 static __inline void 253 invlpg(u_int addr) 254 { 255 __asm __volatile("invlpg %0"::"m"(*(char *)addr):"memory"); 256 } 257 258 259 static __inline void 260 invltlb(void) 261 { 262 u_int temp; 263 /* 264 * This should be implemented as load_cr3(rcr3()) when load_cr3() 265 * is inlined. 266 */ 267 __asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp) 268 : : "memory"); 269 #if defined(SWTCH_OPTIM_STATS) 270 ++tlb_flush_count; 271 #endif 272 } 273 274 #endif /* SMP */ 275 #endif /* KERNEL */ 276 277 static __inline u_short 278 inw(u_int port) 279 { 280 u_short data; 281 282 __asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port)); 283 return (data); 284 } 285 286 static __inline u_int 287 loadandclear(u_int *addr) 288 { 289 u_int result; 290 291 __asm __volatile("xorl %0,%0; xchgl %1,%0" 292 : "=&r" (result) : "m" (*addr)); 293 return (result); 294 } 295 296 static __inline void 297 outbv(u_int port, u_char data) 298 { 299 u_char al; 300 /* 301 * Use an unnecessary assignment to help gcc's register allocator. 302 * This make a large difference for gcc-1.40 and a tiny difference 303 * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for 304 * best results. gcc-2.6.0 can't handle this. 305 */ 306 al = data; 307 __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); 308 } 309 310 static __inline void 311 outl(u_int port, u_int data) 312 { 313 /* 314 * outl() and outw() aren't used much so we haven't looked at 315 * possible micro-optimizations such as the unnecessary 316 * assignment for them. 317 */ 318 __asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port)); 319 } 320 321 static __inline void 322 outsb(u_int port, const void *addr, size_t cnt) 323 { 324 __asm __volatile("cld; rep; outsb" 325 : : "d" (port), "S" (addr), "c" (cnt) 326 : "si", "cx"); 327 } 328 329 static __inline void 330 outsw(u_int port, const void *addr, size_t cnt) 331 { 332 __asm __volatile("cld; rep; outsw" 333 : : "d" (port), "S" (addr), "c" (cnt) 334 : "si", "cx"); 335 } 336 337 static __inline void 338 outsl(u_int port, const void *addr, size_t cnt) 339 { 340 __asm __volatile("cld; rep; outsl" 341 : : "d" (port), "S" (addr), "c" (cnt) 342 : "si", "cx"); 343 } 344 345 static __inline void 346 outw(u_int port, u_short data) 347 { 348 __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port)); 349 } 350 351 static __inline u_int 352 rcr2(void) 353 { 354 u_int data; 355 356 __asm __volatile("movl %%cr2,%0" : "=r" (data)); 357 return (data); 358 } 359 360 static __inline u_int 361 read_eflags(void) 362 { 363 u_int ef; 364 365 __asm __volatile("pushfl; popl %0" : "=r" (ef)); 366 return (ef); 367 } 368 369 static __inline u_int64_t 370 rdmsr(u_int msr) 371 { 372 u_int64_t rv; 373 374 __asm __volatile(".byte 0x0f, 0x32" : "=A" (rv) : "c" (msr)); 375 return (rv); 376 } 377 378 static __inline u_int64_t 379 rdpmc(u_int pmc) 380 { 381 u_int64_t rv; 382 383 __asm __volatile(".byte 0x0f, 0x33" : "=A" (rv) : "c" (pmc)); 384 return (rv); 385 } 386 387 static __inline u_int64_t 388 rdtsc(void) 389 { 390 u_int64_t rv; 391 392 __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); 393 return (rv); 394 } 395 396 static __inline void 397 setbits(volatile unsigned *addr, u_int bits) 398 { 399 __asm __volatile( 400 #ifdef SMP 401 "lock; " 402 #endif 403 "orl %1,%0" : "=m" (*addr) : "ir" (bits)); 404 } 405 406 static __inline void 407 wbinvd(void) 408 { 409 __asm __volatile("wbinvd"); 410 } 411 412 static __inline void 413 write_eflags(u_int ef) 414 { 415 __asm __volatile("pushl %0; popfl" : : "r" (ef)); 416 } 417 418 static __inline void 419 wrmsr(u_int msr, u_int64_t newval) 420 { 421 __asm __volatile(".byte 0x0f, 0x30" : : "A" (newval), "c" (msr)); 422 } 423 424 #else /* !__GNUC__ */ 425 426 int breakpoint __P((void)); 427 void disable_intr __P((void)); 428 void enable_intr __P((void)); 429 u_char inb __P((u_int port)); 430 u_int inl __P((u_int port)); 431 void insb __P((u_int port, void *addr, size_t cnt)); 432 void insl __P((u_int port, void *addr, size_t cnt)); 433 void insw __P((u_int port, void *addr, size_t cnt)); 434 void invd __P((void)); 435 void invlpg __P((u_int addr)); 436 void invltlb __P((void)); 437 u_short inw __P((u_int port)); 438 u_int loadandclear __P((u_int *addr)); 439 void outb __P((u_int port, u_char data)); 440 void outl __P((u_int port, u_int data)); 441 void outsb __P((u_int port, void *addr, size_t cnt)); 442 void outsl __P((u_int port, void *addr, size_t cnt)); 443 void outsw __P((u_int port, void *addr, size_t cnt)); 444 void outw __P((u_int port, u_short data)); 445 u_int rcr2 __P((void)); 446 u_int64_t rdmsr __P((u_int msr)); 447 u_int64_t rdpmc __P((u_int pmc)); 448 u_int64_t rdtsc __P((void)); 449 u_int read_eflags __P((void)); 450 void setbits __P((volatile unsigned *addr, u_int bits)); 451 void wbinvd __P((void)); 452 void write_eflags __P((u_int ef)); 453 void wrmsr __P((u_int msr, u_int64_t newval)); 454 455 #endif /* __GNUC__ */ 456 457 void load_cr0 __P((u_int cr0)); 458 void load_cr3 __P((u_int cr3)); 459 void load_cr4 __P((u_int cr4)); 460 void ltr __P((u_short sel)); 461 u_int rcr0 __P((void)); 462 u_int rcr3 __P((void)); 463 u_int rcr4 __P((void)); 464 void i686_pagezero __P((void *addr)); 465 466 #endif /* !_MACHINE_CPUFUNC_H_ */ 467