/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2008 John Birrell <jb@freebsd.org>
 *
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#define	_ASM

#include <machine/asmacros.h>
#include <sys/cpuvar_defs.h>
#include <sys/dtrace.h>

#include "assym.inc"

/*
 * Pop the general-purpose registers from the trap frame.  If we trapped
 * from user mode, also switch back to the user %gs base with swapgs.
 * On exit, %rsp points at the hardware-pushed iretq frame.
 */
#define	INTR_POP \
	movq	TF_RDI(%rsp),%rdi;	\
	movq	TF_RSI(%rsp),%rsi;	\
	movq	TF_RDX(%rsp),%rdx;	\
	movq	TF_RCX(%rsp),%rcx;	\
	movq	TF_R8(%rsp),%r8;	\
	movq	TF_R9(%rsp),%r9;	\
	movq	TF_RAX(%rsp),%rax;	\
	movq	TF_RBX(%rsp),%rbx;	\
	movq	TF_RBP(%rsp),%rbp;	\
	movq	TF_R10(%rsp),%r10;	\
	movq	TF_R11(%rsp),%r11;	\
	movq	TF_R12(%rsp),%r12;	\
	movq	TF_R13(%rsp),%r13;	\
	movq	TF_R14(%rsp),%r14;	\
	movq	TF_R15(%rsp),%r15;	\
	testb	$SEL_RPL_MASK,TF_CS(%rsp); \
	jz	1f;			\
	cli;				\
	swapgs;				\
1:	addq	$TF_RIP,%rsp;

	.globl	dtrace_invop_callsite
	.type	dtrace_invop_callsite,@function

	ENTRY(dtrace_invop_start)

	/*
	 * #BP traps with %rip set to the next address.  We need to decrement
	 * the value to indicate the address of the int3 (0xcc) instruction
	 * that we substituted.
	 */
	movq	TF_RIP(%rsp), %rdi
	decq	%rdi
	movq	%rsp, %rsi

	/*
	 * Allocate some scratch space to let the invop handler return a
	 * value.  This is needed when emulating "call" instructions.
	 */
	subq	$16, %rsp
	movq	%rsp, %rdx

	call	dtrace_invop
dtrace_invop_callsite:
	addq	$16, %rsp

	cmpl	$DTRACE_INVOP_PUSHL_EBP, %eax
	je	bp_push
	cmpl	$DTRACE_INVOP_CALL, %eax
	je	bp_call
	cmpl	$DTRACE_INVOP_LEAVE, %eax
	je	bp_leave
	cmpl	$DTRACE_INVOP_NOP, %eax
	je	bp_nop
	cmpl	$DTRACE_INVOP_RET, %eax
	je	bp_ret

	/* When all else fails, handle the trap in the usual way. */
	jmpq	*dtrace_invop_calltrap_addr
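
	/*
	 * Each emulation stub below runs after INTR_POP, at which point
	 * %rsp points at the five-quadword frame that the hardware pushed
	 * for the trap and that iretq will consume:
	 *
	 *	 0(%rsp)	calling RIP
	 *	 8(%rsp)	calling CS
	 *	16(%rsp)	calling RFLAGS
	 *	24(%rsp)	calling RSP
	 *	32(%rsp)	calling SS
	 *
	 * The offsets used in the stubs differ from these only by the size
	 * of the scratch space each stub pushes first.
	 */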

bp_push:
	/*
	 * We must emulate a "pushq %rbp".  To do this, we pull the stack
	 * down 8 bytes, and then store the base pointer.
	 */
	INTR_POP
	subq	$16, %rsp		/* make room for %rbp */
	pushq	%rax			/* push temp */
	movq	24(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	32(%rsp), %rax		/* load calling CS */
	movq	%rax, 16(%rsp)		/* store calling CS */
	movq	40(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 24(%rsp)		/* store calling RFLAGS */
	movq	48(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for %rbp */
	movq	%rax, 32(%rsp)		/* store calling RSP */
	movq	56(%rsp), %rax		/* load calling SS */
	movq	%rax, 40(%rsp)		/* store calling SS */
	movq	32(%rsp), %rax		/* reload calling RSP */
	movq	%rbp, (%rax)		/* store %rbp there */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_call:
	/*
	 * Emulate a "call" instruction.  The invop handler must have already
	 * updated the saved copy of %rip in the register set.  It's our job
	 * to pull the hardware-saved registers down to make space for the
	 * return address, which is provided by the invop handler in our
	 * scratch space.
	 */
	INTR_POP
	subq	$16, %rsp		/* make room to shift the frame down */
	pushq	%rax			/* push temp */
	pushq	%rbx			/* push temp */

	movq	32(%rsp), %rax		/* load calling RIP */
	movq	%rax, 16(%rsp)		/* store calling RIP */
	movq	40(%rsp), %rax		/* load calling CS */
	movq	%rax, 24(%rsp)		/* store calling CS */
	movq	48(%rsp), %rax		/* load calling RFLAGS */
	movq	%rax, 32(%rsp)		/* store calling RFLAGS */
	movq	56(%rsp), %rax		/* load calling RSP */
	subq	$8, %rax		/* make room for return address */
	movq	%rax, 40(%rsp)		/* store calling RSP */
	movq	64(%rsp), %rax		/* load calling SS */
	movq	%rax, 48(%rsp)		/* store calling SS */

	movq	-(TF_RIP - 16)(%rsp), %rax /* load return address */
	movq	40(%rsp), %rbx		/* reload calling RSP */
	movq	%rax, (%rbx)		/* store return address */

	popq	%rbx			/* pop temp */
	popq	%rax			/* pop temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_leave:
	/*
	 * We must emulate a "leave", which is the same as a "movq %rbp, %rsp"
	 * followed by a "popq %rbp".  This is quite a bit simpler on amd64
	 * than it is on i386 -- we can exploit the fact that the %rsp is
	 * explicitly saved to effect the pop without having to reshuffle
	 * the other data pushed for the trap.
	 */
	INTR_POP
	pushq	%rax			/* push temp */
	movq	8(%rsp), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	movq	(%rbp), %rax		/* get new %rbp */
	addq	$8, %rbp		/* adjust new %rsp */
	movq	%rbp, 32(%rsp)		/* store new %rsp */
	movq	%rax, %rbp		/* set new %rbp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

bp_nop:
	/* We must emulate a "nop". */
	INTR_POP
	iretq
	/*NOTREACHED*/

bp_ret:
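	/*
	 * We must emulate a "ret": the return address sits at the top of
	 * the interrupted code's stack, so we copy it into the saved RIP
	 * and then pop it by adding 8 to the saved RSP.
	 */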
	INTR_POP
	pushq	%rax			/* push temp */
	movq	32(%rsp), %rax		/* load %rsp */
	movq	(%rax), %rax		/* load calling RIP */
	movq	%rax, 8(%rsp)		/* store calling RIP */
	addq	$8, 32(%rsp)		/* adjust new %rsp */
	popq	%rax			/* pop off temp */
	iretq				/* return from interrupt */
	/*NOTREACHED*/

	END(dtrace_invop_start)

/*
greg_t dtrace_getfp(void)
*/
	ENTRY(dtrace_getfp)
	movq	%rbp, %rax
	ret
	END(dtrace_getfp)

/*
uint32_t
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
*/
	ENTRY(dtrace_cas32)
	movl	%esi, %eax
	lock
	cmpxchgl %edx, (%rdi)
	ret
	END(dtrace_cas32)

/*
void *
dtrace_casptr(void *target, void *cmp, void *new)
*/
	ENTRY(dtrace_casptr)
	movq	%rsi, %rax
	lock
	cmpxchgq %rdx, (%rdi)
	ret
	END(dtrace_casptr)

/*
uintptr_t
dtrace_caller(int aframes)
*/
	ENTRY(dtrace_caller)
	movq	$-1, %rax
	ret
	END(dtrace_caller)

/*
void
dtrace_copy(uintptr_t src, uintptr_t dest, size_t size)
*/
	ENTRY(dtrace_copy_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx		/* load count */
	repz				/* repeat for count ... */
	smovb				/*   move from %ds:rsi to %es:rdi */
	leave
	ret
	END(dtrace_copy_nosmap)

	ENTRY(dtrace_copy_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi		/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx		/* load count */
	stac				/* set AC: permit user accesses (SMAP) */
	repz				/* repeat for count ... */
	smovb				/*   move from %ds:rsi to %es:rdi */
	clac				/* clear AC: re-enable SMAP */
	leave
	ret
	END(dtrace_copy_smap)

/*
void
dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
*/
	ENTRY(dtrace_copystr_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx		/* test if count is 4k-aligned */
	jnz	1f			/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	leave
	ret

	END(dtrace_copystr_nosmap)

	ENTRY(dtrace_copystr_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	stac
0:
	movb	(%rdi), %al		/* load from source */
	movb	%al, (%rsi)		/* store to destination */
	addq	$1, %rdi		/* increment source pointer */
	addq	$1, %rsi		/* increment destination pointer */
	subq	$1, %rdx		/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx		/* test if count is 4k-aligned */
	jnz	1f			/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	clac
	leave
	ret

	END(dtrace_copystr_smap)
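
/*
 * For reference, both copystr variants implement roughly the following C
 * (a sketch, not compiled here).  The DTrace fault flags are only checked
 * at 4KB boundaries so the common path stays cheap:
 *
 *	void
 *	dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
 *	    volatile uint16_t *flags)
 *	{
 *		uint8_t c;
 *
 *		do {
 *			c = *(uint8_t *)uaddr++;
 *			*(uint8_t *)kaddr++ = c;
 *			size--;
 *			if (c == '\0')
 *				break;
 *			if ((size & 0xfff) == 0 &&
 *			    (*flags & CPU_DTRACE_BADADDR) != 0)
 *				break;
 *		} while (size != 0);
 *	}
 */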

/*
uintptr_t
dtrace_fulword(void *addr)
*/
	ENTRY(dtrace_fulword_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fulword_nosmap)

	ENTRY(dtrace_fulword_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fulword_smap)

/*
uint8_t
dtrace_fuword8_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword8_nocheck_nosmap)
	xorq	%rax, %rax
	movb	(%rdi), %al
	ret
	END(dtrace_fuword8_nocheck_nosmap)

	ENTRY(dtrace_fuword8_nocheck_smap)
	stac
	xorq	%rax, %rax
	movb	(%rdi), %al
	clac
	ret
	END(dtrace_fuword8_nocheck_smap)

/*
uint16_t
dtrace_fuword16_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword16_nocheck_nosmap)
	xorq	%rax, %rax
	movw	(%rdi), %ax
	ret
	END(dtrace_fuword16_nocheck_nosmap)

	ENTRY(dtrace_fuword16_nocheck_smap)
	stac
	xorq	%rax, %rax
	movw	(%rdi), %ax
	clac
	ret
	END(dtrace_fuword16_nocheck_smap)

/*
uint32_t
dtrace_fuword32_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword32_nocheck_nosmap)
	xorq	%rax, %rax
	movl	(%rdi), %eax
	ret
	END(dtrace_fuword32_nocheck_nosmap)

	ENTRY(dtrace_fuword32_nocheck_smap)
	stac
	xorq	%rax, %rax
	movl	(%rdi), %eax
	clac
	ret
	END(dtrace_fuword32_nocheck_smap)

/*
uint64_t
dtrace_fuword64_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword64_nocheck_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fuword64_nocheck_nosmap)

	ENTRY(dtrace_fuword64_nocheck_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fuword64_nocheck_smap)

/*
void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fault, int fltoffs, uintptr_t illval)
*/
	ENTRY(dtrace_probe_error)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x8, %rsp
	movq	%r9, (%rsp)		/* push illval as a stack argument */
	movq	%r8, %r9
	movq	%rcx, %r8
	movq	%rdx, %rcx
	movq	%rsi, %rdx
	movq	%rdi, %rsi
	movl	dtrace_probeid_error(%rip), %edi
	call	dtrace_probe
	addq	$0x8, %rsp
	leave
	ret
	END(dtrace_probe_error)

/*
void
dtrace_membar_producer(void)
*/
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_producer)

/*
void
dtrace_membar_consumer(void)
*/
	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_consumer)

/*
dtrace_icookie_t
dtrace_interrupt_disable(void)
*/
	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax
	cli
	ret
	END(dtrace_interrupt_disable)

/*
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
*/
	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq
	ret
	END(dtrace_interrupt_enable)
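
/*
 * Typical usage of the pair above from C (a sketch; the real callers live
 * in the machine-independent DTrace code):
 *
 *	dtrace_icookie_t cookie;
 *
 *	cookie = dtrace_interrupt_disable();
 *	... critical section with interrupts masked ...
 *	dtrace_interrupt_enable(cookie);
 *
 * The cookie is just the saved %rflags, so dtrace_interrupt_enable()
 * re-enables interrupts only if IF was set when the cookie was taken.
 */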