/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Portions Copyright 2008 John Birrell <jb@freebsd.org>
 *
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#define _ASM

#include <machine/asmacros.h>
#include <sys/cpuvar_defs.h>
#include <sys/dtrace.h>

#include "assym.inc"

#define INTR_POP				\
	movq	TF_RDI(%rsp),%rdi;		\
	movq	TF_RSI(%rsp),%rsi;		\
	movq	TF_RDX(%rsp),%rdx;		\
	movq	TF_RCX(%rsp),%rcx;		\
	movq	TF_R8(%rsp),%r8;		\
	movq	TF_R9(%rsp),%r9;		\
	movq	TF_RAX(%rsp),%rax;		\
	movq	TF_RBX(%rsp),%rbx;		\
	movq	TF_RBP(%rsp),%rbp;		\
	movq	TF_R10(%rsp),%r10;		\
	movq	TF_R11(%rsp),%r11;		\
	movq	TF_R12(%rsp),%r12;		\
	movq	TF_R13(%rsp),%r13;		\
	movq	TF_R14(%rsp),%r14;		\
	movq	TF_R15(%rsp),%r15;		\
	testb	$SEL_RPL_MASK,TF_CS(%rsp);	\
	jz	1f;				\
	cli;					\
	swapgs;					\
1:	addq	$TF_RIP,%rsp;

	ENTRY(dtrace_invop_start)

	/*
	 * #BP traps with %rip set to the next address.  We need to decrement
	 * the value to indicate the address of the int3 (0xcc) instruction
	 * that we substituted.
	 */
	movq	TF_RIP(%rsp), %rdi
	decq	%rdi
	movq	%rsp, %rsi

	/*
	 * Allocate some scratch space to let the invop handler return a value.
	 * This is needed when emulating "call" instructions.
	 */
	subq	$16, %rsp
	movq	%rsp, %rdx

	call	dtrace_invop
	addq	$16, %rsp

	cmpl	$DTRACE_INVOP_PUSHL_EBP, %eax
	je	bp_push
	cmpl	$DTRACE_INVOP_CALL, %eax
	je	bp_call
	cmpl	$DTRACE_INVOP_LEAVE, %eax
	je	bp_leave
	cmpl	$DTRACE_INVOP_NOP, %eax
	je	bp_nop
	cmpl	$DTRACE_INVOP_RET, %eax
	je	bp_ret

	/* When all else fails handle the trap in the usual way. */
	jmpq	*dtrace_invop_calltrap_addr

bp_push:
	/*
	 * We must emulate a "pushq %rbp".  To do this, we pull the stack
	 * down 8 bytes, and then store the base pointer.
	 */
	INTR_POP
	subq	$16, %rsp	/* make room for %rbp */
	pushq	%rax		/* push temp */
	movq	24(%rsp), %rax	/* load calling RIP */
	movq	%rax, 8(%rsp)	/* store calling RIP */
	movq	32(%rsp), %rax	/* load calling CS */
	movq	%rax, 16(%rsp)	/* store calling CS */
	movq	40(%rsp), %rax	/* load calling RFLAGS */
	movq	%rax, 24(%rsp)	/* store calling RFLAGS */
	movq	48(%rsp), %rax	/* load calling RSP */
	subq	$8, %rax	/* make room for %rbp */
	movq	%rax, 32(%rsp)	/* store calling RSP */
	movq	56(%rsp), %rax	/* load calling SS */
	movq	%rax, 40(%rsp)	/* store calling SS */
	movq	32(%rsp), %rax	/* reload calling RSP */
	movq	%rbp, (%rax)	/* store %rbp there */
	popq	%rax		/* pop off temp */
	iretq			/* return from interrupt */
	/*NOTREACHED*/
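/*
 * Editorial note: in rough C terms, using FreeBSD's struct trapframe
 * field names, bp_push above does the work of the displaced
 * "pushq %rbp" on the interrupted context before resuming it:
 *
 *	tf->tf_rsp -= 8;			// grow the caller's stack
 *	*(uintptr_t *)tf->tf_rsp = tf->tf_rbp;	// store the base pointer
 *
 * The hardware frame consumed by iretq is, from lowest address up:
 * RIP, CS, RFLAGS, RSP, SS.  Each bp_* emulation shifts a copy of this
 * frame down the kernel stack and edits the saved RIP/RSP slots so
 * that iretq resumes the interrupted code with the emulated effect.
 */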
bp_call:
	/*
	 * Emulate a "call" instruction.  The invop handler must have already
	 * updated the saved copy of %rip in the register set.  It's our job to
	 * pull the hardware-saved registers down to make space for the return
	 * address, which is provided by the invop handler in our scratch
	 * space.
	 */
	INTR_POP
	subq	$16, %rsp	/* make room for the return address */
	pushq	%rax		/* push temp */
	pushq	%rbx		/* push temp */

	movq	32(%rsp), %rax	/* load calling RIP */
	movq	%rax, 16(%rsp)	/* store calling RIP */
	movq	40(%rsp), %rax	/* load calling CS */
	movq	%rax, 24(%rsp)	/* store calling CS */
	movq	48(%rsp), %rax	/* load calling RFLAGS */
	movq	%rax, 32(%rsp)	/* store calling RFLAGS */
	movq	56(%rsp), %rax	/* load calling RSP */
	subq	$8, %rax	/* make room for return address */
	movq	%rax, 40(%rsp)	/* store calling RSP */
	movq	64(%rsp), %rax	/* load calling SS */
	movq	%rax, 48(%rsp)	/* store calling SS */

	movq	-(TF_RIP - 16)(%rsp), %rax	/* load return address */
	movq	40(%rsp), %rbx	/* reload calling RSP */
	movq	%rax, (%rbx)	/* store return address */

	popq	%rbx		/* pop temp */
	popq	%rax		/* pop temp */
	iretq			/* return from interrupt */
	/*NOTREACHED*/

bp_leave:
	/*
	 * We must emulate a "leave", which is the same as a "movq %rbp, %rsp"
	 * followed by a "popq %rbp".  This is quite a bit simpler on amd64
	 * than it is on i386 -- we can exploit the fact that the %rsp is
	 * explicitly saved to effect the pop without having to reshuffle
	 * the other data pushed for the trap.
	 */
	INTR_POP
	pushq	%rax		/* push temp */
	movq	8(%rsp), %rax	/* load calling RIP */
	movq	%rax, 8(%rsp)	/* store calling RIP */
	movq	(%rbp), %rax	/* get new %rbp */
	addq	$8, %rbp	/* adjust new %rsp */
	movq	%rbp, 32(%rsp)	/* store new %rsp */
	movq	%rax, %rbp	/* set new %rbp */
	popq	%rax		/* pop off temp */
	iretq			/* return from interrupt */
	/*NOTREACHED*/

bp_nop:
	/* We must emulate a "nop". */
	INTR_POP
	iretq
	/*NOTREACHED*/

bp_ret:
	/*
	 * Emulate a "ret": pop the return address from the calling stack
	 * into the saved RIP and adjust the saved RSP accordingly.
	 */
	INTR_POP
	pushq	%rax		/* push temp */
	movq	32(%rsp), %rax	/* load %rsp */
	movq	(%rax), %rax	/* load calling RIP */
	movq	%rax, 8(%rsp)	/* store calling RIP */
	addq	$8, 32(%rsp)	/* adjust new %rsp */
	popq	%rax		/* pop off temp */
	iretq			/* return from interrupt */
	/*NOTREACHED*/

	END(dtrace_invop_start)

/*
greg_t dtrace_getfp(void)
*/
	ENTRY(dtrace_getfp)
	movq	%rbp, %rax
	ret
	END(dtrace_getfp)

/*
uint32_t
dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new)
*/
	ENTRY(dtrace_cas32)
	movl	%esi, %eax
	lock
	cmpxchgl %edx, (%rdi)
	ret
	END(dtrace_cas32)

/*
void *
dtrace_casptr(void *target, void *cmp, void *new)
*/
	ENTRY(dtrace_casptr)
	movq	%rsi, %rax
	lock
	cmpxchgq %rdx, (%rdi)
	ret
	END(dtrace_casptr)

/*
uintptr_t
dtrace_caller(int aframes)
*/
	ENTRY(dtrace_caller)
	movq	$-1, %rax
	ret
	END(dtrace_caller)
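/*
 * Editorial note: the routines below that touch user memory come in
 * _nosmap and _smap flavors.  On CPUs with SMAP, supervisor-mode
 * accesses to user pages fault unless RFLAGS.AC is set, so the _smap
 * variants bracket the access with stac/clac:
 *
 *	stac			// permit user accesses
 *	<touch user memory>
 *	clac			// re-arm SMAP
 *
 * so the kernel can select the variant matching the running CPU.
 */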
/*
void
dtrace_copy(uintptr_t src, uintptr_t dest, size_t size)
*/
	ENTRY(dtrace_copy_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi	/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx	/* load count */
	repz			/* repeat for count ... */
	smovb			/*   move from %ds:rsi to %es:rdi */
	leave
	ret
	END(dtrace_copy_nosmap)

	ENTRY(dtrace_copy_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	xchgq	%rdi, %rsi	/* make %rsi source, %rdi dest */
	movq	%rdx, %rcx	/* load count */
	stac
	repz			/* repeat for count ... */
	smovb			/*   move from %ds:rsi to %es:rdi */
	clac
	leave
	ret
	END(dtrace_copy_smap)

/*
void
dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
*/
	ENTRY(dtrace_copystr_nosmap)
	pushq	%rbp
	movq	%rsp, %rbp

0:
	movb	(%rdi), %al	/* load from source */
	movb	%al, (%rsi)	/* store to destination */
	addq	$1, %rdi	/* increment source pointer */
	addq	$1, %rsi	/* increment destination pointer */
	subq	$1, %rdx	/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx	/* test if count is 4k-aligned */
	jnz	1f		/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	leave
	ret

	END(dtrace_copystr_nosmap)

	ENTRY(dtrace_copystr_smap)
	pushq	%rbp
	movq	%rsp, %rbp

	stac
0:
	movb	(%rdi), %al	/* load from source */
	movb	%al, (%rsi)	/* store to destination */
	addq	$1, %rdi	/* increment source pointer */
	addq	$1, %rsi	/* increment destination pointer */
	subq	$1, %rdx	/* decrement remaining count */
	cmpb	$0, %al
	je	2f
	testq	$0xfff, %rdx	/* test if count is 4k-aligned */
	jnz	1f		/* if not, continue with copying */
	testq	$CPU_DTRACE_BADADDR, (%rcx) /* load and test dtrace flags */
	jnz	2f
1:
	cmpq	$0, %rdx
	jne	0b
2:
	clac
	leave
	ret

	END(dtrace_copystr_smap)

/*
uintptr_t
dtrace_fulword(void *addr)
*/
	ENTRY(dtrace_fulword_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fulword_nosmap)

	ENTRY(dtrace_fulword_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fulword_smap)

/*
uint8_t
dtrace_fuword8_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword8_nocheck_nosmap)
	xorq	%rax, %rax
	movb	(%rdi), %al
	ret
	END(dtrace_fuword8_nocheck_nosmap)

	ENTRY(dtrace_fuword8_nocheck_smap)
	stac
	xorq	%rax, %rax
	movb	(%rdi), %al
	clac
	ret
	END(dtrace_fuword8_nocheck_smap)

/*
uint16_t
dtrace_fuword16_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword16_nocheck_nosmap)
	xorq	%rax, %rax
	movw	(%rdi), %ax
	ret
	END(dtrace_fuword16_nocheck_nosmap)

	ENTRY(dtrace_fuword16_nocheck_smap)
	stac
	xorq	%rax, %rax
	movw	(%rdi), %ax
	clac
	ret
	END(dtrace_fuword16_nocheck_smap)

/*
uint32_t
dtrace_fuword32_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword32_nocheck_nosmap)
	xorq	%rax, %rax
	movl	(%rdi), %eax
	ret
	END(dtrace_fuword32_nocheck_nosmap)

	ENTRY(dtrace_fuword32_nocheck_smap)
	stac
	xorq	%rax, %rax
	movl	(%rdi), %eax
	clac
	ret
	END(dtrace_fuword32_nocheck_smap)

/*
uint64_t
dtrace_fuword64_nocheck(void *addr)
*/
	ENTRY(dtrace_fuword64_nocheck_nosmap)
	movq	(%rdi), %rax
	ret
	END(dtrace_fuword64_nocheck_nosmap)

	ENTRY(dtrace_fuword64_nocheck_smap)
	stac
	movq	(%rdi), %rax
	clac
	ret
	END(dtrace_fuword64_nocheck_smap)

/*
void
dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which,
    int fault, int fltoffs, uintptr_t illval)
*/
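/*
 * Editorial note: dtrace_probe_error() prepends the error probe id and
 * forwards its arguments to dtrace_probe().  Under the SysV amd64 ABI
 * the first six integer arguments travel in %rdi, %rsi, %rdx, %rcx,
 * %r8 and %r9, and further arguments go on the stack; the code below
 * therefore rotates each register one slot rightward to open up %rdi
 * for dtrace_probeid_error, and spills illval (%r9) to the stack in
 * the seventh-argument position.
 */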
	ENTRY(dtrace_probe_error)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x8, %rsp
	movq	%r9, (%rsp)
	movq	%r8, %r9
	movq	%rcx, %r8
	movq	%rdx, %rcx
	movq	%rsi, %rdx
	movq	%rdi, %rsi
	movl	dtrace_probeid_error(%rip), %edi
	call	dtrace_probe
	addq	$0x8, %rsp
	leave
	ret
	END(dtrace_probe_error)

/*
void
dtrace_membar_producer(void)
*/
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_producer)

/*
void
dtrace_membar_consumer(void)
*/
	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	END(dtrace_membar_consumer)

/*
dtrace_icookie_t
dtrace_interrupt_disable(void)
*/
	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax
	cli
	ret
	END(dtrace_interrupt_disable)

/*
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
*/
	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq
	ret
	END(dtrace_interrupt_enable)
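/*
 * Editorial note: the interrupt cookie above is simply the caller's
 * RFLAGS image.  dtrace_interrupt_disable() captures it (IF included)
 * with pushfq before cli clears interrupts, and
 * dtrace_interrupt_enable() restores it wholesale with popfq, so
 * interrupts come back on only if they were on to begin with.
 */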