/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2012 Joyent, Inc.  All rights reserved.
 */

#if defined(lint)

#include <sys/types.h>
#include <_rtld.h>
#include <_audit.h>
#include <_elf.h>
#include <sys/regset.h>
#include <sys/auxv_386.h>

/* ARGSUSED0 */
int
elf_plt_trace()
{
	return (0);
}
#else

#include <link.h>
#include <_audit.h>
#include <sys/asm_linkage.h>
#include <sys/auxv_386.h>

	.file	"boot_elf.s"
	.text

/*
 * On entry the 'glue code' has already done the following:
 *
 *	pushq	%rbp
 *	movq	%rsp, %rbp
 *	subq	$0x10, %rsp
 *	leaq	trace_fields(%rip), %r11
 *	movq	%r11, -0x8(%rbp)
 *	movq	$elf_plt_trace, %r11
 *	jmp	*%r11
 *
 * so - -8(%rbp) contains the dyndata ptr
 *
 *	0x0	Addr		*reflmp
 *	0x8	Addr		*deflmp
 *	0x10	Word		symndx
 *	0x14	Word		sb_flags
 *	0x18	Sym		symdef.st_name
 *	0x1c			symdef.st_info
 *	0x1d			symdef.st_other
 *	0x1e			symdef.st_shndx
 *	0x20			symdef.st_value
 *	0x28			symdef.st_size
 *
 * Also note - on entry 16 bytes have already been subtracted
 * from the %rsp.  The first 8 bytes is for the dyn_data_ptr,
 * the second 8 bytes are to align the stack and are available
 * for use.
 */
#define	REFLMP_OFF		0x0
#define	DEFLMP_OFF		0x8
#define	SYMNDX_OFF		0x10
#define	SBFLAGS_OFF		0x14
#define	SYMDEF_OFF		0x18
#define	SYMDEF_VALUE_OFF	0x20
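/*
 * For reference only - viewed as a C layout, the dyndata block that the
 * offsets above index looks roughly like the sketch below.  This is an
 * illustration derived from the offset table in the comment above, not a
 * declaration rtld actually uses (the struct name is made up; the block is
 * only ever accessed through the *_OFF defines):
 *
 *	struct dyndata_sketch {
 *		Addr	*reflmp;	/ 0x00 referencing link-map
 *		Addr	*deflmp;	/ 0x08 defining link-map
 *		Word	symndx;		/ 0x10
 *		Word	sb_flags;	/ 0x14
 *		Sym	symdef;		/ 0x18 (st_value lands at 0x20)
 *	};
 */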
/*
 * Local stack space storage for elf_plt_trace is allocated
 * as follows:
 *
 *  First - before we got here - %rsp has been decremented
 *  by 0x10 to make space for the dyndata ptr (and another
 *  free word).  In addition to that, we create space
 *  for the following:
 *
 *	La_amd64_regs	    8 * 8:	64
 *	prev_stack_size	    8		 8
 *	Saved regs:
 *	    %rdi			 8
 *	    %rsi			 8
 *	    %rdx			 8
 *	    %rcx			 8
 *	    %r8				 8
 *	    %r9				 8
 *	    %r10			 8
 *	    %r11			 8
 *	    %rax			 8
 *				    =======
 *			    Subtotal:	144 (32byte aligned)
 *
 *	Saved Media Regs (used to pass floating point args):
 *	    %xmm0 - %xmm7   32 * 8:	256
 *				    =======
 *			    Total:	400 (32byte aligned)
 *
 *  So - will subtract the following to create enough space
 *
 *	-8(%rbp)	store dyndata ptr
 *	-16(%rbp)	store call destination
 *	-80(%rbp)	space for La_amd64_regs
 *	-88(%rbp)	prev stack size
 *  The next %rbp offsets are only true if the caller had correct stack
 *  alignment.  See note above SPRDIOFF for why we use %rsp alignment to
 *  access these stack fields.
 *	-96(%rbp)	entering %rdi
 *	-104(%rbp)	entering %rsi
 *	-112(%rbp)	entering %rdx
 *	-120(%rbp)	entering %rcx
 *	-128(%rbp)	entering %r8
 *	-136(%rbp)	entering %r9
 *	-144(%rbp)	entering %r10
 *	-152(%rbp)	entering %r11
 *	-160(%rbp)	entering %rax
 *	-192(%rbp)	entering %xmm0
 *	-224(%rbp)	entering %xmm1
 *	-256(%rbp)	entering %xmm2
 *	-288(%rbp)	entering %xmm3
 *	-320(%rbp)	entering %xmm4
 *	-352(%rbp)	entering %xmm5
 *	-384(%rbp)	entering %xmm6
 *	-416(%rbp)	entering %xmm7
 *
 */
#define	SPDYNOFF	-8
#define	SPDESTOFF	-16
#define	SPLAREGOFF	-80
#define	SPPRVSTKOFF	-88

/*
 * The next set of offsets are relative to %rsp.
 * We guarantee %rsp is ABI compliant 32-byte aligned.  This guarantees the
 * ymm registers are saved to 32-byte aligned addresses.
 * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code.
 */
#define	SPRDIOFF	320
#define	SPRSIOFF	312
#define	SPRDXOFF	304
#define	SPRCXOFF	296
#define	SPR8OFF		288
#define	SPR9OFF		280
#define	SPR10OFF	272
#define	SPR11OFF	264
#define	SPRAXOFF	256
#define	SPXMM0OFF	224
#define	SPXMM1OFF	192
#define	SPXMM2OFF	160
#define	SPXMM3OFF	128
#define	SPXMM4OFF	96
#define	SPXMM5OFF	64
#define	SPXMM6OFF	32
#define	SPXMM7OFF	0

	/* See elf_rtbndr for explanation behind org_scapset */
	.extern	org_scapset
	.globl	elf_plt_trace
	.type	elf_plt_trace,@function
	.align	16
elf_plt_trace:
	/*
	 * Enforce 32-byte stack alignment here so that the ymm saves
	 * below land on 32-byte aligned addresses.
	 * The next andq instruction rounds %rsp down to a 32-byte boundary.
	 */
	andq	$-32, %rsp	/* enforce ABI 32-byte stack alignment */
	subq	$400, %rsp	/ create some local storage

	movq	%rdi, SPRDIOFF(%rsp)
	movq	%rsi, SPRSIOFF(%rsp)
	movq	%rdx, SPRDXOFF(%rsp)
	movq	%rcx, SPRCXOFF(%rsp)
	movq	%r8, SPR8OFF(%rsp)
	movq	%r9, SPR9OFF(%rsp)
	movq	%r10, SPR10OFF(%rsp)
	movq	%r11, SPR11OFF(%rsp)
	movq	%rax, SPRAXOFF(%rsp)

	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.trace_save_ymm

.trace_save_xmm:
	movdqa	%xmm0, SPXMM0OFF(%rsp)
	movdqa	%xmm1, SPXMM1OFF(%rsp)
	movdqa	%xmm2, SPXMM2OFF(%rsp)
	movdqa	%xmm3, SPXMM3OFF(%rsp)
	movdqa	%xmm4, SPXMM4OFF(%rsp)
	movdqa	%xmm5, SPXMM5OFF(%rsp)
	movdqa	%xmm6, SPXMM6OFF(%rsp)
	movdqa	%xmm7, SPXMM7OFF(%rsp)
	jmp	.trace_save_finish

.trace_save_ymm:
	vmovdqa	%ymm0, SPXMM0OFF(%rsp)
	vmovdqa	%ymm1, SPXMM1OFF(%rsp)
	vmovdqa	%ymm2, SPXMM2OFF(%rsp)
	vmovdqa	%ymm3, SPXMM3OFF(%rsp)
	vmovdqa	%ymm4, SPXMM4OFF(%rsp)
	vmovdqa	%ymm5, SPXMM5OFF(%rsp)
	vmovdqa	%ymm6, SPXMM6OFF(%rsp)
	vmovdqa	%ymm7, SPXMM7OFF(%rsp)

.trace_save_finish:

	movq	SPDYNOFF(%rbp), %rax			/ %rax = dyndata
	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
	je	.start_pltenter
	movq	SYMDEF_VALUE_OFF(%rax), %rdi
	movq	%rdi, SPDESTOFF(%rbp)		/ save destination address
	jmp	.end_pltenter

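	/*
	 * For reference - the 64 bytes at SPLAREGOFF are filled in below as
	 * a La_amd64_regs (<link.h>), eight 64-bit slots in this order:
	 * rsp, rbp, rdi, rsi, rdx, rcx, r8, r9.  A rough C view (illustrative
	 * only; see <link.h> for the authoritative definition and names):
	 *
	 *	struct La_amd64_regs_sketch {
	 *		uint64_t	lr_rsp, lr_rbp, lr_rdi, lr_rsi;
	 *		uint64_t	lr_rdx, lr_rcx, lr_r8, lr_r9;
	 *	};
	 */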
.start_pltenter:
	/*
	 * save all registers into La_amd64_regs
	 */
	leaq	SPLAREGOFF(%rbp), %rsi	/ %rsi = &La_amd64_regs
	leaq	8(%rbp), %rdi
	movq	%rdi, 0(%rsi)		/ la_rsp
	movq	0(%rbp), %rdi
	movq	%rdi, 8(%rsi)		/ la_rbp
	movq	SPRDIOFF(%rsp), %rdi
	movq	%rdi, 16(%rsi)		/ la_rdi
	movq	SPRSIOFF(%rsp), %rdi
	movq	%rdi, 24(%rsi)		/ la_rsi
	movq	SPRDXOFF(%rsp), %rdi
	movq	%rdi, 32(%rsi)		/ la_rdx
	movq	SPRCXOFF(%rsp), %rdi
	movq	%rdi, 40(%rsi)		/ la_rcx
	movq	SPR8OFF(%rsp), %rdi
	movq	%rdi, 48(%rsi)		/ la_r8
	movq	SPR9OFF(%rsp), %rdi
	movq	%rdi, 56(%rsi)		/ la_r9

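	/*
	 * The call below goes through rtld's audit_pltenter() wrapper, which
	 * in turn invokes any la_pltenter() audit routines.  Judging from the
	 * argument setup that follows, its shape is roughly (a sketch, not
	 * the authoritative declaration - see rtld's audit code for that):
	 *
	 *	Addr audit_pltenter(Rt_map *rlmp, Rt_map *dlmp, Sym *sym,
	 *	    uint_t symndx, La_amd64_regs *regs, uint_t *sb_flags);
	 *
	 * The value returned in %rax is the destination address to call,
	 * which is stashed at SPDESTOFF below.
	 */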
	/*
	 * prepare for call to la_pltenter
	 */
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	leaq	SBFLAGS_OFF(%r11), %r9		/ arg6 (&sb_flags)
	leaq	SPLAREGOFF(%rbp), %r8		/ arg5 (&La_amd64_regs)
	movl	SYMNDX_OFF(%r11), %ecx		/ arg4 (symndx)
	leaq	SYMDEF_OFF(%r11), %rdx		/ arg3 (&Sym)
	movq	DEFLMP_OFF(%r11), %rsi		/ arg2 (dlmp)
	movq	REFLMP_OFF(%r11), %rdi		/ arg1 (rlmp)
	call	audit_pltenter@PLT
	movq	%rax, SPDESTOFF(%rbp)		/ save destination address
.end_pltenter:

	/*
	 * If *no* la_pltexit() routines exist
	 * we do not need to keep the stack frame
	 * before we call the actual routine.  Instead we
	 * jump to it and remove our stack frame from the stack
	 * at the same time.
	 */
	movl	audit_flags(%rip), %eax
	andl	$AF_PLTEXIT, %eax		/ value of audit.h:AF_PLTEXIT
	cmpl	$0, %eax
	je	.bypass_pltexit
	/*
	 * Has the *nopltexit* flag been set for this entry point?
	 */
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
	je	.start_pltexit

.bypass_pltexit:
	/*
	 * No PLTEXIT processing required.
	 */
	movq	0(%rbp), %r11
	movq	%r11, -8(%rbp)			/ move prev %rbp
	movq	SPDESTOFF(%rbp), %r11		/ %r11 == calling destination
	movq	%r11, 0(%rbp)			/ store destination at top

	/
	/ Restore registers
	/
	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.trace_restore_ymm

.trace_restore_xmm:
	movdqa	SPXMM0OFF(%rsp), %xmm0
	movdqa	SPXMM1OFF(%rsp), %xmm1
	movdqa	SPXMM2OFF(%rsp), %xmm2
	movdqa	SPXMM3OFF(%rsp), %xmm3
	movdqa	SPXMM4OFF(%rsp), %xmm4
	movdqa	SPXMM5OFF(%rsp), %xmm5
	movdqa	SPXMM6OFF(%rsp), %xmm6
	movdqa	SPXMM7OFF(%rsp), %xmm7
	jmp	.trace_restore_finish

.trace_restore_ymm:
	vmovdqa	SPXMM0OFF(%rsp), %ymm0
	vmovdqa	SPXMM1OFF(%rsp), %ymm1
	vmovdqa	SPXMM2OFF(%rsp), %ymm2
	vmovdqa	SPXMM3OFF(%rsp), %ymm3
	vmovdqa	SPXMM4OFF(%rsp), %ymm4
	vmovdqa	SPXMM5OFF(%rsp), %ymm5
	vmovdqa	SPXMM6OFF(%rsp), %ymm6
	vmovdqa	SPXMM7OFF(%rsp), %ymm7

.trace_restore_finish:
	movq	SPRDIOFF(%rsp), %rdi
	movq	SPRSIOFF(%rsp), %rsi
	movq	SPRDXOFF(%rsp), %rdx
	movq	SPRCXOFF(%rsp), %rcx
	movq	SPR8OFF(%rsp), %r8
	movq	SPR9OFF(%rsp), %r9
	movq	SPR10OFF(%rsp), %r10
	movq	SPR11OFF(%rsp), %r11
	movq	SPRAXOFF(%rsp), %rax

	subq	$8, %rbp			/ adjust %rbp for 'ret'
	movq	%rbp, %rsp			/
	/*
	 * At this point, after a little doctoring, we should
	 * have the following on the stack:
	 *
	 *	16(%rsp):  ret addr
	 *	8(%rsp):  dest_addr
	 *	0(%rsp):  Previous %rbp
	 *
	 * So - we pop the previous %rbp, and then
	 * ret to our final destination.
	 */
	popq	%rbp				/
	ret					/ jmp to final destination
						/ and clean up stack :)

.start_pltexit:
	/*
	 * In order to call the destination procedure and then return
	 * to audit_pltexit() for post analysis we must first grow
	 * our stack frame and then duplicate the original caller's
	 * stack state.  This duplicates all of the arguments
	 * that were to be passed to the destination procedure.
	 */
	movq	%rbp, %rdi			/
	addq	$16, %rdi			/ %rdi = src
	movq	(%rbp), %rdx			/
	subq	%rdi, %rdx			/ %rdx == prev frame sz
	/*
	 * If audit_argcnt > 0 then we limit the number of
	 * arguments that will be duplicated to audit_argcnt.
	 *
	 * If (prev_stack_size > (audit_argcnt * 8))
	 *	prev_stack_size = audit_argcnt * 8;
	 */
	movl	audit_argcnt(%rip), %eax	/ %eax = audit_argcnt
	cmpl	$0, %eax
	jle	.grow_stack
	leaq	(,%rax,8), %rax			/ %rax = %rax * 8
	cmpq	%rax, %rdx
	jle	.grow_stack
	movq	%rax, %rdx
	/*
	 * Grow the stack and duplicate the arguments of the
	 * original caller.
	 *
	 * We save %rsp in %r11 since we need to use the current rsp for
	 * accessing the registers saved in our stack frame.
	 */
.grow_stack:
	movq	%rsp, %r11
	subq	%rdx, %rsp			/ grow the stack
	movq	%rdx, SPPRVSTKOFF(%rbp)		/ -88(%rbp) == prev frame sz
	movq	%rsp, %rcx			/ %rcx = dest
	addq	%rcx, %rdx			/ %rdx == tail of dest
.while_base:
	cmpq	%rdx, %rcx			/ while (dest < tail) {
	jge	.end_while			/
	movq	(%rdi), %rsi
	movq	%rsi, (%rcx)			/	*dest = *src
	addq	$8, %rdi			/	src++
	addq	$8, %rcx			/	dest++
	jmp	.while_base			/ }

	/*
	 * The above stack is now an exact duplicate of
	 * the stack of the original calling procedure.
	 */
.end_while:
	/
	/ Restore registers using %r11 which contains our old %rsp value
	/ before growing the stack.
	/

	/ Yes, we have to do this dance again.  Sorry.
	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.trace_r2_ymm

.trace_r2_xmm:
	movdqa	SPXMM0OFF(%r11), %xmm0
	movdqa	SPXMM1OFF(%r11), %xmm1
	movdqa	SPXMM2OFF(%r11), %xmm2
	movdqa	SPXMM3OFF(%r11), %xmm3
	movdqa	SPXMM4OFF(%r11), %xmm4
	movdqa	SPXMM5OFF(%r11), %xmm5
	movdqa	SPXMM6OFF(%r11), %xmm6
	movdqa	SPXMM7OFF(%r11), %xmm7
	jmp	.trace_r2_finish

.trace_r2_ymm:
	vmovdqa	SPXMM0OFF(%r11), %ymm0
	vmovdqa	SPXMM1OFF(%r11), %ymm1
	vmovdqa	SPXMM2OFF(%r11), %ymm2
	vmovdqa	SPXMM3OFF(%r11), %ymm3
	vmovdqa	SPXMM4OFF(%r11), %ymm4
	vmovdqa	SPXMM5OFF(%r11), %ymm5
	vmovdqa	SPXMM6OFF(%r11), %ymm6
	vmovdqa	SPXMM7OFF(%r11), %ymm7

.trace_r2_finish:
	movq	SPRDIOFF(%r11), %rdi
	movq	SPRSIOFF(%r11), %rsi
	movq	SPRDXOFF(%r11), %rdx
	movq	SPRCXOFF(%r11), %rcx
	movq	SPR8OFF(%r11), %r8
	movq	SPR9OFF(%r11), %r9
	movq	SPR10OFF(%r11), %r10
	movq	SPRAXOFF(%r11), %rax
	movq	SPR11OFF(%r11), %r11		/ restore %r11 last

	/*
	 * Call to destination function - we'll return here
	 * for pltexit monitoring.
	 */
	call	*SPDESTOFF(%rbp)

	addq	SPPRVSTKOFF(%rbp), %rsp		/ cleanup dupped stack

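	/*
	 * audit_pltexit() is rtld's wrapper around any la_pltexit() audit
	 * routines.  From the argument setup below its shape is roughly
	 * (a sketch only, not the authoritative declaration):
	 *
	 *	Addr audit_pltexit(Addr retval, Rt_map *rlmp, Rt_map *dlmp,
	 *	    Sym *sym, uint_t symndx);
	 *
	 * The (possibly audited) return value comes back in %rax and is what
	 * we ultimately hand back to the original caller.
	 */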
	/
	/ prepare for call to audit_pltexit()
	/
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	movq	SYMNDX_OFF(%r11), %r8		/ arg5 (symndx)
	leaq	SYMDEF_OFF(%r11), %rcx		/ arg4 (&Sym)
	movq	DEFLMP_OFF(%r11), %rdx		/ arg3 (dlmp)
	movq	REFLMP_OFF(%r11), %rsi		/ arg2 (rlmp)
	movq	%rax, %rdi			/ arg1 (returnval)
	call	audit_pltexit@PLT

	/*
	 * Clean up after ourselves and return to the
	 * original calling procedure.
	 */

	/
	/ Restore registers
	/
	movq	SPRDIOFF(%rsp), %rdi
	movq	SPRSIOFF(%rsp), %rsi
	movq	SPRDXOFF(%rsp), %rdx
	movq	SPRCXOFF(%rsp), %rcx
	movq	SPR8OFF(%rsp), %r8
	movq	SPR9OFF(%rsp), %r9
	movq	SPR10OFF(%rsp), %r10
	movq	SPR11OFF(%rsp), %r11
	/ %rax already contains the return value
	movdqa	SPXMM0OFF(%rsp), %xmm0
	movdqa	SPXMM1OFF(%rsp), %xmm1
	movdqa	SPXMM2OFF(%rsp), %xmm2
	movdqa	SPXMM3OFF(%rsp), %xmm3
	movdqa	SPXMM4OFF(%rsp), %xmm4
	movdqa	SPXMM5OFF(%rsp), %xmm5
	movdqa	SPXMM6OFF(%rsp), %xmm6
	movdqa	SPXMM7OFF(%rsp), %xmm7

	movq	%rbp, %rsp			/
	popq	%rbp				/
	ret					/ return to caller
	.size	elf_plt_trace, .-elf_plt_trace
#endif

/*
 * We got here because a call to a function resolved to a procedure
 * linkage table entry.  That entry did a jmp to the first PLT entry, which
 * in turn did a call to elf_rtbndr.
 *
 * the code sequence that got us here was:
 *
 * .PLT0:
 *	pushq	GOT+8(%rip)	#GOT[1]
 *	jmp	*GOT+16(%rip)	#GOT[2]
 *	nop
 *	nop
 *	nop
 *	nop
 *	...
 * PLT entry for foo:
 *	jmp	*name1@GOTPCREL(%rip)
 *	pushl	$rel.plt.foo
 *	jmp	PLT0
 *
 * At entry, the stack looks like this:
 *
 *	return address			16(%rsp)
 *	$rel.plt.foo (plt index)	8(%rsp)
 *	lmp				0(%rsp)
 *
 */
#if defined(lint)

extern unsigned long elf_bndr(Rt_map *, unsigned long, caddr_t);

void
elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)
{
	(void) elf_bndr(lmp, reloc, pc);
}

#else

/*
 * The PLT code that landed us here placed 2 arguments on the stack as
 * arguments to elf_rtbndr.
 * Additionally the pc of caller is below these 2 args.
 * Our stack will look like this after we establish a stack frame with
 * push %rbp; movq %rsp, %rbp sequence:
 *
 *	8(%rbp), %rdi		arg1 - *lmp
 *	16(%rbp), %rsi		arg2 - reloc index
 *	24(%rbp), %rdx		arg3 - pc of caller
 */
#define	LBPLMPOFF	8	/* arg1 - *lmp */
#define	LBPRELOCOFF	16	/* arg2 - reloc index */
#define	LBRPCOFF	24	/* arg3 - pc of caller */

/*
 * Possible arguments for the resolved function are in registers as per
 * the AMD64 ABI.  We must save on the local stack all possible register
 * arguments before interposing functions to resolve the called function.
 * Possible arguments must be restored before invoking the resolved function.
 *
 * Before the AVX instruction set enhancements to AMD64, the set of registers
 * and their sizes was the same across processors.  With AVX, the xmm
 * registers became the lower 128 bits of the ymm registers.  Because of
 * this, we need to conditionally save 256 bits instead of 128 bits.
 * Regardless of whether we have ymm registers or not, we always reserve
 * the stack space as though we do, to simplify the code.
 *
 * Local stack space storage for elf_rtbndr is allocated as follows:
 *
 *	Saved regs:
 *	    %rax			 8
 *	    %rdi			 8
 *	    %rsi			 8
 *	    %rdx			 8
 *	    %rcx			 8
 *	    %r8				 8
 *	    %r9				 8
 *	    %r10			 8
 *				    =======
 *			    Subtotal:	 64 (32byte aligned)
 *
 *	Saved Media Regs (used to pass floating point args):
 *	    %ymm0 - %ymm7   32 * 8	256
 *				    =======
 *			    Total:	320 (32byte aligned)
 *
 *  So - will subtract the following to create enough space
 *
 *	0(%rsp)		save %rax
 *	8(%rsp)		save %rdi
 *	16(%rsp)	save %rsi
 *	24(%rsp)	save %rdx
 *	32(%rsp)	save %rcx
 *	40(%rsp)	save %r8
 *	48(%rsp)	save %r9
 *	56(%rsp)	save %r10
 *	64(%rsp)	save %ymm0
 *	96(%rsp)	save %ymm1
 *	128(%rsp)	save %ymm2
 *	160(%rsp)	save %ymm3
 *	192(%rsp)	save %ymm4
 *	224(%rsp)	save %ymm5
 *	256(%rsp)	save %ymm6
 *	288(%rsp)	save %ymm7
 *
 * Note: Some callers may use 8-byte stack alignment instead of the
 * ABI required 16-byte alignment.  We use %rsp offsets to save/restore
 * registers because %rbp may not be properly aligned.  We guarantee %rsp
 * is 32-byte aligned in the function preamble.
 */
/*
 * As the registers may either be xmm or ymm, we've left the name as xmm, but
 * increased the offset between them to always cover the xmm and ymm cases.
 */
#define	LS_SIZE	$320	/* local stack space to save all possible arguments */
#define	LSRAXOFF	0	/* for SSE register count */
#define	LSRDIOFF	8	/* arg 0 ... */
#define	LSRSIOFF	16
#define	LSRDXOFF	24
#define	LSRCXOFF	32
#define	LSR8OFF		40
#define	LSR9OFF		48
#define	LSR10OFF	56	/* ... arg 5 */
#define	LSXMM0OFF	64	/* SSE arg 0 ... */
#define	LSXMM1OFF	96
#define	LSXMM2OFF	128
#define	LSXMM3OFF	160
#define	LSXMM4OFF	192
#define	LSXMM5OFF	224
#define	LSXMM6OFF	256
#define	LSXMM7OFF	288	/* ... SSE arg 7 */

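/*
 * In rough pseudo-code, elf_rtbndr below does the following (illustration
 * only - the final transfer of control is done with a 'ret' through the
 * patched stack slot rather than an explicit call):
 *
 *	save %rax, %rdi, %rsi, %rdx, %rcx, %r8, %r9, %r10, %xmm/%ymm0-7;
 *	dest = elf_bndr(lmp, reloc_index, caller_pc);
 *	16(%rbp) = dest;	/ reuse the reloc-index slot
 *	restore the saved registers;
 *	pop the frame and the lmp slot;
 *	ret;			/ "returns" into dest
 */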
	/*
	 * org_scapset is a global variable that is part of rtld.  It
	 * contains the capabilities that the kernel has told us are
	 * supported (auxv_hwcap).  This is necessary for determining
	 * whether we need to save and restore the AVX (ymm) registers or
	 * simply the SSE (xmm) registers.  Note that the field we care
	 * about is currently at offset 0; if that changes, this code will
	 * have to be updated.
	 */
	.extern	org_scapset
	.weak	_elf_rtbndr
	_elf_rtbndr = elf_rtbndr

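	/*
	 * The hwcap test that is repeated throughout this file is, in rough
	 * C terms (illustration only - the extra load in the assembly
	 * fetches the org_scapset pointer itself through the GOT):
	 *
	 *	if (*(uint_t *)org_scapset & AV_386_AVX)
	 *		save/restore %ymm0-%ymm7 with vmovdqa;
	 *	else
	 *		save/restore %xmm0-%xmm7 with movdqa;
	 */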
	ENTRY(elf_rtbndr)

	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack
	 * alignment.  Enforce 32-byte stack alignment here so that the ymm
	 * saves below land on 32-byte aligned addresses.
	 * The next andq instruction rounds %rsp down to a 32-byte boundary.
	 */
	andq	$-32, %rsp	/* enforce ABI 32-byte stack alignment */

	subq	LS_SIZE, %rsp	/* save all ABI defined argument registers */

	movq	%rax, LSRAXOFF(%rsp)	/* for SSE register count */
	movq	%rdi, LSRDIOFF(%rsp)	/* arg 0 .. */
	movq	%rsi, LSRSIOFF(%rsp)
	movq	%rdx, LSRDXOFF(%rsp)
	movq	%rcx, LSRCXOFF(%rsp)
	movq	%r8, LSR8OFF(%rsp)
	movq	%r9, LSR9OFF(%rsp)	/* .. arg 5 */
	movq	%r10, LSR10OFF(%rsp)	/* call chain reg */

	/*
	 * Our xmm registers could secretly be ymm registers in disguise.
	 */
	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.save_ymm

.save_xmm:
	movdqa	%xmm0, LSXMM0OFF(%rsp)	/* SSE arg 0 ... */
	movdqa	%xmm1, LSXMM1OFF(%rsp)
	movdqa	%xmm2, LSXMM2OFF(%rsp)
	movdqa	%xmm3, LSXMM3OFF(%rsp)
	movdqa	%xmm4, LSXMM4OFF(%rsp)
	movdqa	%xmm5, LSXMM5OFF(%rsp)
	movdqa	%xmm6, LSXMM6OFF(%rsp)
	movdqa	%xmm7, LSXMM7OFF(%rsp)	/* ... SSE arg 7 */
	jmp	.save_finish

.save_ymm:
	vmovdqa	%ymm0, LSXMM0OFF(%rsp)	/* SSE arg 0 ... */
	vmovdqa	%ymm1, LSXMM1OFF(%rsp)
	vmovdqa	%ymm2, LSXMM2OFF(%rsp)
	vmovdqa	%ymm3, LSXMM3OFF(%rsp)
	vmovdqa	%ymm4, LSXMM4OFF(%rsp)
	vmovdqa	%ymm5, LSXMM5OFF(%rsp)
	vmovdqa	%ymm6, LSXMM6OFF(%rsp)
	vmovdqa	%ymm7, LSXMM7OFF(%rsp)	/* ... SSE arg 7 */

.save_finish:
	movq	LBPLMPOFF(%rbp), %rdi	/* arg1 - *lmp */
	movq	LBPRELOCOFF(%rbp), %rsi	/* arg2 - reloc index */
	movq	LBRPCOFF(%rbp), %rdx	/* arg3 - pc of caller */
	call	elf_bndr@PLT		/* call elf_bndr(lmp, relndx, pc) */
	movq	%rax, LBPRELOCOFF(%rbp)	/* store final destination */

	/*
	 * Restore possible arguments before invoking resolved function.  We
	 * check the xmm vs. ymm regs first so we can use the others.
	 */
	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.restore_ymm

.restore_xmm:
	movdqa	LSXMM0OFF(%rsp), %xmm0
	movdqa	LSXMM1OFF(%rsp), %xmm1
	movdqa	LSXMM2OFF(%rsp), %xmm2
	movdqa	LSXMM3OFF(%rsp), %xmm3
	movdqa	LSXMM4OFF(%rsp), %xmm4
	movdqa	LSXMM5OFF(%rsp), %xmm5
	movdqa	LSXMM6OFF(%rsp), %xmm6
	movdqa	LSXMM7OFF(%rsp), %xmm7
	jmp	.restore_finish

.restore_ymm:
	vmovdqa	LSXMM0OFF(%rsp), %ymm0
	vmovdqa	LSXMM1OFF(%rsp), %ymm1
	vmovdqa	LSXMM2OFF(%rsp), %ymm2
	vmovdqa	LSXMM3OFF(%rsp), %ymm3
	vmovdqa	LSXMM4OFF(%rsp), %ymm4
	vmovdqa	LSXMM5OFF(%rsp), %ymm5
	vmovdqa	LSXMM6OFF(%rsp), %ymm6
	vmovdqa	LSXMM7OFF(%rsp), %ymm7

.restore_finish:
	movq	LSRAXOFF(%rsp), %rax
	movq	LSRDIOFF(%rsp), %rdi
	movq	LSRSIOFF(%rsp), %rsi
	movq	LSRDXOFF(%rsp), %rdx
	movq	LSRCXOFF(%rsp), %rcx
	movq	LSR8OFF(%rsp), %r8
	movq	LSR9OFF(%rsp), %r9
	movq	LSR10OFF(%rsp), %r10

	movq	%rbp, %rsp
	popq	%rbp

	addq	$8, %rsp	/* pop 1st plt-pushed arg */
				/* the second argument is used */
				/* for the 'return' address to our */
				/* final destination */

	ret			/* invoke resolved function */
	.size	elf_rtbndr, .-elf_rtbndr
#endif