/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2018 Joyent, Inc. All rights reserved.
 */

/*
 * Welcome to the magic behind the PLT (procedure linkage table). When rtld
 * fills out the PLT entries, it will refer initially to the functions in this
 * file. As such our goal is simple:
 *
 *	The lie of the function call must be preserved at all costs.
 *
 * This means that we need to prepare the system for an arbitrary series of
 * instructions to be called. For example, as a side effect of resolving a
 * symbol we may need to open a shared object, which will cause any _init
 * functions to be called. Those functions can use any and all of the ABI state
 * that they desire (for example, the FPU registers). Therefore we must save
 * and restore all of the ABI-mandated registers here.
 *
 * For the full information about what we need to save and restore and why,
 * please see the System V amd64 PS ABI '3.2.3 Parameter Passing'. For general
 * purpose registers, we need to take care of the following:
 *
 *	%rax - Used for information about the number of vector arguments
 *	%rdi - arg0
 *	%rsi - arg1
 *	%rdx - arg2
 *	%rcx - arg3
 *	%r8  - arg4
 *	%r9  - arg5
 *	%r10 - static chain pointer
 *
 * Unfortunately, the world of the FPU is more complicated.
 *
 * The ABI mandates that we must save %xmm0-%xmm7. On newer Intel processors,
 * %xmm0-%xmm7 shadow %ymm0-%ymm7 and %zmm0-%zmm7. Historically, when saving
 * the FPU, we only saved and restored these eight registers. Unfortunately,
 * this process itself ended up having side effects. Because the registers
 * shadow one another, if we saved a full %zmm register when only a %xmm
 * register was valid, we would end up causing the processor to think that the
 * full %zmm register was valid. Once it believed that this was the case, it
 * would then degrade performance of code that only used the %xmm registers.
 *
 * One way to tackle this problem would have been to use xgetbv with ecx=1 to
 * get information about what was actually in use and only save and restore
 * that. You can imagine that this logic roughly ends up as something like:
 *
 *	if (zmm_inuse)
 *		save_zmm()
 *	if (ymm_inuse)
 *		save_ymm()
 *	save_xmm()
 *
 * However, this logic leaves us at the mercy of the branch predictor. This
 * means that all of our efforts can still end up causing the CPU to believe
 * that some of these other FPU registers are in use, and thus defeat the
 * optimizations that it has.
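 *
 * For illustration only (this file does not take that approach), such a probe
 * could be written with the _xgetbv intrinsic. In this sketch, save_zmm() and
 * friends are the hypothetical helpers from the pseudocode above, and the
 * XINUSE bit positions (bit 2: YMM state; bits 5-7: AVX-512 state) follow the
 * Intel SDM:
 *
 *	#include <immintrin.h>
 *
 *	void
 *	save_fp_args(void *buf)
 *	{
 *		unsigned long long xinuse = _xgetbv(1);
 *
 *		if (xinuse & 0xe0)
 *			save_zmm(buf);
 *		else if (xinuse & 0x04)
 *			save_ymm(buf);
 *		else
 *			save_xmm(buf);
 *	}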
 *
 * To deal with this problem, Intel has suggested using the xsave family of
 * instructions. The kernel provides information about the size required for
 * the floating point registers, as well as which of several methods we need
 * to employ, through the aux vector. This gets us out of trying to look at
 * the hardware capabilities and make decisions every time. The amd64-specific
 * portion of rtld processes those values and determines which functions to
 * use on an as-needed basis.
 *
 * There are two different functions that we export. The first is elf_rtbndr().
 * This is basically the glue that gets us into the PLT and lets us perform
 * relocations. elf_rtbndr() determines the address of the function that we
 * must call and arranges its stack such that when we return from elf_rtbndr()
 * we will instead jump to the actual relocated function, which will return to
 * the original caller. Because of this, we must preserve all of the registers
 * that are used for arguments and restore them before returning.
 *
 * The second function we export is elf_plt_trace(). This is used to add
 * support for audit libraries among other things. elf_plt_trace() may or may
 * not call the underlying function as a side effect or merely set up its
 * return to it. This changes how we handle %rax. If we call the function
 * ourselves, then we end up making sure that %rax is the return value versus
 * the initial value. In addition, because we get %r11 from the surrounding
 * PLT code, we opt to preserve it in case some of the relocation logic ever
 * ends up calling back into us again.
 */

#if defined(lint)

#include <sys/types.h>
#include <_rtld.h>
#include <_audit.h>
#include <_elf.h>
#include <sys/regset.h>
#include <sys/auxv_386.h>

#else

#include <link.h>
#include <_audit.h>
#include <sys/asm_linkage.h>
#include <sys/auxv_386.h>
#include <sys/x86_archext.h>

/*
 * This macro is used to zero the xsave header. The contents of scratch will
 * be destroyed. loc should contain the starting address of the xsave area.
 */
#define	XSAVE_HEADER_ZERO(scratch, loc) \
	xorq	scratch, scratch; \
	movq	scratch, 0x200(loc); \
	movq	scratch, 0x208(loc); \
	movq	scratch, 0x210(loc); \
	movq	scratch, 0x218(loc); \
	movq	scratch, 0x220(loc); \
	movq	scratch, 0x228(loc); \
	movq	scratch, 0x230(loc); \
	movq	scratch, 0x238(loc)


	.file	"boot_elf.s"
	.text

/*
 * This section of the code contains glue functions that are used to take care
 * of saving and restoring the FPU. We deal with this in a few different ways
 * based on the hardware support and what exists. Historically we've only
 * saved and restored the first 8 floating point registers rather than the
 * entire FPU. That implementation still exists here and is kept around mostly
 * as an insurance policy.
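 *
 * As a reference for XSAVE_HEADER_ZERO above: per the Intel SDM, an xsave
 * area begins with a 512-byte legacy FXSAVE region, followed by a 64-byte
 * header at offset 0x200 whose first two quadwords are XSTATE_BV and
 * XCOMP_BV and whose remaining bytes must be zero. A rough C view of that
 * layout (illustrative only, not a definition this code uses):
 *
 *	struct xsave_area {
 *		uint8_t		xs_legacy[512];	(FXSAVE image)
 *		uint64_t	xs_xstate_bv;	(offset 0x200)
 *		uint64_t	xs_xcomp_bv;	(offset 0x208)
 *		uint64_t	xs_rsvd[6];	(0x210-0x238, must be zero)
 *	};
 *
 * The extended component state follows the header.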
 */
	ENTRY(_elf_rtbndr_fp_save_orig)
	movq	org_scapset@GOTPCREL(%rip),%r11
	movq	(%r11),%r11		/* Syscapset_t pointer */
	movl	8(%r11),%edx		/* sc_hw_2 */
	testl	$AV_386_2_AVX512F,%edx
	jne	.save_zmm
	movl	(%r11),%edx		/* sc_hw_1 */
	testl	$AV_386_AVX,%edx
	jne	.save_ymm
	movdqa	%xmm0, (%rdi)
	movdqa	%xmm1, 64(%rdi)
	movdqa	%xmm2, 128(%rdi)
	movdqa	%xmm3, 192(%rdi)
	movdqa	%xmm4, 256(%rdi)
	movdqa	%xmm5, 320(%rdi)
	movdqa	%xmm6, 384(%rdi)
	movdqa	%xmm7, 448(%rdi)
	jmp	.save_finish

.save_ymm:
	vmovdqa	%ymm0, (%rdi)
	vmovdqa	%ymm1, 64(%rdi)
	vmovdqa	%ymm2, 128(%rdi)
	vmovdqa	%ymm3, 192(%rdi)
	vmovdqa	%ymm4, 256(%rdi)
	vmovdqa	%ymm5, 320(%rdi)
	vmovdqa	%ymm6, 384(%rdi)
	vmovdqa	%ymm7, 448(%rdi)
	jmp	.save_finish

.save_zmm:
	vmovdqa64	%zmm0, (%rdi)
	vmovdqa64	%zmm1, 64(%rdi)
	vmovdqa64	%zmm2, 128(%rdi)
	vmovdqa64	%zmm3, 192(%rdi)
	vmovdqa64	%zmm4, 256(%rdi)
	vmovdqa64	%zmm5, 320(%rdi)
	vmovdqa64	%zmm6, 384(%rdi)
	vmovdqa64	%zmm7, 448(%rdi)

.save_finish:
	ret
	SET_SIZE(_elf_rtbndr_fp_save_orig)

	ENTRY(_elf_rtbndr_fp_restore_orig)
	movq	org_scapset@GOTPCREL(%rip),%r11
	movq	(%r11),%r11		/* Syscapset_t pointer */
	movl	8(%r11),%edx		/* sc_hw_2 */
	testl	$AV_386_2_AVX512F,%edx
	jne	.restore_zmm
	movl	(%r11),%edx		/* sc_hw_1 */
	testl	$AV_386_AVX,%edx
	jne	.restore_ymm

	movdqa	(%rdi), %xmm0
	movdqa	64(%rdi), %xmm1
	movdqa	128(%rdi), %xmm2
	movdqa	192(%rdi), %xmm3
	movdqa	256(%rdi), %xmm4
	movdqa	320(%rdi), %xmm5
	movdqa	384(%rdi), %xmm6
	movdqa	448(%rdi), %xmm7
	jmp	.restore_finish

.restore_ymm:
	vmovdqa	(%rdi), %ymm0
	vmovdqa	64(%rdi), %ymm1
	vmovdqa	128(%rdi), %ymm2
	vmovdqa	192(%rdi), %ymm3
	vmovdqa	256(%rdi), %ymm4
	vmovdqa	320(%rdi), %ymm5
	vmovdqa	384(%rdi), %ymm6
	vmovdqa	448(%rdi), %ymm7
	jmp	.restore_finish

.restore_zmm:
	vmovdqa64	(%rdi), %zmm0
	vmovdqa64	64(%rdi), %zmm1
	vmovdqa64	128(%rdi), %zmm2
	vmovdqa64	192(%rdi), %zmm3
	vmovdqa64	256(%rdi), %zmm4
	vmovdqa64	320(%rdi), %zmm5
	vmovdqa64	384(%rdi), %zmm6
	vmovdqa64	448(%rdi), %zmm7

.restore_finish:
	ret
	SET_SIZE(_elf_rtbndr_fp_restore_orig)

	ENTRY(_elf_rtbndr_fp_fxsave)
	fxsaveq	(%rdi)
	ret
	SET_SIZE(_elf_rtbndr_fp_fxsave)

	ENTRY(_elf_rtbndr_fp_fxrestore)
	fxrstor	(%rdi)
	ret
	SET_SIZE(_elf_rtbndr_fp_fxrestore)

	ENTRY(_elf_rtbndr_fp_xsave)
	XSAVE_HEADER_ZERO(%rdx, %rdi)
	movq	$_CONST(XFEATURE_FP_ALL), %rdx
	movl	%edx, %eax
	shrq	$32, %rdx
	xsave	(%rdi)			/* save data */
	ret
	SET_SIZE(_elf_rtbndr_fp_xsave)

	ENTRY(_elf_rtbndr_fp_xrestore)
	movq	$_CONST(XFEATURE_FP_ALL), %rdx
	movl	%edx, %eax
	shrq	$32, %rdx
	xrstor	(%rdi)			/* restore data */
	ret
	SET_SIZE(_elf_rtbndr_fp_xrestore)

#endif

#if defined(lint)

/* ARGSUSED0 */
int
elf_plt_trace()
{
	return (0);
}

#else

/*
 * On entry the 'glue code' has already done the following:
 *
 *	pushq	%rbp
 *	movq	%rsp, %rbp
 *	subq	$0x10, %rsp
 *	leaq	trace_fields(%rip), %r11
 *	movq	%r11, -0x8(%rbp)
 *	movq	$elf_plt_trace, %r11
 *	jmp	*%r11
 *
 * so - -8(%rbp) contains the dyndata ptr
 *
 *	0x0	Addr	*reflmp
 *	0x8	Addr	*deflmp
 *	0x10	Word	symndx
 *	0x14	Word	sb_flags
 *	0x18	Sym	symdef.st_name
 *	0x1c		symdef.st_info
 *	0x1d		symdef.st_other
 *	0x1e		symdef.st_shndx
 *	0x20		symdef.st_value
 *	0x28		symdef.st_size
 *
 * Also note - on entry 16 bytes have already been subtracted
 * from %rsp. The first 8 bytes are for the dyn_data_ptr,
 * the second 8 bytes are to align the stack and are available
 * for use.
 */
#define	REFLMP_OFF		0x0
#define	DEFLMP_OFF		0x8
#define	SYMNDX_OFF		0x10
#define	SBFLAGS_OFF		0x14
#define	SYMDEF_OFF		0x18
#define	SYMDEF_VALUE_OFF	0x20

/*
 * Next, we need to create a bunch of local storage. First, we have to preserve
 * the standard registers per the amd64 ABI. This means we need to deal with:
 *
 *	%rax - Used for information about the number of vector arguments
 *	%rdi - arg0
 *	%rsi - arg1
 *	%rdx - arg2
 *	%rcx - arg3
 *	%r8  - arg4
 *	%r9  - arg5
 *	%r10 - static chain pointer
 *	%r11 - PLT interwork register; our caller is using this, so it's not
 *	       a temporary for us.
 *
 * In addition, we need to save the amd64 ABI floating point arguments.
 * Finally, we need to deal with our local storage. We need a La_amd64_regs
 * and a uint64_t for the previous stack size.
 *
 * To deal with this and the potentially variable size of the FPU regs, we
 * have to play a few different games. We refer to all of the standard
 * registers, the previous stack size, and the La_amd64_regs structure off of
 * %rbp. These are all values that are below %rbp.
 */
#define	SPDYNOFF	-8
#define	SPDESTOFF	-16
#define	SPPRVSTKOFF	-24
#define	SPLAREGOFF	-88
#define	ORIG_RDI	-96
#define	ORIG_RSI	-104
#define	ORIG_RDX	-112
#define	ORIG_RCX	-120
#define	ORIG_R8		-128
#define	ORIG_R9		-136
#define	ORIG_R10	-144
#define	ORIG_R11	-152
#define	ORIG_RAX	-160
#define	PLT_SAVE_OFF	168

	ENTRY(elf_plt_trace)
	/*
	 * Save our static registers. After that, 64-byte align us and
	 * subtract the appropriate amount for the FPU. The frame pointer has
	 * already been pushed for us by the glue code.
	 */
	movq	%rdi, ORIG_RDI(%rbp)
	movq	%rsi, ORIG_RSI(%rbp)
	movq	%rdx, ORIG_RDX(%rbp)
	movq	%rcx, ORIG_RCX(%rbp)
	movq	%r8, ORIG_R8(%rbp)
	movq	%r9, ORIG_R9(%rbp)
	movq	%r10, ORIG_R10(%rbp)
	movq	%r11, ORIG_R11(%rbp)
	movq	%rax, ORIG_RAX(%rbp)

	subq	$PLT_SAVE_OFF, %rsp

	movq	_plt_save_size@GOTPCREL(%rip),%r9
	movq	_plt_fp_save@GOTPCREL(%rip),%r10
	subq	(%r9), %rsp
	andq	$-64, %rsp
	movq	%rsp, %rdi
	call	*(%r10)

	/*
	 * Now that we've saved all of our registers, figure out what we need
	 * to do next.
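	 *
	 * On the .start_pltenter path below, we fill in an La_amd64_regs for
	 * the auditor. For reference, that structure looks roughly like the
	 * following, with the field layout matching the la_* stores below;
	 * consult <link.h> for the authoritative definition:
	 *
	 *	typedef struct {
	 *		Addr	lr_rsp;
	 *		Addr	lr_rbp;
	 *		Addr	lr_rdi;
	 *		Addr	lr_rsi;
	 *		Addr	lr_rdx;
	 *		Addr	lr_rcx;
	 *		Addr	lr_r8;
	 *		Addr	lr_r9;
	 *	} La_amd64_regs;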
	 */
	movq	SPDYNOFF(%rbp), %rax		/ %rax = dyndata
	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
	je	.start_pltenter
	movq	SYMDEF_VALUE_OFF(%rax), %rdi
	movq	%rdi, SPDESTOFF(%rbp)		/ save destination address
	jmp	.end_pltenter

.start_pltenter:
	/*
	 * save all registers into La_amd64_regs
	 */
	leaq	SPLAREGOFF(%rbp), %rsi		/ %rsi = &La_amd64_regs
	leaq	8(%rbp), %rdi
	movq	%rdi, 0(%rsi)			/ la_rsp
	movq	0(%rbp), %rdi
	movq	%rdi, 8(%rsi)			/ la_rbp
	movq	ORIG_RDI(%rbp), %rdi
	movq	%rdi, 16(%rsi)			/ la_rdi
	movq	ORIG_RSI(%rbp), %rdi
	movq	%rdi, 24(%rsi)			/ la_rsi
	movq	ORIG_RDX(%rbp), %rdi
	movq	%rdi, 32(%rsi)			/ la_rdx
	movq	ORIG_RCX(%rbp), %rdi
	movq	%rdi, 40(%rsi)			/ la_rcx
	movq	ORIG_R8(%rbp), %rdi
	movq	%rdi, 48(%rsi)			/ la_r8
	movq	ORIG_R9(%rbp), %rdi
	movq	%rdi, 56(%rsi)			/ la_r9

	/*
	 * prepare for call to la_pltenter
	 */
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	leaq	SBFLAGS_OFF(%r11), %r9		/ arg6 (&sb_flags)
	leaq	SPLAREGOFF(%rbp), %r8		/ arg5 (&La_amd64_regs)
	movl	SYMNDX_OFF(%r11), %ecx		/ arg4 (symndx)
	leaq	SYMDEF_OFF(%r11), %rdx		/ arg3 (&Sym)
	movq	DEFLMP_OFF(%r11), %rsi		/ arg2 (dlmp)
	movq	REFLMP_OFF(%r11), %rdi		/ arg1 (rlmp)
	call	audit_pltenter@PLT
	movq	%rax, SPDESTOFF(%rbp)		/ save calling address
.end_pltenter:

	/*
	 * If *no* la_pltexit() routines exist, we do not need to keep the
	 * stack frame before we call the actual routine. Instead we jump to
	 * it and remove our stack frame from the stack at the same time.
	 */
	movl	audit_flags(%rip), %eax
	andl	$AF_PLTEXIT, %eax		/ value of audit.h:AF_PLTEXIT
	cmpl	$0, %eax
	je	.bypass_pltexit
	/*
	 * Has the *nopltexit* flag been set for this entry point?
	 */
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
	je	.start_pltexit

.bypass_pltexit:
	/*
	 * No PLTEXIT processing required.
	 */
	movq	0(%rbp), %r11
	movq	%r11, -8(%rbp)			/ move prev %rbp
	movq	SPDESTOFF(%rbp), %r11		/ %r11 == calling destination
	movq	%r11, 0(%rbp)			/ store destination at top

	/* Restore FPU */
	movq	_plt_fp_restore@GOTPCREL(%rip),%r10

	movq	%rsp, %rdi
	call	*(%r10)

	movq	ORIG_RDI(%rbp), %rdi
	movq	ORIG_RSI(%rbp), %rsi
	movq	ORIG_RDX(%rbp), %rdx
	movq	ORIG_RCX(%rbp), %rcx
	movq	ORIG_R8(%rbp), %r8
	movq	ORIG_R9(%rbp), %r9
	movq	ORIG_R10(%rbp), %r10
	movq	ORIG_R11(%rbp), %r11
	movq	ORIG_RAX(%rbp), %rax

	subq	$8, %rbp			/ adjust %rbp for 'ret'
	movq	%rbp, %rsp			/
	/*
	 * At this point, after a little doctoring, we should
	 * have the following on the stack:
	 *
	 *	16(%rsp):  ret addr
	 *	8(%rsp):   dest_addr
	 *	0(%rsp):   Previous %rbp
	 *
	 * So - we pop the previous %rbp, and then
	 * ret to our final destination.
	 */
	popq	%rbp				/
	ret					/ jmp to final destination
						/ and clean up stack :)

.start_pltexit:
	/*
	 * In order to call the destination procedure and then return
	 * to audit_pltexit() for post analysis, we must first grow
	 * our stack frame and then duplicate the original caller's
	 * stack state. This duplicates all of the arguments
	 * that were to be passed to the destination procedure.
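	 *
	 * In rough C terms (illustrative only; prev_rbp names the saved
	 * frame pointer at 0(%rbp), and the other names stand in for the
	 * registers the code below uses):
	 *
	 *	size_t prev_sz = prev_rbp - (rbp + 16);
	 *	if (audit_argcnt > 0 && prev_sz > audit_argcnt * 8)
	 *		prev_sz = audit_argcnt * 8;
	 *	rsp -= prev_sz;
	 *	memcpy(rsp, rbp + 16, prev_sz);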
	 */
	movq	%rbp, %rdi			/
	addq	$16, %rdi			/ %rdi = src
	movq	(%rbp), %rdx			/
	subq	%rdi, %rdx			/ %rdx == prev frame sz
	/*
	 * If audit_argcnt > 0, then we limit the number of
	 * arguments that will be duplicated to audit_argcnt.
	 *
	 * If (prev_stack_size > (audit_argcnt * 8))
	 *	prev_stack_size = audit_argcnt * 8;
	 */
	movl	audit_argcnt(%rip),%eax		/ %eax = audit_argcnt
	cmpl	$0, %eax
	jle	.grow_stack
	leaq	(,%rax,8), %rax			/ %rax = %rax * 8
	cmpq	%rax,%rdx
	jle	.grow_stack
	movq	%rax, %rdx
	/*
	 * Grow the stack and duplicate the arguments of the
	 * original caller.
	 */
.grow_stack:
	movq	%rsp, %r11
	subq	%rdx, %rsp			/ grow the stack
	movq	%rdx, SPPRVSTKOFF(%rbp)		/ SPPRVSTKOFF(%rbp) == prev frame sz
	movq	%rsp, %rcx			/ %rcx = dest
	addq	%rcx, %rdx			/ %rdx == tail of dest
.while_base:
	cmpq	%rdx, %rcx			/ while (dest < tail) {
	jge	.end_while			/
	movq	(%rdi), %rsi
	movq	%rsi,(%rcx)			/ *dest = *src
	addq	$8, %rdi			/ src++
	addq	$8, %rcx			/ dest++
	jmp	.while_base			/ }

	/*
	 * The above stack is now an exact duplicate of
	 * the stack of the original calling procedure.
	 */
.end_while:
	/
	/ Restore registers using %r11, which contains our old %rsp value
	/ from before growing the stack.
	/
	movq	_plt_fp_restore@GOTPCREL(%rip),%r10
	movq	%r11, %rdi
	call	*(%r10)

.trace_r2_finish:
	movq	ORIG_RDI(%rbp), %rdi
	movq	ORIG_RSI(%rbp), %rsi
	movq	ORIG_RDX(%rbp), %rdx
	movq	ORIG_RCX(%rbp), %rcx
	movq	ORIG_R8(%rbp), %r8
	movq	ORIG_R9(%rbp), %r9
	movq	ORIG_R10(%rbp), %r10
	movq	ORIG_RAX(%rbp), %rax
	movq	ORIG_R11(%rbp), %r11

	/*
	 * Call the destination function - we'll return here
	 * for pltexit monitoring.
	 */
	call	*SPDESTOFF(%rbp)

	addq	SPPRVSTKOFF(%rbp), %rsp		/ clean up duplicated stack

	/
	/ prepare for call to audit_pltexit()
	/
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	movq	SYMNDX_OFF(%r11), %r8		/ arg5 (symndx)
	leaq	SYMDEF_OFF(%r11), %rcx		/ arg4 (&Sym)
	movq	DEFLMP_OFF(%r11), %rdx		/ arg3 (dlmp)
	movq	REFLMP_OFF(%r11), %rsi		/ arg2 (rlmp)
	movq	%rax, %rdi			/ arg1 (returnval)
	call	audit_pltexit@PLT

	/*
	 * Clean up after ourselves and return to the
	 * original calling procedure. Make sure to restore
	 * registers.
	 */

	movq	_plt_fp_restore@GOTPCREL(%rip),%r10
	movq	%rsp, %rdi
	movq	%rax, SPPRVSTKOFF(%rbp)
	call	*(%r10)

	movq	ORIG_RDI(%rbp), %rdi
	movq	ORIG_RSI(%rbp), %rsi
	movq	ORIG_RDX(%rbp), %rdx
	movq	ORIG_RCX(%rbp), %rcx
	movq	ORIG_R8(%rbp), %r8
	movq	ORIG_R9(%rbp), %r9
	movq	ORIG_R10(%rbp), %r10
	movq	ORIG_R11(%rbp), %r11
	movq	SPPRVSTKOFF(%rbp), %rax

	movq	%rbp, %rsp			/
	popq	%rbp				/
	ret					/ return to caller
	SET_SIZE(elf_plt_trace)
#endif

/*
 * We got here because a call to a function resolved to a procedure
 * linkage table entry. That entry did a jump to the first PLT entry, which
 * in turn did a call to elf_rtbndr.
 *
 * The code sequence that got us here was:
 *
 * .PLT0:
 *	pushq	GOT+8(%rip)	#GOT[1]
 *	jmp	*GOT+16(%rip)	#GOT[2]
 *	nop
 *	nop
 *	nop
 *	nop
 *	...
 * PLT entry for foo:
 *	jmp	*name1@GOTPCREL(%rip)
 *	pushl	$rel.plt.foo
 *	jmp	PLT0
 *
 * At entry, the stack looks like this:
 *
 *	return address			16(%rsp)
 *	$rel.plt.foo (plt index)	8(%rsp)
 *	lmp				0(%rsp)
 *
 */
#if defined(lint)

extern unsigned long elf_bndr(Rt_map *, unsigned long, caddr_t);

void
elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)
{
	(void) elf_bndr(lmp, reloc, pc);
}

#else

/*
 * The PLT code that landed us here placed 2 arguments on the stack as
 * arguments to elf_rtbndr. Additionally, the pc of the caller is below these
 * 2 args. Our stack will look like this after we establish a stack frame
 * with the push %rbp; movq %rsp, %rbp sequence:
 *
 *	8(%rbp)			arg1 - *lmp
 *	16(%rbp), %rsi		arg2 - reloc index
 *	24(%rbp), %rdx		arg3 - pc of caller
 */
#define	LBPLMPOFF	8	/* arg1 - *lmp */
#define	LBPRELOCOFF	16	/* arg2 - reloc index */
#define	LBRPCOFF	24	/* arg3 - pc of caller */

/*
 * With the above in place, we must now proceed to preserve all temporary
 * registers that are also used for passing arguments. Specifically this
 * means:
 *
 *	%rax - Used for information about the number of vector arguments
 *	%rdi - arg0
 *	%rsi - arg1
 *	%rdx - arg2
 *	%rcx - arg3
 *	%r8  - arg4
 *	%r9  - arg5
 *	%r10 - static chain pointer
 *
 * While we don't have to preserve %r11, we do have to preserve the FPU
 * registers. The FPU logic is delegated to a specific function that we'll
 * call. However, it requires that its stack is 64-byte aligned. We defer the
 * alignment to that point. This will also take care of the fact that a
 * caller may not call us with a correctly aligned stack pointer per the
 * amd64 ABI.
 */

	.extern	_plt_save_size
	.extern	_plt_fp_save
	.extern	_plt_fp_restore

	.weak	_elf_rtbndr
	_elf_rtbndr = elf_rtbndr

	ENTRY(elf_rtbndr)
	pushq	%rbp			/* Establish stack frame */
	movq	%rsp, %rbp

	/*
	 * Save basic regs.
	 */
	pushq	%rax
	pushq	%rdi
	pushq	%rsi
	pushq	%rdx
	pushq	%rcx
	pushq	%r8
	pushq	%r9
	pushq	%r10
	pushq	%r12

	/*
	 * Fetch the amount of space we need for the FPU registers along with
	 * the save function, then call it. Save %rsp before we manipulate it
	 * to make the restore easier.
	 */
	movq	%rsp, %r12
	movq	_plt_save_size@GOTPCREL(%rip),%r9
	movq	_plt_fp_save@GOTPCREL(%rip),%r10
	subq	(%r9), %rsp
	andq	$-64, %rsp

	movq	%rsp, %rdi
	call	*(%r10)

	/*
	 * Perform the actual PLT logic. Note that the PLT-related arguments
	 * are located at offsets relative to %rbp.
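	 *
	 * In rough C terms, the code below performs
	 *
	 *	dest = elf_bndr(lmp, relndx, pc);
	 *
	 * and then stores dest over the reloc-index slot so that, once our
	 * frame is torn down, the final ret transfers control to the
	 * resolved function as if it had been called directly.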
	 */
	movq	LBPLMPOFF(%rbp), %rdi	/* arg1 - *lmp */
	movq	LBPRELOCOFF(%rbp), %rsi	/* arg2 - reloc index */
	movq	LBRPCOFF(%rbp), %rdx	/* arg3 - pc of caller */
	call	elf_bndr@PLT		/* call elf_bndr(lmp, relndx, pc) */
	movq	%rax, LBPRELOCOFF(%rbp)	/* store final destination */

	/* Restore FPU */
	movq	_plt_fp_restore@GOTPCREL(%rip),%r10

	movq	%rsp, %rdi
	call	*(%r10)

	movq	%r12, %rsp
	popq	%r12
	popq	%r10
	popq	%r9
	popq	%r8
	popq	%rcx
	popq	%rdx
	popq	%rsi
	popq	%rdi
	popq	%rax

	movq	%rbp, %rsp		/* Restore our stack frame */
	popq	%rbp

	addq	$8, %rsp		/* pop 1st plt-pushed arg; */
					/* the second argument is used */
					/* for the 'return' address to our */
					/* final destination */

	ret				/* invoke resolved function */

	SET_SIZE(elf_rtbndr)
#endif