//  z_Linux_asm.S:  - microtasking routines specifically
//                    written for Intel platforms running Linux* OS

//
////===----------------------------------------------------------------------===//
////
//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
//// See https://llvm.org/LICENSE.txt for license information.
//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
////
////===----------------------------------------------------------------------===//
//

// -----------------------------------------------------------------------
// macros
// -----------------------------------------------------------------------
//
// Each architecture section below defines the same small vocabulary:
//   KMP_PREFIX_UNDERSCORE(x)  exported symbol name for x
//   KMP_LABEL(x)              name of a local label
//   ALIGN n                   alignment directive
//   PROC name                 open an exported routine
//   DEBUG_INFO name           close a routine
// The x86 section additionally defines KMP_CFI_* wrappers for .cfi_* directives.

#include "kmp_config.h"

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

# if KMP_MIC
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
// the round-robin HT mechanism, and therefore speeds up the issue rate of
// the other threads on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
// barrier time to increase greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
//.macro pause_op
//	mov    $100, %rax
//	delay  %rax
//.endm
# else
// pause_op is emitted as raw bytes rather than through a mnemonic.
#  define pause_op   .byte 0xf3,0x90
# endif // KMP_MIC

# if KMP_OS_DARWIN
// Darwin flavor: macro arguments are positional ($0) and all of the
// KMP_CFI_* wrappers expand to nothing.
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
#  define KMP_LABEL(x) L_##x             // form the name of label
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro
.macro ALIGN
	.align $0
.endmacro
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro
.macro PROC
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
// on OS X*)
# if KMP_MIC
#  define KMP_LABEL(x) L_##x          // local label
# else
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces
# endif // KMP_MIC
// ALIGN takes a power-of-two exponent: ALIGN n expands to .align 1<<(n).
.macro ALIGN size
	.align 1<<(\size)
.endm
// DEBUG_INFO closes the CFI region opened by PROC and records the symbol
// type and size.
.macro DEBUG_INFO proc
	.cfi_endproc
// Not sure why we need .type and .size for the functions
	.align 16
	.type  \proc,@function
	.size  \proc,.-\proc
.endm
// PROC aligns, exports and defines the entry label, then opens a CFI region.
.macro PROC proc
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
.endm
// Thin wrappers so that routine bodies can use one spelling on every OS.
.macro KMP_CFI_DEF_OFFSET sz
	.cfi_def_cfa_offset	\sz
.endm
.macro KMP_CFI_OFFSET reg, sz
	.cfi_offset	\reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
	.cfi_def_cfa_register	\reg
.endm
.macro KMP_CFI_DEF reg, sz
	.cfi_def_cfa	\reg,\sz
.endm
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64

# if KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
#  define KMP_LABEL(x) L_##x             // form the name of label

.macro ALIGN
	.align $0
.endmacro

.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

.macro PROC
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

// Same exponent convention as the x86 ELF branch: ALIGN n -> .align 1<<(n).
.macro ALIGN size
	.align 1<<(\size)
.endm

.macro DEBUG_INFO proc
	.cfi_endproc
// Not sure why we need .type and .size for the functions
	ALIGN 2
	.type  \proc,@function
	.size  \proc,.-\proc
.endm

.macro PROC proc
	ALIGN 2
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
.endm
# endif // KMP_OS_DARWIN

#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64

// -----------------------------------------------------------------------
// data
// -----------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT

// Support for unnamed common blocks.
//
// Because the symbol ".gomp_critical_user_" contains a ".", we have to
// put this stuff in assembly.

// __kmp_unnamed_critical_addr is a pointer-sized data word holding the
// address of the 32-byte common block .gomp_critical_user_.
# if KMP_ARCH_X86
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .long .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 4
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,4
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */

# if KMP_ARCH_X86_64
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .quad .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
// NOTE(review): in the ELF branch ALIGN n expands to .align 1<<(n), so
// ALIGN 8 requests 256-byte alignment for an 8-byte object -- confirm that
// this is intended rather than ALIGN 3.
        ALIGN 8
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .8byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,8
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */

#endif /* KMP_GOMP_COMPAT */


#if KMP_ARCH_X86 && !KMP_ARCH_PPC64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture
// running Linux* OS
// -----------------------------------------------------------------------
//
// All routines in this section read their arguments from the stack:
// at entry the first argument is at 4(%esp), the second at 8(%esp), etc.

	.ident "Intel Corporation"
	.data
	ALIGN 4
// void
// __kmp_x86_pause( void );
//
// Executes one pause_op and returns.

        .text
	PROC  __kmp_x86_pause

        pause_op
        ret

	DEBUG_INFO __kmp_x86_pause

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically adds d to *p; xaddl leaves the previous *p in %eax.

        PROC      __kmp_test_then_add32

        movl      4(%esp), %ecx
        movl      8(%esp), %eax
        lock
        xaddl     %eax,(%ecx)
        ret

	DEBUG_INFO __kmp_test_then_add32

//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%al
        PROC  __kmp_xchg_fixed8

        movl      4(%esp), %ecx    // "p"
        movb      8(%esp), %al	// "d"

        lock
        xchgb     %al,(%ecx)
        ret

        DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
// return:     %ax
        PROC  __kmp_xchg_fixed16

        movl      4(%esp), %ecx    // "p"
        movw      8(%esp), %ax	// "d"

        lock
        xchgw     %ax,(%ecx)
        ret

        DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%eax
        PROC  __kmp_xchg_fixed32

        movl      4(%esp), %ecx    // "p"
        movl      8(%esp), %eax	// "d"

        lock
        xchgl     %eax,(%ecx)
        ret

        DEBUG_INFO __kmp_xchg_fixed32


// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// The __kmp_compare_and_store<N> routines return 1 in %eax when the
// compare-exchange stored sv, and 0 otherwise.
        PROC  __kmp_compare_and_store8

        movl      4(%esp), %ecx
        movb      8(%esp), %al
        movb      12(%esp), %dl
        lock
        cmpxchgb  %dl,(%ecx)
        sete      %al           // if %al == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax      // zero extend the sete result into %eax
        ret

        DEBUG_INFO __kmp_compare_and_store8

// kmp_int16
// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
        PROC  __kmp_compare_and_store16

        movl      4(%esp), %ecx
        movw      8(%esp), %ax
        movw      12(%esp), %dx
        lock
        cmpxchgw  %dx,(%ecx)
        sete      %al           // if %ax == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax      // zero extend the sete result into %eax
        ret

        DEBUG_INFO __kmp_compare_and_store16

// kmp_int32
// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
        PROC  __kmp_compare_and_store32

        movl      4(%esp), %ecx
        movl      8(%esp), %eax
        movl      12(%esp), %edx
        lock
        cmpxchgl  %edx,(%ecx)
        sete      %al          // if %eax == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax     // zero extend the sete result into %eax
        ret

        DEBUG_INFO __kmp_compare_and_store32

// kmp_int32
// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s );
//
// Uses a frame because cmpxchg8b needs %ebx and this routine also uses
// %edi; both are saved and restored here.  After the frame is set up the
// arguments start at 8(%ebp).
        PROC  __kmp_compare_and_store64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      8(%ebp), %edi
        movl      12(%ebp), %eax        // "cv" low order word
        movl      16(%ebp), %edx        // "cv" high order word
        movl      20(%ebp), %ebx        // "sv" low order word
        movl      24(%ebp), %ecx        // "sv" high order word
        lock
        cmpxchg8b (%edi)
        sete      %al      // if %edx:eax == (%edi) set %al = 1 else set %al = 0
        and       $1, %eax // zero extend the sete result into %eax
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store64

// kmp_int8
// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
//
// The __kmp_compare_and_store_ret<N> routines return whatever cmpxchg
// leaves in the accumulator instead of a 0/1 flag.
        PROC  __kmp_compare_and_store_ret8

        movl      4(%esp), %ecx
        movb      8(%esp), %al
        movb      12(%esp), %dl
        lock
        cmpxchgb  %dl,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8

// kmp_int16
// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
//                               kmp_int16 sv);
        PROC  __kmp_compare_and_store_ret16

        movl      4(%esp), %ecx
        movw      8(%esp), %ax
        movw      12(%esp), %dx
        lock
        cmpxchgw  %dx,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16

// kmp_int32
// __kmp_compare_and_store_ret32(volatile
//                               kmp_int32 *p, kmp_int32 cv,
//                               kmp_int32 sv);
//
// Returns in %eax whatever cmpxchgl leaves in the accumulator.
        PROC  __kmp_compare_and_store_ret32

        movl      4(%esp), %ecx
        movl      8(%esp), %eax
        movl      12(%esp), %edx
        lock
        cmpxchgl  %edx,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32

// kmp_int64
// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
//                               kmp_int64 sv);
//
// 64-bit variant built on cmpxchg8b; the result is returned in %edx:%eax.
// %ebx and %edi are saved and restored around the operation.
        PROC  __kmp_compare_and_store_ret64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      8(%ebp), %edi
        movl      12(%ebp), %eax        // "cv" low order word
        movl      16(%ebp), %edx        // "cv" high order word
        movl      20(%ebp), %ebx        // "sv" low order word
        movl      24(%ebp), %ecx        // "sv" high order word
        lock
        cmpxchg8b (%edi)
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores "data" to *addr and returns the previous contents.
//
// parameters (at entry):
// 	addr:	4(%esp)
// 	data:	8(%esp)
// Once the frame pointer is established (after pushl %ebp) the same
// arguments are at 8(%ebp) and 12(%ebp); 4(%ebp) is the return address.
//
// locals:
// 	-4(%ebp): scratch slot used to move the old bits onto the x87 stack
//
// return:	st(0) (loaded with flds); %eax also holds the old bit pattern
        PROC  __kmp_xchg_real32

        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp
        pushl   %esi

        movl    8(%ebp), %esi
                        // "addr"
        movl    12(%ebp), %eax
                        // bit pattern of "data"

        lock
        xchgl   %eax, (%esi)
                        // %eax <- old contents of *addr, fetched by the
                        // same atomic operation that stores "data"

        movl    %eax, -4(%ebp)
        flds    -4(%ebp)
                        // return old value; exactly one x87 entry is pushed

        popl    %esi
        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_xchg_real32

# endif /* !KMP_ASM_INTRINS */

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid,
//            & tid, argv[0], ... );
//   return 1;
// }
//
// IA-32 version.  After the frame is set up the incoming arguments are at
//   8(%ebp) pkfn, 12(%ebp) gtid, 16(%ebp) tid, 20(%ebp) argc,
//   24(%ebp) p_argv, 28(%ebp) exit_frame_ptr (read only under OMPT_SUPPORT).
// The routine pushes p_argv[argc-1] .. p_argv[0], then the addresses of the
// tid and gtid argument slots, and calls pkfn.

// -- Begin __kmp_invoke_microtask
// mark_begin;
	PROC  __kmp_invoke_microtask

	pushl %ebp
	KMP_CFI_DEF_OFFSET 8
	KMP_CFI_OFFSET ebp,-8
	movl %esp,%ebp		// establish the base pointer for this routine.
	KMP_CFI_REGISTER ebp
	subl $8,%esp		// allocate space for two local variables.
				// These variables are:
				//	argv: -4(%ebp)
				//	temp: -8(%ebp)
				//
	pushl %ebx		// save %ebx to use during this routine
				// (it lands at -12(%ebp))
#if OMPT_SUPPORT
	movl 28(%ebp),%ebx	// get exit_frame address
	movl %ebp,(%ebx)	// save exit_frame
#endif

	movl 20(%ebp),%ebx	// Stack alignment - # args
	addl $2,%ebx		// #args +2  Always pass at least 2 args (gtid and tid)
	shll $2,%ebx		// Number of bytes used on stack: (#args+2)*4
	movl %esp,%eax		//
	subl %ebx,%eax		// %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
	movl %eax,%ebx		// Save to %ebx
	andl $0xFFFFFF80,%eax	// mask off 7 bits
	subl %eax,%ebx		// Amount to subtract from %esp
	subl %ebx,%esp		// Prepare the stack ptr --
				// now it will be aligned on 128-byte boundary at the call

	movl 24(%ebp),%eax	// copy from p_argv[]
	movl %eax,-4(%ebp)	// into the local variable *argv.

	movl 20(%ebp),%ebx	// argc is 20(%ebp)
	shll $2,%ebx		// %ebx = argc*4, the byte offset just past the last arg

// Loop: while %ebx > 0, step back one slot and push that argument, so the
// arguments are pushed last-to-first.
KMP_LABEL(invoke_2):
	cmpl $0,%ebx
	jg  KMP_LABEL(invoke_4)
	jmp KMP_LABEL(invoke_3)
	ALIGN 2
KMP_LABEL(invoke_4):
	movl -4(%ebp),%eax
	subl $4,%ebx			// decrement argc.
	addl %ebx,%eax			// index into argv.
	movl (%eax),%edx
	pushl %edx

	jmp KMP_LABEL(invoke_2)
	ALIGN 2
KMP_LABEL(invoke_3):
	leal 16(%ebp),%eax		// push & tid
	pushl %eax

	leal 12(%ebp),%eax		// push & gtid
	pushl %eax

	movl 8(%ebp),%ebx
	call *%ebx			// call (*pkfn)();

	movl $1,%eax			// return 1;

	movl -12(%ebp),%ebx		// restore %ebx
	leave
	KMP_CFI_DEF esp,4
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask


// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// rdtsc leaves the counter in %edx:%eax, which is returned as is.
	PROC  __kmp_hardware_timestamp
	rdtsc
	ret

	DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp

#endif /* KMP_ARCH_X86 */


#if KMP_ARCH_X86_64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------

// -- Machine type P
// mark_description "Intel Corporation";
	.ident "Intel Corporation"
// --	.file "z_Linux_asm.S"
	.data
	ALIGN 4

// To prevent getting our code into .data section .text added to every routine
// definition for x86_64.
//------------------------------------------------------------------------
// The routines in this section take their arguments in registers, as listed
// in each header: first in %rdi, second in %rsi, third in %rdx.
# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically adds d to *p; xaddl leaves the previous *p in %eax.
//
// parameters:
// 	p:	%rdi
// 	d:	%esi
//
// return:	%eax
        .text
        PROC  __kmp_test_then_add32

        movl      %esi, %eax	// "d"
        lock
        xaddl     %eax,(%rdi)
        ret

        DEBUG_INFO __kmp_test_then_add32


//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%rsi
//	return:	%rax
        .text
        PROC  __kmp_test_then_add64

        movq      %rsi, %rax	// "d"
        lock
        xaddq     %rax,(%rdi)
        ret

        DEBUG_INFO __kmp_test_then_add64


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically exchanges d with *p and returns the previous *p.
//
// parameters:
// 	p:	%rdi
// 	d:	%sil
//
// return:	%al
        .text
        PROC  __kmp_xchg_fixed8

        movb      %sil, %al	// "d"

        lock
        xchgb     %al,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%si
// return:     %ax
        .text
        PROC  __kmp_xchg_fixed16

        movw      %si, %ax	// "d"

        lock
        xchgw     %ax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%esi
//
// return:	%eax
        .text
        PROC  __kmp_xchg_fixed32

        movl      %esi, %eax	// "d"

        lock
        xchgl     %eax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed32


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%rsi
// return:	%rax
        .text
        PROC  __kmp_xchg_fixed64

        movq      %rsi, %rax	// "d"

        lock
        xchgq     %rax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed64


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Returns 1 when the compare-exchange stored sv, and 0 otherwise.
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store8

        movb      %sil, %al	// "cv"
        lock
        cmpxchgb  %dl,(%rdi)
        sete      %al           // if %al == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // zero extend the sete result for return value
        ret

        DEBUG_INFO __kmp_compare_and_store8


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%si
//	sv:	%dx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store16

        movw      %si, %ax	// "cv"
        lock
        cmpxchgw  %dx,(%rdi)
        sete      %al           // if %ax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // zero extend the sete result for return value
        ret

        DEBUG_INFO __kmp_compare_and_store16


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Returns 1 when the compare-exchange stored sv, and 0 otherwise.
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store32

        movl      %esi, %eax	// "cv"
        lock
        cmpxchgl  %edx,(%rdi)
        sete      %al           // if %eax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // zero extend the sete result for return value
        ret

        DEBUG_INFO __kmp_compare_and_store32


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%rsi
//	sv:	%rdx
//	return:	%eax
        .text
        PROC  __kmp_compare_and_store64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)
        sete      %al           // if %rax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // zero extend the sete result for return value
        ret

        DEBUG_INFO __kmp_compare_and_store64

//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// The _ret variants return whatever cmpxchg leaves in the accumulator
// instead of a 0/1 flag.
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store_ret8

        movb      %sil, %al	// "cv"
        lock
        cmpxchgb  %dl,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%si
//	sv:	%dx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store_ret16

        movw      %si, %ax	// "cv"
        lock
        cmpxchgw  %dx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store_ret32

        movl      %esi, %eax	// "cv"
        lock
        cmpxchgl  %edx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%rsi
//	sv:	%rdx
//	return:	%rax
        .text
        PROC  __kmp_compare_and_store_ret64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64

# endif /* !KMP_ASM_INTRINS */


# if !KMP_MIC

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Moves the float bits through %eax so the exchange itself is an integer
// xchgl, then moves the old bits back into %xmm0.
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 4 bytes)
//
// return:	%xmm0 (lower 4 bytes)
        .text
        PROC  __kmp_xchg_real32

	movd	%xmm0, %eax	// load "data" to eax

         lock
         xchgl %eax, (%rdi)

	movd	%eax, %xmm0	// load old value into return register

        ret

        DEBUG_INFO __kmp_xchg_real32


//------------------------------------------------------------------------
// FUNCTION
//          __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// Moves the double bits through %rax so the exchange itself is an integer
// xchgq, then moves the old bits back into %xmm0.
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 8 bytes)
//      return: %xmm0 (lower 8 bytes)
        .text
        PROC  __kmp_xchg_real64

	movd	%xmm0, %rax	// load "data" to rax

         lock
	xchgq  %rax, (%rdi)

	movd	%rax, %xmm0	// load old value into return register
        ret

        DEBUG_INFO __kmp_xchg_real64


# endif /* !KMP_ASM_INTRINS */

# endif /* !KMP_MIC */

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// note: at call to pkfn must have %rsp 128-byte aligned for compiler
//
// parameters:
//      %rdi:  	pkfn
//	%esi:	gtid
//	%edx:	tid
//	%ecx:	argc
//	%r8:	p_argv
//	%r9:	&exit_frame
//
// locals:
//	__gtid:	gtid parm pushed on stack so can pass & gtid to pkfn
//	__tid:	tid parm pushed on stack so can pass & tid to pkfn
//
// reg temps:
//	%rax:	used all over the place
//	%rdx:	used in stack pointer alignment calculation
//	%r11:	used to traverse p_argv array
//	%rsi:	used as temporary for stack parameters
//		used as temporary for number of pkfn parms to push
//	%rbx:	used to hold pkfn address, and zero constant, callee-save
//
// return:	%eax 	(always 1/TRUE)
//
// Frame layout relative to %rbp after the three pushes below:
//	-8(%rbp):	saved %rbx
//	__gtid(%rbp):	gtid (pushed %rsi)
//	__tid(%rbp):	tid  (pushed %rdx)
__gtid = -16
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
	PROC  __kmp_invoke_microtask

	pushq 	%rbp		// save base pointer
	KMP_CFI_DEF_OFFSET 16
	KMP_CFI_OFFSET rbp,-16
	movq 	%rsp,%rbp	// establish the base pointer for this routine.
	KMP_CFI_REGISTER rbp

#if OMPT_SUPPORT
	movq	%rbp, (%r9)	// save exit_frame
#endif

	pushq 	%rbx		// %rbx is callee-saved register
	pushq	%rsi		// Put gtid on stack so can pass & gtid to pkfn
	pushq	%rdx		// Put tid on stack so can pass & tid to pkfn

// NOTE(review): argc arrives as a 32-bit int in %ecx but is read here as the
// full %rcx -- confirm that callers always leave the upper half clear.
	movq	%rcx, %rax	// Stack alignment calculation begins; argc -> %rax
	movq	$0, %rbx	// constant for cmovs later
	subq	$4, %rax	// subtract four args passed in registers to pkfn
#if KMP_MIC
	js	KMP_LABEL(kmp_0)	// jump to movq
	jmp	KMP_LABEL(kmp_0_exit)	// jump ahead
KMP_LABEL(kmp_0):
	movq	%rbx, %rax	// zero negative value in %rax <- max(0, argc-4)
KMP_LABEL(kmp_0_exit):
#else
	cmovsq	%rbx, %rax	// zero negative value in %rax <- max(0, argc-4)
#endif // KMP_MIC

	movq	%rax, %rsi	// save max(0, argc-4) -> %rsi for later
	shlq 	$3, %rax	// Number of bytes used on stack: max(0, argc-4)*8

	movq 	%rsp, %rdx	//
	subq 	%rax, %rdx	// %rsp-(max(0,argc-4)*8) -> %rdx --
				// without align, stack ptr would be this
	movq 	%rdx, %rax	// Save to %rax

	andq 	$0xFFFFFFFFFFFFFF80, %rax  // mask off lower 7 bits (128 bytes align)
	subq 	%rax, %rdx	// Amount to subtract from %rsp
	subq 	%rdx, %rsp	// Prepare the stack ptr --
				// now %rsp will align to 128-byte boundary at call site

				// setup pkfn parameter reg and stack
	movq	%rcx, %rax	// argc -> %rax
	cmpq	$0, %rsi
	je	KMP_LABEL(kmp_invoke_pass_parms)	// jump ahead if no parms to push
	shlq	$3, %rcx	// argc*8 -> %rcx
	movq 	%r8, %rdx	// p_argv -> %rdx
	addq	%rcx, %rdx	// &p_argv[argc] -> %rdx

	movq	%rsi, %rcx	// max (0, argc-4) -> %rcx

KMP_LABEL(kmp_invoke_push_parms):
	// push nth - 7th parms to pkfn on stack
	subq	$8, %rdx	// decrement p_argv pointer to previous parm
	movq	(%rdx), %rsi	// p_argv[%rcx-1] -> %rsi
	pushq	%rsi		// push p_argv[%rcx-1] onto stack (reverse order)
	subl	$1, %ecx

// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
//		if the name of the label that is an operand of this jecxz starts with a dot (".");
//	   Apple's linker does not support 1-byte length relocation;
//         Resolution: replace all .labelX entries with L_labelX.

	jecxz   KMP_LABEL(kmp_invoke_pass_parms)  // stop when four p_argv[] parms left
	jmp	KMP_LABEL(kmp_invoke_push_parms)
	ALIGN 3
KMP_LABEL(kmp_invoke_pass_parms):	// put 1st - 6th parms to pkfn in registers.
				// order here is important to avoid trashing
				// registers used for both input and output parms!
	movq	%rdi, %rbx	// pkfn -> %rbx
	leaq	__gtid(%rbp), %rdi // & gtid -> %rdi (store 1st parm to pkfn)
	leaq	__tid(%rbp), %rsi  // & tid -> %rsi (store 2nd parm to pkfn)

	movq	%r8, %r11	// p_argv -> %r11

#if KMP_MIC
	cmpq	$4, %rax	// argc >= 4?
	jns	KMP_LABEL(kmp_4)	// jump to movq
	jmp	KMP_LABEL(kmp_4_exit)	// jump ahead
KMP_LABEL(kmp_4):
	movq	24(%r11), %r9	// p_argv[3] -> %r9 (store 6th parm to pkfn)
KMP_LABEL(kmp_4_exit):

	cmpq	$3, %rax	// argc >= 3?
	jns	KMP_LABEL(kmp_3)	// jump to movq
	jmp	KMP_LABEL(kmp_3_exit)	// jump ahead
KMP_LABEL(kmp_3):
	movq	16(%r11), %r8	// p_argv[2] -> %r8 (store 5th parm to pkfn)
KMP_LABEL(kmp_3_exit):

	cmpq	$2, %rax	// argc >= 2?
	jns	KMP_LABEL(kmp_2)	// jump to movq
	jmp	KMP_LABEL(kmp_2_exit)	// jump ahead
KMP_LABEL(kmp_2):
	movq	8(%r11), %rcx	// p_argv[1] -> %rcx (store 4th parm to pkfn)
KMP_LABEL(kmp_2_exit):

	cmpq	$1, %rax	// argc >= 1?
	jns	KMP_LABEL(kmp_1)	// jump to movq
	jmp	KMP_LABEL(kmp_1_exit)	// jump ahead
KMP_LABEL(kmp_1):
	movq	(%r11), %rdx	// p_argv[0] -> %rdx (store 3rd parm to pkfn)
KMP_LABEL(kmp_1_exit):
#else
// NOTE(review): a cmov with a memory source reads that memory even when the
// condition is false, so p_argv[0..3] are loaded for any argc -- confirm
// that callers always provide at least four readable slots.
	cmpq	$4, %rax	// argc >= 4?
	cmovnsq	24(%r11), %r9	// p_argv[3] -> %r9 (store 6th parm to pkfn)

	cmpq	$3, %rax	// argc >= 3?
	cmovnsq	16(%r11), %r8	// p_argv[2] -> %r8 (store 5th parm to pkfn)

	cmpq	$2, %rax	// argc >= 2?
	cmovnsq	8(%r11), %rcx	// p_argv[1] -> %rcx (store 4th parm to pkfn)

	cmpq	$1, %rax	// argc >= 1?
	cmovnsq	(%r11), %rdx	// p_argv[0] -> %rdx (store 3rd parm to pkfn)
#endif // KMP_MIC

	call	*%rbx		// call (*pkfn)();
	movq	$1, %rax	// move 1 into return register;

	movq	-8(%rbp), %rbx	// restore %rbx	using %rbp since %rsp was modified
	movq 	%rbp, %rsp	// restore stack pointer
	popq 	%rbp		// restore frame pointer
	KMP_CFI_DEF rsp,8
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// rdtsc delivers the counter split across %edx:%eax; the two halves are
// merged into %rax for the 64-bit return value.
        .text
	PROC  __kmp_hardware_timestamp
	rdtsc
	shlq    $32, %rdx
	orq     %rdx, %rax
	ret

	DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp

//------------------------------------------------------------------------
// FUNCTION __kmp_bsr32
//
// int
// __kmp_bsr32( int );
//
// Returns the result of a bsr on the 32-bit argument in %edi.
        .text
        PROC  __kmp_bsr32

        bsr    %edi,%eax
        ret

        DEBUG_INFO __kmp_bsr32

// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86_64 */

// NOTE(review): the lone quote below presumably balances the apostrophe in
// the linker note above for preprocessors that pair quotes inside
// comments -- keep it, and avoid adding unpaired quotes to this file.
// '
#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ...
//          );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//	x0:	pkfn
//	w1:	gtid
//	w2:	tid
//	w3:	argc
//	x4:	p_argv
//	x5:	&exit_frame
//
// locals:
//	__gtid:	gtid parm pushed on stack so can pass & gtid to pkfn
//	__tid:	tid parm pushed on stack so can pass & tid to pkfn
//
// reg temps:
//	 x8:	used to hold pkfn address
//	 w9:	used as temporary for number of pkfn parms
//	x10:	used to traverse p_argv array
//	x11:	used as temporary for stack placement calculation
//	x12:	used as temporary for stack parameters
//	x19:	used to preserve exit_frame_ptr, callee-save
//
// return:	w0	(always 1/TRUE)
//

// Offsets below x29 at which gtid and tid are stored (used as #-__gtid etc.).
__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	PROC __kmp_invoke_microtask

	stp	x29, x30, [sp, #-16]!
# if OMPT_SUPPORT
	stp	x19, x20, [sp, #-16]!
# endif
	mov	x29, sp

	// Reserve (1 + argc/2) * 16 bytes below x29; this holds the gtid/tid
	// slots and the outgoing stack arguments.
	orr	w9, wzr, #1
	add	w9, w9, w3, lsr #1
	sub	sp, sp, w9, uxtw #4
	mov	x11, sp			// x11 walks the outgoing stack area

	mov	x8, x0			// pkfn
	str	w1, [x29, #-__gtid]
	str	w2, [x29, #-__tid]
	mov	w9, w3			// w9 = number of p_argv entries left
	mov	x10, x4			// x10 = p_argv cursor
# if OMPT_SUPPORT
	mov	x19, x5
	str	x29, [x19]		// *exit_frame_ptr = frame address
# endif

	sub	x0, x29, #__gtid	// 1st parm: & gtid
	sub	x1, x29, #__tid		// 2nd parm: & tid

	// Load up to six p_argv entries into x2..x7, leaving as soon as the
	// remaining count reaches zero.
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x2, [x10]

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x3, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x4, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x5, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x6, [x10, #8]!

	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x7, [x10, #8]!

	// Any further entries are copied to the stack area in order.
KMP_LABEL(kmp_0):
	sub	w9, w9, #1
	cbz	w9, KMP_LABEL(kmp_1)
	ldr	x12, [x10, #8]!
	str	x12, [x11], #8
	b	KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
	blr	x8
	orr	w0, wzr, #1		// return 1
	mov	sp, x29
# if OMPT_SUPPORT
	str	xzr, [x19]		// *exit_frame_ptr = 0
	ldp	x19, x20, [sp], #16
# endif
	ldp	x29, x30, [sp], #16
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */

#if KMP_ARCH_PPC64

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//	r3:	pkfn
//	r4:	gtid
//	r5:	tid
//	r6:	argc
//	r7:	p_argv
//	r8:	&exit_frame
//
// return:	r3	(always 1/TRUE)
//
	.text
# if KMP_ARCH_PPC64_ELFv2
	.abiversion 2
# endif
	.globl	__kmp_invoke_microtask

# if KMP_ARCH_PPC64_ELFv2
	.p2align	4
# else
	.p2align	2
# endif

	.type	__kmp_invoke_microtask,@function

# if KMP_ARCH_PPC64_ELFv2
__kmp_invoke_microtask:
.Lfunc_begin0:
.Lfunc_gep0:
	addis 2, 12, .TOC.-.Lfunc_gep0@ha
	addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
	.localentry	__kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
# else
	.section	.opd,"aw",@progbits
__kmp_invoke_microtask:
	.p2align	3
	.quad	.Lfunc_begin0
	.quad	.TOC.@tocbase
	.quad	0
	.text
.Lfunc_begin0:
# endif

// -- Begin __kmp_invoke_microtask
// mark_begin;

1405// We need to allocate a stack frame large enough to hold all of the parameters 1406// on the stack for the microtask plus what this function needs. That's 48 1407// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the 1408// parameters to the microtask, plus 8 bytes to store the values of r4 and r5, 1409// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes 1410// to save r30 to hold a copy of r8. 1411 1412 .cfi_startproc 1413 mflr 0 1414 std 31, -8(1) 1415 std 0, 16(1) 1416 1417// This is unusual because normally we'd set r31 equal to r1 after the stack 1418// frame is established. In this case, however, we need to dynamically compute 1419// the stack frame size, and so we keep a direct copy of r1 to access our 1420// register save areas and restore the r1 value before returning. 1421 mr 31, 1 1422 .cfi_def_cfa_register r31 1423 .cfi_offset r31, -8 1424 .cfi_offset lr, 16 1425 1426// Compute the size necessary for the local stack frame. 1427# if KMP_ARCH_PPC64_ELFv2 1428 li 12, 72 1429# else 1430 li 12, 88 1431# endif 1432 sldi 0, 6, 3 1433 add 12, 0, 12 1434 neg 12, 12 1435 1436// We need to make sure that the stack frame stays aligned (to 16 bytes, except 1437// under the BG/Q CNK, where it must be to 32 bytes). 1438# if KMP_OS_CNK 1439 li 0, -32 1440# else 1441 li 0, -16 1442# endif 1443 and 12, 0, 12 1444 1445// Establish the local stack frame. 1446 stdux 1, 1, 12 1447 1448# if OMPT_SUPPORT 1449 .cfi_offset r30, -16 1450 std 30, -16(31) 1451 std 1, 0(8) 1452 mr 30, 8 1453# endif 1454 1455// Store gtid and tid to the stack because they're passed by reference to the microtask. 
1456 stw 4, -20(31) 1457 stw 5, -24(31) 1458 1459 mr 12, 6 1460 mr 4, 7 1461 1462 cmpwi 0, 12, 1 1463 blt 0, .Lcall 1464 1465 ld 5, 0(4) 1466 1467 cmpwi 0, 12, 2 1468 blt 0, .Lcall 1469 1470 ld 6, 8(4) 1471 1472 cmpwi 0, 12, 3 1473 blt 0, .Lcall 1474 1475 ld 7, 16(4) 1476 1477 cmpwi 0, 12, 4 1478 blt 0, .Lcall 1479 1480 ld 8, 24(4) 1481 1482 cmpwi 0, 12, 5 1483 blt 0, .Lcall 1484 1485 ld 9, 32(4) 1486 1487 cmpwi 0, 12, 6 1488 blt 0, .Lcall 1489 1490 ld 10, 40(4) 1491 1492 cmpwi 0, 12, 7 1493 blt 0, .Lcall 1494 1495// There are more than 6 microtask parameters, so we need to store the 1496// remainder to the stack. 1497 addi 12, 12, -6 1498 mtctr 12 1499 1500// These are set to 8 bytes before the first desired store address (we're using 1501// pre-increment loads and stores in the loop below). The parameter save area 1502// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and 1503// 32 + 8*8 == 96 bytes above r1 for ELFv2. 1504 addi 4, 4, 40 1505# if KMP_ARCH_PPC64_ELFv2 1506 addi 12, 1, 88 1507# else 1508 addi 12, 1, 104 1509# endif 1510 1511.Lnext: 1512 ldu 0, 8(4) 1513 stdu 0, 8(12) 1514 bdnz .Lnext 1515 1516.Lcall: 1517# if KMP_ARCH_PPC64_ELFv2 1518 std 2, 24(1) 1519 mr 12, 3 1520#else 1521 std 2, 40(1) 1522// For ELFv1, we need to load the actual function address from the function descriptor. 
1523 ld 12, 0(3) 1524 ld 2, 8(3) 1525 ld 11, 16(3) 1526#endif 1527 1528 addi 3, 31, -20 1529 addi 4, 31, -24 1530 1531 mtctr 12 1532 bctrl 1533# if KMP_ARCH_PPC64_ELFv2 1534 ld 2, 24(1) 1535# else 1536 ld 2, 40(1) 1537# endif 1538 1539# if OMPT_SUPPORT 1540 li 3, 0 1541 std 3, 0(30) 1542# endif 1543 1544 li 3, 1 1545 1546# if OMPT_SUPPORT 1547 ld 30, -16(31) 1548# endif 1549 1550 mr 1, 31 1551 ld 0, 16(1) 1552 ld 31, -8(1) 1553 mtlr 0 1554 blr 1555 1556 .long 0 1557 .quad 0 1558.Lfunc_end0: 1559 .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 1560 .cfi_endproc 1561 1562// -- End __kmp_invoke_microtask 1563 1564#endif /* KMP_ARCH_PPC64 */ 1565 1566#if KMP_ARCH_RISCV64 1567 1568//------------------------------------------------------------------------ 1569// 1570// typedef void (*microtask_t)(int *gtid, int *tid, ...); 1571// 1572// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 1573// void *p_argv[] 1574// #if OMPT_SUPPORT 1575// , 1576// void **exit_frame_ptr 1577// #endif 1578// ) { 1579// #if OMPT_SUPPORT 1580// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1581// #endif 1582// 1583// (*pkfn)(>id, &tid, argv[0], ...); 1584// 1585// return 1; 1586// } 1587// 1588// Parameters: 1589// a0: pkfn 1590// a1: gtid 1591// a2: tid 1592// a3: argc 1593// a4: p_argv 1594// a5: exit_frame_ptr 1595// 1596// Locals: 1597// __gtid: gtid param pushed on stack so can pass >id to pkfn 1598// __tid: tid param pushed on stack so can pass &tid to pkfn 1599// 1600// Temp. 
registers: 1601// 1602// t0: used to calculate the dynamic stack size / used to hold pkfn address 1603// t1: used as temporary for stack placement calculation 1604// t2: used as temporary for stack arguments 1605// t3: used as temporary for number of remaining pkfn parms 1606// t4: used to traverse p_argv array 1607// 1608// return: a0 (always 1/TRUE) 1609// 1610 1611__gtid = -20 1612__tid = -24 1613 1614// -- Begin __kmp_invoke_microtask 1615// mark_begin; 1616 .text 1617 .globl __kmp_invoke_microtask 1618 .p2align 1 1619 .type __kmp_invoke_microtask,@function 1620__kmp_invoke_microtask: 1621 .cfi_startproc 1622 1623 // First, save ra and fp 1624 addi sp, sp, -16 1625 sd ra, 8(sp) 1626 sd fp, 0(sp) 1627 addi fp, sp, 16 1628 .cfi_def_cfa fp, 0 1629 .cfi_offset ra, -8 1630 .cfi_offset fp, -16 1631 1632 // Compute the dynamic stack size: 1633 // 1634 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by 1635 // reference 1636 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' 1637 // function by register. 
Given that we have 8 of such registers (a[0-7]) 1638 // and two + 'argc' arguments (consider >id and &tid), we need to 1639 // reserve max(0, argc - 6)*8 extra bytes 1640 // 1641 // The total number of bytes is then max(0, argc - 6)*8 + 8 1642 1643 // Compute max(0, argc - 6) using the following bithack: 1644 // max(0, x) = x - (x & (x >> 31)), where x := argc - 6 1645 // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax 1646 addi t0, a3, -6 1647 srai t1, t0, 31 1648 and t1, t0, t1 1649 sub t0, t0, t1 1650 1651 addi t0, t0, 1 1652 1653 slli t0, t0, 3 1654 sub sp, sp, t0 1655 1656 // Align the stack to 16 bytes 1657 andi sp, sp, -16 1658 1659 mv t0, a0 1660 mv t3, a3 1661 mv t4, a4 1662 1663#if OMPT_SUPPORT 1664 // Save frame pointer into exit_frame 1665 sd fp, 0(a5) 1666#endif 1667 1668 // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) 1669 1670 sw a1, __gtid(fp) 1671 sw a2, __tid(fp) 1672 1673 addi a0, fp, __gtid 1674 addi a1, fp, __tid 1675 1676 beqz t3, .L_kmp_3 1677 ld a2, 0(t4) 1678 1679 addi t3, t3, -1 1680 beqz t3, .L_kmp_3 1681 ld a3, 8(t4) 1682 1683 addi t3, t3, -1 1684 beqz t3, .L_kmp_3 1685 ld a4, 16(t4) 1686 1687 addi t3, t3, -1 1688 beqz t3, .L_kmp_3 1689 ld a5, 24(t4) 1690 1691 addi t3, t3, -1 1692 beqz t3, .L_kmp_3 1693 ld a6, 32(t4) 1694 1695 addi t3, t3, -1 1696 beqz t3, .L_kmp_3 1697 ld a7, 40(t4) 1698 1699 // Prepare any additional argument passed through the stack 1700 addi t4, t4, 48 1701 mv t1, sp 1702 j .L_kmp_2 1703.L_kmp_1: 1704 ld t2, 0(t4) 1705 sd t2, 0(t1) 1706 addi t4, t4, 8 1707 addi t1, t1, 8 1708.L_kmp_2: 1709 addi t3, t3, -1 1710 bnez t3, .L_kmp_1 1711 1712.L_kmp_3: 1713 // Call pkfn function 1714 jalr t0 1715 1716 // Restore stack and return 1717 1718 addi a0, zero, 1 1719 1720 addi sp, fp, -16 1721 ld fp, 0(sp) 1722 ld ra, 8(sp) 1723 addi sp, sp, 16 1724 ret 1725.Lfunc_end0: 1726 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 1727 .cfi_endproc 1728 1729// -- End 
__kmp_invoke_microtask 1730 1731#endif /* KMP_ARCH_RISCV64 */ 1732 1733#if KMP_ARCH_ARM || KMP_ARCH_MIPS 1734 .data 1735 .comm .gomp_critical_user_,32,8 1736 .data 1737 .align 4 1738 .global __kmp_unnamed_critical_addr 1739__kmp_unnamed_critical_addr: 1740 .4byte .gomp_critical_user_ 1741 .size __kmp_unnamed_critical_addr,4 1742#endif /* KMP_ARCH_ARM */ 1743 1744#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 1745 .data 1746 .comm .gomp_critical_user_,32,8 1747 .data 1748 .align 8 1749 .global __kmp_unnamed_critical_addr 1750__kmp_unnamed_critical_addr: 1751 .8byte .gomp_critical_user_ 1752 .size __kmp_unnamed_critical_addr,8 1753#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || 1754 KMP_ARCH_RISCV64 */ 1755 1756#if KMP_OS_LINUX 1757# if KMP_ARCH_ARM 1758.section .note.GNU-stack,"",%progbits 1759# else 1760.section .note.GNU-stack,"",@progbits 1761# endif 1762#endif 1763