1// z_Linux_asm.S: - microtasking routines specifically 2// written for Intel platforms running Linux* OS 3 4// 5////===----------------------------------------------------------------------===// 6//// 7//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8//// See https://llvm.org/LICENSE.txt for license information. 9//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10//// 11////===----------------------------------------------------------------------===// 12// 13 14// ----------------------------------------------------------------------- 15// macros 16// ----------------------------------------------------------------------- 17 18#include "kmp_config.h" 19 20#if KMP_ARCH_X86 || KMP_ARCH_X86_64 21 22# if KMP_MIC 23// the 'delay r16/r32/r64' should be used instead of the 'pause'. 24// The delay operation has the effect of removing the current thread from 25// the round-robin HT mechanism, and therefore speeds up the issue rate of 26// the other threads on the same core. 27// 28// A value of 0 works fine for <= 2 threads per core, but causes the EPCC 29// barrier time to increase greatly for 3 or more threads per core. 30// 31// A value of 100 works pretty well for up to 4 threads per core, but isn't 32// quite as fast as 0 for 2 threads per core. 33// 34// We need to check what happens for oversubscription / > 4 threads per core. 35// It is possible that we need to pass the delay value in as a parameter 36// that the caller determines based on the total # threads / # cores. 37// 38//.macro pause_op 39// mov $100, %rax 40// delay %rax 41//.endm 42# else 43# define pause_op .byte 0xf3,0x90 44# endif // KMP_MIC 45 46# if KMP_OS_DARWIN 47# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols 48# define KMP_LABEL(x) L_##x // form the name of label 49.macro KMP_CFI_DEF_OFFSET 50.endmacro 51.macro KMP_CFI_OFFSET 52.endmacro 53.macro KMP_CFI_REGISTER 54.endmacro 55.macro KMP_CFI_DEF 56.endmacro 57.macro ALIGN 58 .align $0 59.endmacro 60.macro DEBUG_INFO 61/* Not sure what .size does in icc, not sure if we need to do something 62 similar for OS X*. 63*/ 64.endmacro 65.macro PROC 66 ALIGN 4 67 .globl KMP_PREFIX_UNDERSCORE($0) 68KMP_PREFIX_UNDERSCORE($0): 69.endmacro 70# else // KMP_OS_DARWIN 71# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols 72// Format labels so that they don't override function names in gdb's backtraces 73// MIC assembler doesn't accept .L syntax, the L works fine there (as well as 74// on OS X*) 75# if KMP_MIC 76# define KMP_LABEL(x) L_##x // local label 77# else 78# define KMP_LABEL(x) .L_##x // local label hidden from backtraces 79# endif // KMP_MIC 80.macro ALIGN size 81 .align 1<<(\size) 82.endm 83.macro DEBUG_INFO proc 84 .cfi_endproc 85// Not sure why we need .type and .size for the functions 86 .align 16 87 .type \proc,@function 88 .size \proc,.-\proc 89.endm 90.macro PROC proc 91 ALIGN 4 92 .globl KMP_PREFIX_UNDERSCORE(\proc) 93KMP_PREFIX_UNDERSCORE(\proc): 94 .cfi_startproc 95.endm 96.macro KMP_CFI_DEF_OFFSET sz 97 .cfi_def_cfa_offset \sz 98.endm 99.macro KMP_CFI_OFFSET reg, sz 100 .cfi_offset \reg,\sz 101.endm 102.macro KMP_CFI_REGISTER reg 103 .cfi_def_cfa_register \reg 104.endm 105.macro KMP_CFI_DEF reg, sz 106 .cfi_def_cfa \reg,\sz 107.endm 108# endif // KMP_OS_DARWIN 109#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 110 111#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) 112 113# if KMP_OS_DARWIN 114# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols 115# define KMP_LABEL(x) L_##x // form the name of label 116 117.macro ALIGN 118 .align $0 119.endmacro 120 121.macro DEBUG_INFO 122/* Not sure what .size does in icc, not sure if we need to do something 123 similar for OS X*. 124*/ 125.endmacro 126 127.macro PROC 128 ALIGN 4 129 .globl KMP_PREFIX_UNDERSCORE($0) 130KMP_PREFIX_UNDERSCORE($0): 131.endmacro 132# elif KMP_OS_WINDOWS 133# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Windows/ARM64 symbols 134// Format labels so that they don't override function names in gdb's backtraces 135# define KMP_LABEL(x) .L_##x // local label hidden from backtraces 136 137.macro ALIGN size 138 .align 1<<(\size) 139.endm 140 141.macro DEBUG_INFO proc 142 ALIGN 2 143.endm 144 145.macro PROC proc 146 ALIGN 2 147 .globl KMP_PREFIX_UNDERSCORE(\proc) 148KMP_PREFIX_UNDERSCORE(\proc): 149.endm 150# else // KMP_OS_DARWIN || KMP_OS_WINDOWS 151# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols 152// Format labels so that they don't override function names in gdb's backtraces 153# define KMP_LABEL(x) .L_##x // local label hidden from backtraces 154 155.macro ALIGN size 156 .align 1<<(\size) 157.endm 158 159.macro DEBUG_INFO proc 160 .cfi_endproc 161// Not sure why we need .type and .size for the functions 162 ALIGN 2 163#if KMP_ARCH_ARM 164 .type \proc,%function 165#else 166 .type \proc,@function 167#endif 168 .size \proc,.-\proc 169.endm 170 171.macro PROC proc 172 ALIGN 2 173 .globl KMP_PREFIX_UNDERSCORE(\proc) 174KMP_PREFIX_UNDERSCORE(\proc): 175 .cfi_startproc 176.endm 177# endif // KMP_OS_DARWIN 178 179# if KMP_OS_LINUX 180// BTI and PAC gnu property note 181# define NT_GNU_PROPERTY_TYPE_0 5 182# define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 183# define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 184# define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 185 186# define GNU_PROPERTY(type, value) \ 187 .pushsection .note.gnu.property, "a"; \ 188 .p2align 3; \ 189 .word 4; \ 190 .word 16; \ 191 .word NT_GNU_PROPERTY_TYPE_0; \ 192 .asciz "GNU"; \ 193 .word type; \ 194 .word 4; \ 195 .word value; \ 196 .word 0; \ 197 .popsection 198# endif 199 200# if defined(__ARM_FEATURE_BTI_DEFAULT) 201# define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI 202# else 203# define BTI_FLAG 0 204# endif 205# if __ARM_FEATURE_PAC_DEFAULT & 3 206# define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC 207# else 208# define PAC_FLAG 0 209# endif 210 211# if (BTI_FLAG | PAC_FLAG) != 0 212# if PAC_FLAG != 0 213# define PACBTI_C hint #25 214# define PACBTI_RET hint #29 215# else 216# define PACBTI_C hint #34 217# define PACBTI_RET 218# endif 219# define GNU_PROPERTY_BTI_PAC \ 220 GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG) 221# else 222# define PACBTI_C 223# define PACBTI_RET 224# define GNU_PROPERTY_BTI_PAC 225# endif 226#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) 227 228.macro COMMON name, size, align_power 229#if KMP_OS_DARWIN 230 .comm \name, \size 231#elif KMP_OS_WINDOWS 232 .comm \name, \size, \align_power 233#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS 234 .comm \name, \size, (1<<(\align_power)) 235#endif 236.endm 237 238// ----------------------------------------------------------------------- 239// data 240// ----------------------------------------------------------------------- 241 242#ifdef KMP_GOMP_COMPAT 243 244// Support for unnamed common blocks. 245// 246// Because the symbol ".gomp_critical_user_" contains a ".", we have to 247// put this stuff in assembly. 248 249# if KMP_ARCH_X86 250# if KMP_OS_DARWIN 251 .data 252 .comm .gomp_critical_user_,32 253 .data 254 .globl ___kmp_unnamed_critical_addr 255___kmp_unnamed_critical_addr: 256 .long .gomp_critical_user_ 257# else /* Linux* OS */ 258 .data 259 .comm .gomp_critical_user_,32,8 260 .data 261 ALIGN 4 262 .global __kmp_unnamed_critical_addr 263__kmp_unnamed_critical_addr: 264 .4byte .gomp_critical_user_ 265 .type __kmp_unnamed_critical_addr,@object 266 .size __kmp_unnamed_critical_addr,4 267# endif /* KMP_OS_DARWIN */ 268# endif /* KMP_ARCH_X86 */ 269 270# if KMP_ARCH_X86_64 271# if KMP_OS_DARWIN 272 .data 273 .comm .gomp_critical_user_,32 274 .data 275 .globl ___kmp_unnamed_critical_addr 276___kmp_unnamed_critical_addr: 277 .quad .gomp_critical_user_ 278# else /* Linux* OS */ 279 .data 280 .comm .gomp_critical_user_,32,8 281 .data 282 ALIGN 8 283 .global __kmp_unnamed_critical_addr 284__kmp_unnamed_critical_addr: 285 .8byte .gomp_critical_user_ 286 .type __kmp_unnamed_critical_addr,@object 287 .size __kmp_unnamed_critical_addr,8 288# endif /* KMP_OS_DARWIN */ 289# endif /* KMP_ARCH_X86_64 */ 290 291#endif /* KMP_GOMP_COMPAT */ 292 293 294#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 295 296// ----------------------------------------------------------------------- 297// microtasking routines specifically written for IA-32 architecture 298// running Linux* OS 299// ----------------------------------------------------------------------- 300 301 .ident "Intel Corporation" 302 .data 303 ALIGN 4 304// void 305// __kmp_x86_pause( void ); 306 307 .text 308 PROC __kmp_x86_pause 309 310 pause_op 311 ret 312 313 DEBUG_INFO __kmp_x86_pause 314 315# if !KMP_ASM_INTRINS 316 317//------------------------------------------------------------------------ 318// kmp_int32 319// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); 320 321 PROC __kmp_test_then_add32 322 323 movl 4(%esp), %ecx 324 movl 8(%esp), %eax 325 lock 326 xaddl %eax,(%ecx) 327 ret 328 329 DEBUG_INFO __kmp_test_then_add32 330 331//------------------------------------------------------------------------ 332// FUNCTION __kmp_xchg_fixed8 333// 334// kmp_int32 335// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); 336// 337// parameters: 338// p: 4(%esp) 339// d: 8(%esp) 340// 341// return: %al 342 PROC __kmp_xchg_fixed8 343 344 movl 4(%esp), %ecx // "p" 345 movb 8(%esp), %al // "d" 346 347 lock 348 xchgb %al,(%ecx) 349 ret 350 351 DEBUG_INFO __kmp_xchg_fixed8 352 353 354//------------------------------------------------------------------------ 355// FUNCTION __kmp_xchg_fixed16 356// 357// kmp_int16 358// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); 359// 360// parameters: 361// p: 4(%esp) 362// d: 8(%esp) 363// return: %ax 364 PROC __kmp_xchg_fixed16 365 366 movl 4(%esp), %ecx // "p" 367 movw 8(%esp), %ax // "d" 368 369 lock 370 xchgw %ax,(%ecx) 371 ret 372 373 DEBUG_INFO __kmp_xchg_fixed16 374 375 376//------------------------------------------------------------------------ 377// FUNCTION __kmp_xchg_fixed32 378// 379// kmp_int32 380// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); 381// 382// parameters: 383// p: 4(%esp) 384// d: 8(%esp) 385// 386// return: %eax 387 PROC __kmp_xchg_fixed32 388 389 movl 4(%esp), %ecx // "p" 390 movl 8(%esp), %eax // "d" 391 392 lock 393 xchgl %eax,(%ecx) 394 ret 395 396 DEBUG_INFO __kmp_xchg_fixed32 397 398 399// kmp_int8 400// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 401 PROC __kmp_compare_and_store8 402 403 movl 4(%esp), %ecx 404 movb 8(%esp), %al 405 movb 12(%esp), %dl 406 lock 407 cmpxchgb %dl,(%ecx) 408 sete %al // if %al == (%ecx) set %al = 1 else set %al = 0 409 and $1, %eax // sign extend previous instruction 410 ret 411 412 DEBUG_INFO __kmp_compare_and_store8 413 414// kmp_int16 415// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); 416 PROC __kmp_compare_and_store16 417 418 movl 4(%esp), %ecx 419 movw 8(%esp), %ax 420 movw 12(%esp), %dx 421 lock 422 cmpxchgw %dx,(%ecx) 423 sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0 424 and $1, %eax // sign extend previous instruction 425 ret 426 427 DEBUG_INFO __kmp_compare_and_store16 428 429// kmp_int32 430// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); 431 PROC __kmp_compare_and_store32 432 433 movl 4(%esp), %ecx 434 movl 8(%esp), %eax 435 movl 12(%esp), %edx 436 lock 437 cmpxchgl %edx,(%ecx) 438 sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 439 and $1, %eax // sign extend previous instruction 440 ret 441 442 DEBUG_INFO __kmp_compare_and_store32 443 444// kmp_int32 445// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s ); 446 PROC __kmp_compare_and_store64 447 448 pushl %ebp 449 movl %esp, %ebp 450 pushl %ebx 451 pushl %edi 452 movl 8(%ebp), %edi 453 movl 12(%ebp), %eax // "cv" low order word 454 movl 16(%ebp), %edx // "cv" high order word 455 movl 20(%ebp), %ebx // "sv" low order word 456 movl 24(%ebp), %ecx // "sv" high order word 457 lock 458 cmpxchg8b (%edi) 459 sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 460 and $1, %eax // sign extend previous instruction 461 popl %edi 462 popl %ebx 463 movl %ebp, %esp 464 popl %ebp 465 ret 466 467 DEBUG_INFO __kmp_compare_and_store64 468 469// kmp_int8 470// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); 471 PROC __kmp_compare_and_store_ret8 472 473 movl 4(%esp), %ecx 474 movb 8(%esp), %al 475 movb 12(%esp), %dl 476 lock 477 cmpxchgb %dl,(%ecx) 478 ret 479 480 DEBUG_INFO __kmp_compare_and_store_ret8 481 482// kmp_int16 483// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, 484// kmp_int16 sv); 485 PROC __kmp_compare_and_store_ret16 486 487 movl 4(%esp), %ecx 488 movw 8(%esp), %ax 489 movw 12(%esp), %dx 490 lock 491 cmpxchgw %dx,(%ecx) 492 ret 493 494 DEBUG_INFO __kmp_compare_and_store_ret16 495 496// kmp_int32 497// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, 498// kmp_int32 sv); 499 PROC __kmp_compare_and_store_ret32 500 501 movl 4(%esp), %ecx 502 movl 8(%esp), %eax 503 movl 12(%esp), %edx 504 lock 505 cmpxchgl %edx,(%ecx) 506 ret 507 508 DEBUG_INFO __kmp_compare_and_store_ret32 509 510// kmp_int64 511// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, 512// kmp_int64 sv); 513 PROC __kmp_compare_and_store_ret64 514 515 pushl %ebp 516 movl %esp, %ebp 517 pushl %ebx 518 pushl %edi 519 movl 8(%ebp), %edi 520 movl 12(%ebp), %eax // "cv" low order word 521 movl 16(%ebp), %edx // "cv" high order word 522 movl 20(%ebp), %ebx // "sv" low order word 523 movl 24(%ebp), %ecx // "sv" high order word 524 lock 525 cmpxchg8b (%edi) 526 popl %edi 527 popl %ebx 528 movl %ebp, %esp 529 popl %ebp 530 ret 531 532 DEBUG_INFO __kmp_compare_and_store_ret64 533 534 535//------------------------------------------------------------------------ 536// FUNCTION __kmp_xchg_real32 537// 538// kmp_real32 539// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); 540// 541// parameters: 542// addr: 4(%esp) 543// data: 8(%esp) 544// 545// return: %eax 546 PROC __kmp_xchg_real32 547 548 pushl %ebp 549 movl %esp, %ebp 550 subl $4, %esp 551 pushl %esi 552 553 movl 4(%ebp), %esi 554 flds (%esi) 555 // load <addr> 556 fsts -4(%ebp) 557 // store old value 558 559 movl 8(%ebp), %eax 560 561 lock 562 xchgl %eax, (%esi) 563 564 flds -4(%ebp) 565 // return old value 566 567 popl %esi 568 movl %ebp, %esp 569 popl %ebp 570 ret 571 572 DEBUG_INFO __kmp_xchg_real32 573 574# endif /* !KMP_ASM_INTRINS */ 575 576//------------------------------------------------------------------------ 577// int 578// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 579// int gtid, int tid, 580// int argc, void *p_argv[] 581// #if OMPT_SUPPORT 582// , 583// void **exit_frame_ptr 584// #endif 585// ) { 586// #if OMPT_SUPPORT 587// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 588// #endif 589// 590// (*pkfn)( & gtid, & tid, argv[0], ... ); 591// return 1; 592// } 593 594// -- Begin __kmp_invoke_microtask 595// mark_begin; 596 PROC __kmp_invoke_microtask 597 598 pushl %ebp 599 KMP_CFI_DEF_OFFSET 8 600 KMP_CFI_OFFSET ebp,-8 601 movl %esp,%ebp // establish the base pointer for this routine. 602 KMP_CFI_REGISTER ebp 603 subl $8,%esp // allocate space for two local variables. 604 // These varibales are: 605 // argv: -4(%ebp) 606 // temp: -8(%ebp) 607 // 608 pushl %ebx // save %ebx to use during this routine 609 // 610#if OMPT_SUPPORT 611 movl 28(%ebp),%ebx // get exit_frame address 612 movl %ebp,(%ebx) // save exit_frame 613#endif 614 615 movl 20(%ebp),%ebx // Stack alignment - # args 616 addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) 617 shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 618 movl %esp,%eax // 619 subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this 620 movl %eax,%ebx // Save to %ebx 621 andl $0xFFFFFF80,%eax // mask off 7 bits 622 subl %eax,%ebx // Amount to subtract from %esp 623 subl %ebx,%esp // Prepare the stack ptr -- 624 // now it will be aligned on 128-byte boundary at the call 625 626 movl 24(%ebp),%eax // copy from p_argv[] 627 movl %eax,-4(%ebp) // into the local variable *argv. 628 629 movl 20(%ebp),%ebx // argc is 20(%ebp) 630 shll $2,%ebx 631 632KMP_LABEL(invoke_2): 633 cmpl $0,%ebx 634 jg KMP_LABEL(invoke_4) 635 jmp KMP_LABEL(invoke_3) 636 ALIGN 2 637KMP_LABEL(invoke_4): 638 movl -4(%ebp),%eax 639 subl $4,%ebx // decrement argc. 640 addl %ebx,%eax // index into argv. 641 movl (%eax),%edx 642 pushl %edx 643 644 jmp KMP_LABEL(invoke_2) 645 ALIGN 2 646KMP_LABEL(invoke_3): 647 leal 16(%ebp),%eax // push & tid 648 pushl %eax 649 650 leal 12(%ebp),%eax // push & gtid 651 pushl %eax 652 653 movl 8(%ebp),%ebx 654 call *%ebx // call (*pkfn)(); 655 656 movl $1,%eax // return 1; 657 658 movl -12(%ebp),%ebx // restore %ebx 659 leave 660 KMP_CFI_DEF esp,4 661 ret 662 663 DEBUG_INFO __kmp_invoke_microtask 664// -- End __kmp_invoke_microtask 665 666 667// kmp_uint64 668// __kmp_hardware_timestamp(void) 669 PROC __kmp_hardware_timestamp 670 rdtsc 671 ret 672 673 DEBUG_INFO __kmp_hardware_timestamp 674// -- End __kmp_hardware_timestamp 675 676#endif /* KMP_ARCH_X86 */ 677 678 679#if KMP_ARCH_X86_64 680 681// ----------------------------------------------------------------------- 682// microtasking routines specifically written for IA-32 architecture and 683// Intel(R) 64 running Linux* OS 684// ----------------------------------------------------------------------- 685 686// -- Machine type P 687// mark_description "Intel Corporation"; 688 .ident "Intel Corporation" 689// -- .file "z_Linux_asm.S" 690 .data 691 ALIGN 4 692 693// To prevent getting our code into .data section .text added to every routine 694// definition for x86_64. 695//------------------------------------------------------------------------ 696# if !KMP_ASM_INTRINS 697 698//------------------------------------------------------------------------ 699// FUNCTION __kmp_test_then_add32 700// 701// kmp_int32 702// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); 703// 704// parameters: 705// p: %rdi 706// d: %esi 707// 708// return: %eax 709 .text 710 PROC __kmp_test_then_add32 711 712 movl %esi, %eax // "d" 713 lock 714 xaddl %eax,(%rdi) 715 ret 716 717 DEBUG_INFO __kmp_test_then_add32 718 719 720//------------------------------------------------------------------------ 721// FUNCTION __kmp_test_then_add64 722// 723// kmp_int64 724// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); 725// 726// parameters: 727// p: %rdi 728// d: %rsi 729// return: %rax 730 .text 731 PROC __kmp_test_then_add64 732 733 movq %rsi, %rax // "d" 734 lock 735 xaddq %rax,(%rdi) 736 ret 737 738 DEBUG_INFO __kmp_test_then_add64 739 740 741//------------------------------------------------------------------------ 742// FUNCTION __kmp_xchg_fixed8 743// 744// kmp_int32 745// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); 746// 747// parameters: 748// p: %rdi 749// d: %sil 750// 751// return: %al 752 .text 753 PROC __kmp_xchg_fixed8 754 755 movb %sil, %al // "d" 756 757 lock 758 xchgb %al,(%rdi) 759 ret 760 761 DEBUG_INFO __kmp_xchg_fixed8 762 763 764//------------------------------------------------------------------------ 765// FUNCTION __kmp_xchg_fixed16 766// 767// kmp_int16 768// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); 769// 770// parameters: 771// p: %rdi 772// d: %si 773// return: %ax 774 .text 775 PROC __kmp_xchg_fixed16 776 777 movw %si, %ax // "d" 778 779 lock 780 xchgw %ax,(%rdi) 781 ret 782 783 DEBUG_INFO __kmp_xchg_fixed16 784 785 786//------------------------------------------------------------------------ 787// FUNCTION __kmp_xchg_fixed32 788// 789// kmp_int32 790// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); 791// 792// parameters: 793// p: %rdi 794// d: %esi 795// 796// return: %eax 797 .text 798 PROC __kmp_xchg_fixed32 799 800 movl %esi, %eax // "d" 801 802 lock 803 xchgl %eax,(%rdi) 804 ret 805 806 DEBUG_INFO __kmp_xchg_fixed32 807 808 809//------------------------------------------------------------------------ 810// FUNCTION __kmp_xchg_fixed64 811// 812// kmp_int64 813// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); 814// 815// parameters: 816// p: %rdi 817// d: %rsi 818// return: %rax 819 .text 820 PROC __kmp_xchg_fixed64 821 822 movq %rsi, %rax // "d" 823 824 lock 825 xchgq %rax,(%rdi) 826 ret 827 828 DEBUG_INFO __kmp_xchg_fixed64 829 830 831//------------------------------------------------------------------------ 832// FUNCTION __kmp_compare_and_store8 833// 834// kmp_int8 835// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 836// 837// parameters: 838// p: %rdi 839// cv: %esi 840// sv: %edx 841// 842// return: %eax 843 .text 844 PROC __kmp_compare_and_store8 845 846 movb %sil, %al // "cv" 847 lock 848 cmpxchgb %dl,(%rdi) 849 sete %al // if %al == (%rdi) set %al = 1 else set %al = 0 850 andq $1, %rax // sign extend previous instruction for return value 851 ret 852 853 DEBUG_INFO __kmp_compare_and_store8 854 855 856//------------------------------------------------------------------------ 857// FUNCTION __kmp_compare_and_store16 858// 859// kmp_int16 860// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); 861// 862// parameters: 863// p: %rdi 864// cv: %si 865// sv: %dx 866// 867// return: %eax 868 .text 869 PROC __kmp_compare_and_store16 870 871 movw %si, %ax // "cv" 872 lock 873 cmpxchgw %dx,(%rdi) 874 sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0 875 andq $1, %rax // sign extend previous instruction for return value 876 ret 877 878 DEBUG_INFO __kmp_compare_and_store16 879 880 881//------------------------------------------------------------------------ 882// FUNCTION __kmp_compare_and_store32 883// 884// kmp_int32 885// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); 886// 887// parameters: 888// p: %rdi 889// cv: %esi 890// sv: %edx 891// 892// return: %eax 893 .text 894 PROC __kmp_compare_and_store32 895 896 movl %esi, %eax // "cv" 897 lock 898 cmpxchgl %edx,(%rdi) 899 sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0 900 andq $1, %rax // sign extend previous instruction for return value 901 ret 902 903 DEBUG_INFO __kmp_compare_and_store32 904 905 906//------------------------------------------------------------------------ 907// FUNCTION __kmp_compare_and_store64 908// 909// kmp_int32 910// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); 911// 912// parameters: 913// p: %rdi 914// cv: %rsi 915// sv: %rdx 916// return: %eax 917 .text 918 PROC __kmp_compare_and_store64 919 920 movq %rsi, %rax // "cv" 921 lock 922 cmpxchgq %rdx,(%rdi) 923 sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0 924 andq $1, %rax // sign extend previous instruction for return value 925 ret 926 927 DEBUG_INFO __kmp_compare_and_store64 928 929//------------------------------------------------------------------------ 930// FUNCTION __kmp_compare_and_store_ret8 931// 932// kmp_int8 933// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 934// 935// parameters: 936// p: %rdi 937// cv: %esi 938// sv: %edx 939// 940// return: %eax 941 .text 942 PROC __kmp_compare_and_store_ret8 943 944 movb %sil, %al // "cv" 945 lock 946 cmpxchgb %dl,(%rdi) 947 ret 948 949 DEBUG_INFO __kmp_compare_and_store_ret8 950 951 952//------------------------------------------------------------------------ 953// FUNCTION __kmp_compare_and_store_ret16 954// 955// kmp_int16 956// __kmp_compare_and_store16_ret( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); 957// 958// parameters: 959// p: %rdi 960// cv: %si 961// sv: %dx 962// 963// return: %eax 964 .text 965 PROC __kmp_compare_and_store_ret16 966 967 movw %si, %ax // "cv" 968 lock 969 cmpxchgw %dx,(%rdi) 970 ret 971 972 DEBUG_INFO __kmp_compare_and_store_ret16 973 974 975//------------------------------------------------------------------------ 976// FUNCTION __kmp_compare_and_store_ret32 977// 978// kmp_int32 979// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); 980// 981// parameters: 982// p: %rdi 983// cv: %esi 984// sv: %edx 985// 986// return: %eax 987 .text 988 PROC __kmp_compare_and_store_ret32 989 990 movl %esi, %eax // "cv" 991 lock 992 cmpxchgl %edx,(%rdi) 993 ret 994 995 DEBUG_INFO __kmp_compare_and_store_ret32 996 997 998//------------------------------------------------------------------------ 999// FUNCTION __kmp_compare_and_store_ret64 1000// 1001// kmp_int64 1002// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); 1003// 1004// parameters: 1005// p: %rdi 1006// cv: %rsi 1007// sv: %rdx 1008// return: %eax 1009 .text 1010 PROC __kmp_compare_and_store_ret64 1011 1012 movq %rsi, %rax // "cv" 1013 lock 1014 cmpxchgq %rdx,(%rdi) 1015 ret 1016 1017 DEBUG_INFO __kmp_compare_and_store_ret64 1018 1019# endif /* !KMP_ASM_INTRINS */ 1020 1021 1022# if !KMP_MIC 1023 1024# if !KMP_ASM_INTRINS 1025 1026//------------------------------------------------------------------------ 1027// FUNCTION __kmp_xchg_real32 1028// 1029// kmp_real32 1030// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); 1031// 1032// parameters: 1033// addr: %rdi 1034// data: %xmm0 (lower 4 bytes) 1035// 1036// return: %xmm0 (lower 4 bytes) 1037 .text 1038 PROC __kmp_xchg_real32 1039 1040 movd %xmm0, %eax // load "data" to eax 1041 1042 lock 1043 xchgl %eax, (%rdi) 1044 1045 movd %eax, %xmm0 // load old value into return register 1046 1047 ret 1048 1049 DEBUG_INFO __kmp_xchg_real32 1050 1051 1052//------------------------------------------------------------------------ 1053// FUNCTION __kmp_xchg_real64 1054// 1055// kmp_real64 1056// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); 1057// 1058// parameters: 1059// addr: %rdi 1060// data: %xmm0 (lower 8 bytes) 1061// return: %xmm0 (lower 8 bytes) 1062 .text 1063 PROC __kmp_xchg_real64 1064 1065 movd %xmm0, %rax // load "data" to rax 1066 1067 lock 1068 xchgq %rax, (%rdi) 1069 1070 movd %rax, %xmm0 // load old value into return register 1071 ret 1072 1073 DEBUG_INFO __kmp_xchg_real64 1074 1075 1076# endif /* !KMP_MIC */ 1077 1078# endif /* !KMP_ASM_INTRINS */ 1079 1080//------------------------------------------------------------------------ 1081// int 1082// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1083// int gtid, int tid, 1084// int argc, void *p_argv[] 1085// #if OMPT_SUPPORT 1086// , 1087// void **exit_frame_ptr 1088// #endif 1089// ) { 1090// #if OMPT_SUPPORT 1091// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1092// #endif 1093// 1094// (*pkfn)( & gtid, & tid, argv[0], ... ); 1095// return 1; 1096// } 1097// 1098// note: at call to pkfn must have %rsp 128-byte aligned for compiler 1099// 1100// parameters: 1101// %rdi: pkfn 1102// %esi: gtid 1103// %edx: tid 1104// %ecx: argc 1105// %r8: p_argv 1106// %r9: &exit_frame 1107// 1108// locals: 1109// __gtid: gtid parm pushed on stack so can pass >id to pkfn 1110// __tid: tid parm pushed on stack so can pass &tid to pkfn 1111// 1112// reg temps: 1113// %rax: used all over the place 1114// %rdx: used in stack pointer alignment calculation 1115// %r11: used to traverse p_argv array 1116// %rsi: used as temporary for stack parameters 1117// used as temporary for number of pkfn parms to push 1118// %rbx: used to hold pkfn address, and zero constant, callee-save 1119// 1120// return: %eax (always 1/TRUE) 1121__gtid = -16 1122__tid = -24 1123 1124// -- Begin __kmp_invoke_microtask 1125// mark_begin; 1126 .text 1127 PROC __kmp_invoke_microtask 1128 1129 pushq %rbp // save base pointer 1130 KMP_CFI_DEF_OFFSET 16 1131 KMP_CFI_OFFSET rbp,-16 1132 movq %rsp,%rbp // establish the base pointer for this routine. 1133 KMP_CFI_REGISTER rbp 1134 1135#if OMPT_SUPPORT 1136 movq %rbp, (%r9) // save exit_frame 1137#endif 1138 1139 pushq %rbx // %rbx is callee-saved register 1140 pushq %rsi // Put gtid on stack so can pass &tgid to pkfn 1141 pushq %rdx // Put tid on stack so can pass &tid to pkfn 1142 1143 movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax 1144 movq $0, %rbx // constant for cmovs later 1145 subq $4, %rax // subtract four args passed in registers to pkfn 1146#if KMP_MIC 1147 js KMP_LABEL(kmp_0) // jump to movq 1148 jmp KMP_LABEL(kmp_0_exit) // jump ahead 1149KMP_LABEL(kmp_0): 1150 movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) 1151KMP_LABEL(kmp_0_exit): 1152#else 1153 cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) 1154#endif // KMP_MIC 1155 1156 movq %rax, %rsi // save max(0, argc-4) -> %rsi for later 1157 shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 1158 1159 movq %rsp, %rdx // 1160 subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- 1161 // without align, stack ptr would be this 1162 movq %rdx, %rax // Save to %rax 1163 1164 andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align) 1165 subq %rax, %rdx // Amount to subtract from %rsp 1166 subq %rdx, %rsp // Prepare the stack ptr -- 1167 // now %rsp will align to 128-byte boundary at call site 1168 1169 // setup pkfn parameter reg and stack 1170 movq %rcx, %rax // argc -> %rax 1171 cmpq $0, %rsi 1172 je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push 1173 shlq $3, %rcx // argc*8 -> %rcx 1174 movq %r8, %rdx // p_argv -> %rdx 1175 addq %rcx, %rdx // &p_argv[argc] -> %rdx 1176 1177 movq %rsi, %rcx // max (0, argc-4) -> %rcx 1178 1179KMP_LABEL(kmp_invoke_push_parms): 1180 // push nth - 7th parms to pkfn on stack 1181 subq $8, %rdx // decrement p_argv pointer to previous parm 1182 movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi 1183 pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order) 1184 subl $1, %ecx 1185 1186// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e 1187// if the name of the label that is an operand of this jecxz starts with a dot ("."); 1188// Apple's linker does not support 1-byte length relocation; 1189// Resolution: replace all .labelX entries with L_labelX. 1190 1191 jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left 1192 jmp KMP_LABEL(kmp_invoke_push_parms) 1193 ALIGN 3 1194KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. 1195 // order here is important to avoid trashing 1196 // registers used for both input and output parms! 1197 movq %rdi, %rbx // pkfn -> %rbx 1198 leaq __gtid(%rbp), %rdi // >id -> %rdi (store 1st parm to pkfn) 1199 leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) 1200 // Check if argc is 0 1201 cmpq $0, %rax 1202 je KMP_LABEL(kmp_no_args) // Jump ahead 1203 1204 movq %r8, %r11 // p_argv -> %r11 1205 1206#if KMP_MIC 1207 cmpq $4, %rax // argc >= 4? 1208 jns KMP_LABEL(kmp_4) // jump to movq 1209 jmp KMP_LABEL(kmp_4_exit) // jump ahead 1210KMP_LABEL(kmp_4): 1211 movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) 1212KMP_LABEL(kmp_4_exit): 1213 1214 cmpq $3, %rax // argc >= 3? 1215 jns KMP_LABEL(kmp_3) // jump to movq 1216 jmp KMP_LABEL(kmp_3_exit) // jump ahead 1217KMP_LABEL(kmp_3): 1218 movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) 1219KMP_LABEL(kmp_3_exit): 1220 1221 cmpq $2, %rax // argc >= 2? 1222 jns KMP_LABEL(kmp_2) // jump to movq 1223 jmp KMP_LABEL(kmp_2_exit) // jump ahead 1224KMP_LABEL(kmp_2): 1225 movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) 1226KMP_LABEL(kmp_2_exit): 1227 1228 cmpq $1, %rax // argc >= 1? 1229 jns KMP_LABEL(kmp_1) // jump to movq 1230 jmp KMP_LABEL(kmp_1_exit) // jump ahead 1231KMP_LABEL(kmp_1): 1232 movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) 1233KMP_LABEL(kmp_1_exit): 1234#else 1235 cmpq $4, %rax // argc >= 4? 1236 cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) 1237 1238 cmpq $3, %rax // argc >= 3? 1239 cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) 1240 1241 cmpq $2, %rax // argc >= 2? 1242 cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) 1243 1244 cmpq $1, %rax // argc >= 1? 1245 cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) 1246#endif // KMP_MIC 1247 1248KMP_LABEL(kmp_no_args): 1249 call *%rbx // call (*pkfn)(); 1250 movq $1, %rax // move 1 into return register; 1251 1252 movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified 1253 movq %rbp, %rsp // restore stack pointer 1254 popq %rbp // restore frame pointer 1255 KMP_CFI_DEF rsp,8 1256 ret 1257 1258 DEBUG_INFO __kmp_invoke_microtask 1259// -- End __kmp_invoke_microtask 1260 1261// kmp_uint64 1262// __kmp_hardware_timestamp(void) 1263 .text 1264 PROC __kmp_hardware_timestamp 1265 rdtsc 1266 shlq $32, %rdx 1267 orq %rdx, %rax 1268 ret 1269 1270 DEBUG_INFO __kmp_hardware_timestamp 1271// -- End __kmp_hardware_timestamp 1272 1273//------------------------------------------------------------------------ 1274// FUNCTION __kmp_bsr32 1275// 1276// int 1277// __kmp_bsr32( int ); 1278 .text 1279 PROC __kmp_bsr32 1280 1281 bsr %edi,%eax 1282 ret 1283 1284 DEBUG_INFO __kmp_bsr32 1285 1286// ----------------------------------------------------------------------- 1287#endif /* KMP_ARCH_X86_64 */ 1288 1289// ' 1290#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) 1291 1292//------------------------------------------------------------------------ 1293// int 1294// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1295// int gtid, int tid, 1296// int argc, void *p_argv[] 1297// #if OMPT_SUPPORT 1298// , 1299// void **exit_frame_ptr 1300// #endif 1301// ) { 1302// #if OMPT_SUPPORT 1303// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1304// #endif 1305// 1306// (*pkfn)( & gtid, & tid, argv[0], ... ); 1307// 1308// // FIXME: This is done at call-site and can be removed here. 1309// #if OMPT_SUPPORT 1310// *exit_frame_ptr = 0; 1311// #endif 1312// 1313// return 1; 1314// } 1315// 1316// parameters: 1317// x0: pkfn 1318// w1: gtid 1319// w2: tid 1320// w3: argc 1321// x4: p_argv 1322// x5: &exit_frame 1323// 1324// locals: 1325// __gtid: gtid parm pushed on stack so can pass >id to pkfn 1326// __tid: tid parm pushed on stack so can pass &tid to pkfn 1327// 1328// reg temps: 1329// x8: used to hold pkfn address 1330// w9: used as temporary for number of pkfn parms 1331// x10: used to traverse p_argv array 1332// x11: used as temporary for stack placement calculation 1333// x12: used as temporary for stack parameters 1334// x19: used to preserve exit_frame_ptr, callee-save 1335// 1336// return: w0 (always 1/TRUE) 1337// 1338 1339__gtid = 4 1340__tid = 8 1341 1342// -- Begin __kmp_invoke_microtask 1343// mark_begin; 1344 .text 1345 PROC __kmp_invoke_microtask 1346 PACBTI_C 1347 1348 stp x29, x30, [sp, #-16]! 1349# if OMPT_SUPPORT 1350 stp x19, x20, [sp, #-16]! 1351# endif 1352 mov x29, sp 1353 1354 orr w9, wzr, #1 1355 add w9, w9, w3, lsr #1 1356 sub sp, sp, w9, uxtw #4 1357 mov x11, sp 1358 1359 mov x8, x0 1360 str w1, [x29, #-__gtid] 1361 str w2, [x29, #-__tid] 1362 mov w9, w3 1363 mov x10, x4 1364# if OMPT_SUPPORT 1365 mov x19, x5 1366 str x29, [x19] 1367# endif 1368 1369 sub x0, x29, #__gtid 1370 sub x1, x29, #__tid 1371 1372 cbz w9, KMP_LABEL(kmp_1) 1373 ldr x2, [x10] 1374 1375 sub w9, w9, #1 1376 cbz w9, KMP_LABEL(kmp_1) 1377 ldr x3, [x10, #8]! 1378 1379 sub w9, w9, #1 1380 cbz w9, KMP_LABEL(kmp_1) 1381 ldr x4, [x10, #8]! 1382 1383 sub w9, w9, #1 1384 cbz w9, KMP_LABEL(kmp_1) 1385 ldr x5, [x10, #8]! 1386 1387 sub w9, w9, #1 1388 cbz w9, KMP_LABEL(kmp_1) 1389 ldr x6, [x10, #8]! 1390 1391 sub w9, w9, #1 1392 cbz w9, KMP_LABEL(kmp_1) 1393 ldr x7, [x10, #8]! 1394 1395KMP_LABEL(kmp_0): 1396 sub w9, w9, #1 1397 cbz w9, KMP_LABEL(kmp_1) 1398 ldr x12, [x10, #8]! 1399 str x12, [x11], #8 1400 b KMP_LABEL(kmp_0) 1401KMP_LABEL(kmp_1): 1402 blr x8 1403 orr w0, wzr, #1 1404 mov sp, x29 1405# if OMPT_SUPPORT 1406 str xzr, [x19] 1407 ldp x19, x20, [sp], #16 1408# endif 1409 ldp x29, x30, [sp], #16 1410 PACBTI_RET 1411 ret 1412 1413 DEBUG_INFO __kmp_invoke_microtask 1414// -- End __kmp_invoke_microtask 1415 1416#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */ 1417 1418#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM 1419 1420//------------------------------------------------------------------------ 1421// int 1422// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1423// int gtid, int tid, 1424// int argc, void *p_argv[] 1425// #if OMPT_SUPPORT 1426// , 1427// void **exit_frame_ptr 1428// #endif 1429// ) { 1430// #if OMPT_SUPPORT 1431// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1432// #endif 1433// 1434// (*pkfn)( & gtid, & tid, argv[0], ... ); 1435// 1436// // FIXME: This is done at call-site and can be removed here. 1437// #if OMPT_SUPPORT 1438// *exit_frame_ptr = 0; 1439// #endif 1440// 1441// return 1; 1442// } 1443// 1444// parameters: 1445// r0: pkfn 1446// r1: gtid 1447// r2: tid 1448// r3: argc 1449// r4(stack): p_argv 1450// r5(stack): &exit_frame 1451// 1452// locals: 1453// __gtid: gtid parm pushed on stack so can pass >id to pkfn 1454// __tid: tid parm pushed on stack so can pass &tid to pkfn 1455// 1456// reg temps: 1457// r4: used to hold pkfn address 1458// r5: used as temporary for number of pkfn parms 1459// r6: used to traverse p_argv array 1460// r7: frame pointer (in some configurations) 1461// r8: used as temporary for stack placement calculation 1462// and as pointer to base of callee saved area 1463// r9: used as temporary for stack parameters 1464// r10: used to preserve exit_frame_ptr, callee-save 1465// r11: frame pointer (in some configurations) 1466// 1467// return: r0 (always 1/TRUE) 1468// 1469 1470__gtid = 4 1471__tid = 8 1472 1473// -- Begin __kmp_invoke_microtask 1474// mark_begin; 1475 .text 1476 PROC __kmp_invoke_microtask 1477 1478 // Pushing one extra register (r3) to keep the stack aligned 1479 // for when we call pkfn below 1480 push {r3-r11,lr} 1481 // Load p_argv and &exit_frame 1482 ldr r4, [sp, #10*4] 1483# if OMPT_SUPPORT 1484 ldr r5, [sp, #11*4] 1485# endif 1486 1487# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS) 1488# define FP r7 1489# define FPOFF 4*4 1490#else 1491# define FP r11 1492# define FPOFF 8*4 1493#endif 1494 add FP, sp, #FPOFF 1495# if OMPT_SUPPORT 1496 mov r10, r5 1497 str FP, [r10] 1498# endif 1499 mov r8, sp 1500 1501 // Calculate how much stack to allocate, in increments of 8 bytes. 1502 // We strictly need 4*(argc-2) bytes (2 arguments are passed in 1503 // registers) but allocate 4*argc for simplicity (to avoid needing 1504 // to handle the argc<2 cases). We align the number of bytes 1505 // allocated to 8 bytes, to keep the stack aligned. (Since we 1506 // already allocate more than enough, it's ok to round down 1507 // instead of up for the alignment.) We allocate another extra 1508 // 8 bytes for gtid and tid. 1509 mov r5, #1 1510 add r5, r5, r3, lsr #1 1511 sub sp, sp, r5, lsl #3 1512 1513 str r1, [r8, #-__gtid] 1514 str r2, [r8, #-__tid] 1515 mov r5, r3 1516 mov r6, r4 1517 mov r4, r0 1518 1519 // Prepare the first 2 parameters to pkfn - pointers to gtid and tid 1520 // in our stack frame. 1521 sub r0, r8, #__gtid 1522 sub r1, r8, #__tid 1523 1524 mov r8, sp 1525 1526 // Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2 1527 cmp r5, #0 1528 beq KMP_LABEL(kmp_1) 1529 ldr r2, [r6] 1530 1531 subs r5, r5, #1 1532 beq KMP_LABEL(kmp_1) 1533 ldr r3, [r6, #4]! 1534 1535 // Loop, loading the rest of p_argv and writing the elements on the 1536 // stack. 1537KMP_LABEL(kmp_0): 1538 subs r5, r5, #1 1539 beq KMP_LABEL(kmp_1) 1540 ldr r12, [r6, #4]! 1541 str r12, [r8], #4 1542 b KMP_LABEL(kmp_0) 1543KMP_LABEL(kmp_1): 1544 blx r4 1545 mov r0, #1 1546 1547 sub r4, FP, #FPOFF 1548 mov sp, r4 1549# undef FP 1550# undef FPOFF 1551 1552# if OMPT_SUPPORT 1553 mov r1, #0 1554 str r1, [r10] 1555# endif 1556 pop {r3-r11,pc} 1557 1558 DEBUG_INFO __kmp_invoke_microtask 1559// -- End __kmp_invoke_microtask 1560 1561#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */ 1562 1563#if KMP_ARCH_PPC64 1564 1565//------------------------------------------------------------------------ 1566// int 1567// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1568// int gtid, int tid, 1569// int argc, void *p_argv[] 1570// #if OMPT_SUPPORT 1571// , 1572// void **exit_frame_ptr 1573// #endif 1574// ) { 1575// #if OMPT_SUPPORT 1576// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1577// #endif 1578// 1579// (*pkfn)( & gtid, & tid, argv[0], ... ); 1580// 1581// // FIXME: This is done at call-site and can be removed here. 1582// #if OMPT_SUPPORT 1583// *exit_frame_ptr = 0; 1584// #endif 1585// 1586// return 1; 1587// } 1588// 1589// parameters: 1590// r3: pkfn 1591// r4: gtid 1592// r5: tid 1593// r6: argc 1594// r7: p_argv 1595// r8: &exit_frame 1596// 1597// return: r3 (always 1/TRUE) 1598// 1599 .text 1600# if KMP_ARCH_PPC64_ELFv2 1601 .abiversion 2 1602# endif 1603 .globl __kmp_invoke_microtask 1604 1605# if KMP_ARCH_PPC64_ELFv2 1606 .p2align 4 1607# else 1608 .p2align 2 1609# endif 1610 1611 .type __kmp_invoke_microtask,@function 1612 1613# if KMP_ARCH_PPC64_ELFv2 1614__kmp_invoke_microtask: 1615.Lfunc_begin0: 1616.Lfunc_gep0: 1617 addis 2, 12, .TOC.-.Lfunc_gep0@ha 1618 addi 2, 2, .TOC.-.Lfunc_gep0@l 1619.Lfunc_lep0: 1620 .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 1621# else 1622 .section .opd,"aw",@progbits 1623__kmp_invoke_microtask: 1624 .p2align 3 1625 .quad .Lfunc_begin0 1626 .quad .TOC.@tocbase 1627 .quad 0 1628 .text 1629.Lfunc_begin0: 1630# endif 1631 1632// -- Begin __kmp_invoke_microtask 1633// mark_begin; 1634 1635// We need to allocate a stack frame large enough to hold all of the parameters 1636// on the stack for the microtask plus what this function needs. That's 48 1637// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the 1638// parameters to the microtask, plus 8 bytes to store the values of r4 and r5, 1639// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes 1640// to save r30 to hold a copy of r8. 1641 1642 .cfi_startproc 1643 mflr 0 1644 std 31, -8(1) 1645 std 0, 16(1) 1646 1647// This is unusual because normally we'd set r31 equal to r1 after the stack 1648// frame is established. In this case, however, we need to dynamically compute 1649// the stack frame size, and so we keep a direct copy of r1 to access our 1650// register save areas and restore the r1 value before returning. 1651 mr 31, 1 1652 .cfi_def_cfa_register r31 1653 .cfi_offset r31, -8 1654 .cfi_offset lr, 16 1655 1656// Compute the size necessary for the local stack frame. 1657# if KMP_ARCH_PPC64_ELFv2 1658 li 12, 72 1659# else 1660 li 12, 88 1661# endif 1662 sldi 0, 6, 3 1663 add 12, 0, 12 1664 neg 12, 12 1665 1666// We need to make sure that the stack frame stays aligned (to 16 bytes). 1667 li 0, -16 1668 and 12, 0, 12 1669 1670// Establish the local stack frame. 1671 stdux 1, 1, 12 1672 1673# if OMPT_SUPPORT 1674 .cfi_offset r30, -16 1675 std 30, -16(31) 1676 std 1, 0(8) 1677 mr 30, 8 1678# endif 1679 1680// Store gtid and tid to the stack because they're passed by reference to the microtask. 1681 stw 4, -20(31) 1682 stw 5, -24(31) 1683 1684 mr 12, 6 1685 mr 4, 7 1686 1687 cmpwi 0, 12, 1 1688 blt 0, .Lcall 1689 1690 ld 5, 0(4) 1691 1692 cmpwi 0, 12, 2 1693 blt 0, .Lcall 1694 1695 ld 6, 8(4) 1696 1697 cmpwi 0, 12, 3 1698 blt 0, .Lcall 1699 1700 ld 7, 16(4) 1701 1702 cmpwi 0, 12, 4 1703 blt 0, .Lcall 1704 1705 ld 8, 24(4) 1706 1707 cmpwi 0, 12, 5 1708 blt 0, .Lcall 1709 1710 ld 9, 32(4) 1711 1712 cmpwi 0, 12, 6 1713 blt 0, .Lcall 1714 1715 ld 10, 40(4) 1716 1717 cmpwi 0, 12, 7 1718 blt 0, .Lcall 1719 1720// There are more than 6 microtask parameters, so we need to store the 1721// remainder to the stack. 1722 addi 12, 12, -6 1723 mtctr 12 1724 1725// These are set to 8 bytes before the first desired store address (we're using 1726// pre-increment loads and stores in the loop below). The parameter save area 1727// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and 1728// 32 + 8*8 == 96 bytes above r1 for ELFv2. 1729 addi 4, 4, 40 1730# if KMP_ARCH_PPC64_ELFv2 1731 addi 12, 1, 88 1732# else 1733 addi 12, 1, 104 1734# endif 1735 1736.Lnext: 1737 ldu 0, 8(4) 1738 stdu 0, 8(12) 1739 bdnz .Lnext 1740 1741.Lcall: 1742# if KMP_ARCH_PPC64_ELFv2 1743 std 2, 24(1) 1744 mr 12, 3 1745#else 1746 std 2, 40(1) 1747// For ELFv1, we need to load the actual function address from the function descriptor. 1748 ld 12, 0(3) 1749 ld 2, 8(3) 1750 ld 11, 16(3) 1751#endif 1752 1753 addi 3, 31, -20 1754 addi 4, 31, -24 1755 1756 mtctr 12 1757 bctrl 1758# if KMP_ARCH_PPC64_ELFv2 1759 ld 2, 24(1) 1760# else 1761 ld 2, 40(1) 1762# endif 1763 1764# if OMPT_SUPPORT 1765 li 3, 0 1766 std 3, 0(30) 1767# endif 1768 1769 li 3, 1 1770 1771# if OMPT_SUPPORT 1772 ld 30, -16(31) 1773# endif 1774 1775 mr 1, 31 1776 ld 0, 16(1) 1777 ld 31, -8(1) 1778 mtlr 0 1779 blr 1780 1781 .long 0 1782 .quad 0 1783.Lfunc_end0: 1784 .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 1785 .cfi_endproc 1786 1787// -- End __kmp_invoke_microtask 1788 1789#endif /* KMP_ARCH_PPC64 */ 1790 1791#if KMP_ARCH_RISCV64 1792 1793//------------------------------------------------------------------------ 1794// 1795// typedef void (*microtask_t)(int *gtid, int *tid, ...); 1796// 1797// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 1798// void *p_argv[] 1799// #if OMPT_SUPPORT 1800// , 1801// void **exit_frame_ptr 1802// #endif 1803// ) { 1804// #if OMPT_SUPPORT 1805// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1806// #endif 1807// 1808// (*pkfn)(>id, &tid, argv[0], ...); 1809// 1810// return 1; 1811// } 1812// 1813// Parameters: 1814// a0: pkfn 1815// a1: gtid 1816// a2: tid 1817// a3: argc 1818// a4: p_argv 1819// a5: exit_frame_ptr 1820// 1821// Locals: 1822// __gtid: gtid param pushed on stack so can pass >id to pkfn 1823// __tid: tid param pushed on stack so can pass &tid to pkfn 1824// 1825// Temp. registers: 1826// 1827// t0: used to calculate the dynamic stack size / used to hold pkfn address 1828// t1: used as temporary for stack placement calculation 1829// t2: used as temporary for stack arguments 1830// t3: used as temporary for number of remaining pkfn parms 1831// t4: used to traverse p_argv array 1832// 1833// return: a0 (always 1/TRUE) 1834// 1835 1836__gtid = -20 1837__tid = -24 1838 1839// -- Begin __kmp_invoke_microtask 1840// mark_begin; 1841 .text 1842 .globl __kmp_invoke_microtask 1843 .p2align 1 1844 .type __kmp_invoke_microtask,@function 1845__kmp_invoke_microtask: 1846 .cfi_startproc 1847 1848 // First, save ra and fp 1849 addi sp, sp, -16 1850 sd ra, 8(sp) 1851 sd fp, 0(sp) 1852 addi fp, sp, 16 1853 .cfi_def_cfa fp, 0 1854 .cfi_offset ra, -8 1855 .cfi_offset fp, -16 1856 1857 // Compute the dynamic stack size: 1858 // 1859 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by 1860 // reference 1861 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' 1862 // function by register. Given that we have 8 of such registers (a[0-7]) 1863 // and two + 'argc' arguments (consider >id and &tid), we need to 1864 // reserve max(0, argc - 6)*8 extra bytes 1865 // 1866 // The total number of bytes is then max(0, argc - 6)*8 + 8 1867 1868 // Compute max(0, argc - 6) using the following bithack: 1869 // max(0, x) = x - (x & (x >> 31)), where x := argc - 6 1870 // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax 1871 addi t0, a3, -6 1872 srai t1, t0, 31 1873 and t1, t0, t1 1874 sub t0, t0, t1 1875 1876 addi t0, t0, 1 1877 1878 slli t0, t0, 3 1879 sub sp, sp, t0 1880 1881 // Align the stack to 16 bytes 1882 andi sp, sp, -16 1883 1884 mv t0, a0 1885 mv t3, a3 1886 mv t4, a4 1887 1888#if OMPT_SUPPORT 1889 // Save frame pointer into exit_frame 1890 sd fp, 0(a5) 1891#endif 1892 1893 // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) 1894 1895 sw a1, __gtid(fp) 1896 sw a2, __tid(fp) 1897 1898 addi a0, fp, __gtid 1899 addi a1, fp, __tid 1900 1901 beqz t3, .L_kmp_3 1902 ld a2, 0(t4) 1903 1904 addi t3, t3, -1 1905 beqz t3, .L_kmp_3 1906 ld a3, 8(t4) 1907 1908 addi t3, t3, -1 1909 beqz t3, .L_kmp_3 1910 ld a4, 16(t4) 1911 1912 addi t3, t3, -1 1913 beqz t3, .L_kmp_3 1914 ld a5, 24(t4) 1915 1916 addi t3, t3, -1 1917 beqz t3, .L_kmp_3 1918 ld a6, 32(t4) 1919 1920 addi t3, t3, -1 1921 beqz t3, .L_kmp_3 1922 ld a7, 40(t4) 1923 1924 // Prepare any additional argument passed through the stack 1925 addi t4, t4, 48 1926 mv t1, sp 1927 j .L_kmp_2 1928.L_kmp_1: 1929 ld t2, 0(t4) 1930 sd t2, 0(t1) 1931 addi t4, t4, 8 1932 addi t1, t1, 8 1933.L_kmp_2: 1934 addi t3, t3, -1 1935 bnez t3, .L_kmp_1 1936 1937.L_kmp_3: 1938 // Call pkfn function 1939 jalr t0 1940 1941 // Restore stack and return 1942 1943 addi a0, zero, 1 1944 1945 addi sp, fp, -16 1946 ld fp, 0(sp) 1947 ld ra, 8(sp) 1948 addi sp, sp, 16 1949 ret 1950.Lfunc_end0: 1951 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 1952 .cfi_endproc 1953 1954// -- End __kmp_invoke_microtask 1955 1956#endif /* KMP_ARCH_RISCV64 */ 1957 1958#if KMP_ARCH_LOONGARCH64 1959 1960//------------------------------------------------------------------------ 1961// 1962// typedef void (*microtask_t)(int *gtid, int *tid, ...); 1963// 1964// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 1965// void *p_argv[] 1966// #if OMPT_SUPPORT 1967// , 1968// void **exit_frame_ptr 1969// #endif 1970// ) { 1971// #if OMPT_SUPPORT 1972// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1973// #endif 1974// 1975// (*pkfn)(>id, &tid, argv[0], ...); 1976// 1977// return 1; 1978// } 1979// 1980// Parameters: 1981// a0: pkfn 1982// a1: gtid 1983// a2: tid 1984// a3: argc 1985// a4: p_argv 1986// a5: exit_frame_ptr 1987// 1988// Locals: 1989// __gtid: gtid param pushed on stack so can pass >id to pkfn 1990// __tid: tid param pushed on stack so can pass &tid to pkfn 1991// 1992// Temp registers: 1993// 1994// t0: used to calculate the dynamic stack size / used to hold pkfn address 1995// t1: used as temporary for stack placement calculation 1996// t2: used as temporary for stack arguments 1997// t3: used as temporary for number of remaining pkfn parms 1998// t4: used to traverse p_argv array 1999// 2000// return: a0 (always 1/TRUE) 2001// 2002 2003// -- Begin __kmp_invoke_microtask 2004// mark_begin; 2005 .text 2006 .globl __kmp_invoke_microtask 2007 .p2align 2 2008 .type __kmp_invoke_microtask,@function 2009__kmp_invoke_microtask: 2010 .cfi_startproc 2011 2012 // First, save ra and fp 2013 addi.d $sp, $sp, -16 2014 st.d $ra, $sp, 8 2015 st.d $fp, $sp, 0 2016 addi.d $fp, $sp, 16 2017 .cfi_def_cfa 22, 0 2018 .cfi_offset 1, -8 2019 .cfi_offset 22, -16 2020 2021 // Compute the dynamic stack size: 2022 // 2023 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by 2024 // reference 2025 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' 2026 // function by register. Given that we have 8 of such registers (a[0-7]) 2027 // and two + 'argc' arguments (consider >id and &tid), we need to 2028 // reserve max(0, argc - 6)*8 extra bytes 2029 // 2030 // The total number of bytes is then max(0, argc - 6)*8 + 8 2031 2032 addi.d $t0, $a3, -6 2033 slt $t1, $t0, $zero 2034 masknez $t0, $t0, $t1 2035 addi.d $t0, $t0, 1 2036 slli.d $t0, $t0, 3 2037 sub.d $sp, $sp, $t0 2038 2039 // Align the stack to 16 bytes 2040 bstrins.d $sp, $zero, 3, 0 2041 2042 move $t0, $a0 2043 move $t3, $a3 2044 move $t4, $a4 2045 2046#if OMPT_SUPPORT 2047 // Save frame pointer into exit_frame 2048 st.d $fp, $a5, 0 2049#endif 2050 2051 // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) 2052 2053 st.w $a1, $fp, -20 2054 st.w $a2, $fp, -24 2055 2056 addi.d $a0, $fp, -20 2057 addi.d $a1, $fp, -24 2058 2059 beqz $t3, .L_kmp_3 2060 ld.d $a2, $t4, 0 2061 2062 addi.d $t3, $t3, -1 2063 beqz $t3, .L_kmp_3 2064 ld.d $a3, $t4, 8 2065 2066 addi.d $t3, $t3, -1 2067 beqz $t3, .L_kmp_3 2068 ld.d $a4, $t4, 16 2069 2070 addi.d $t3, $t3, -1 2071 beqz $t3, .L_kmp_3 2072 ld.d $a5, $t4, 24 2073 2074 addi.d $t3, $t3, -1 2075 beqz $t3, .L_kmp_3 2076 ld.d $a6, $t4, 32 2077 2078 addi.d $t3, $t3, -1 2079 beqz $t3, .L_kmp_3 2080 ld.d $a7, $t4, 40 2081 2082 // Prepare any additional argument passed through the stack 2083 addi.d $t4, $t4, 48 2084 move $t1, $sp 2085 b .L_kmp_2 2086.L_kmp_1: 2087 ld.d $t2, $t4, 0 2088 st.d $t2, $t1, 0 2089 addi.d $t4, $t4, 8 2090 addi.d $t1, $t1, 8 2091.L_kmp_2: 2092 addi.d $t3, $t3, -1 2093 bnez $t3, .L_kmp_1 2094 2095.L_kmp_3: 2096 // Call pkfn function 2097 jirl $ra, $t0, 0 2098 2099 // Restore stack and return 2100 2101 addi.d $a0, $zero, 1 2102 2103 addi.d $sp, $fp, -16 2104 ld.d $fp, $sp, 0 2105 ld.d $ra, $sp, 8 2106 addi.d $sp, $sp, 16 2107 jr $ra 2108.Lfunc_end0: 2109 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 2110 .cfi_endproc 2111 2112// -- End __kmp_invoke_microtask 2113 2114#endif /* KMP_ARCH_LOONGARCH64 */ 2115 2116#if KMP_ARCH_VE 2117 2118//------------------------------------------------------------------------ 2119// 2120// typedef void (*microtask_t)(int *gtid, int *tid, ...); 2121// 2122// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 2123// void *p_argv[] 2124// #if OMPT_SUPPORT 2125// , 2126// void **exit_frame_ptr 2127// #endif 2128// ) { 2129// #if OMPT_SUPPORT 2130// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 2131// #endif 2132// 2133// (*pkfn)(>id, &tid, argv[0], ...); 2134// 2135// return 1; 2136// } 2137// 2138// Parameters: 2139// s0: pkfn 2140// s1: gtid 2141// s2: tid 2142// s3: argc 2143// s4: p_argv 2144// s5: exit_frame_ptr 2145// 2146// Locals: 2147// __gtid: gtid param pushed on stack so can pass >id to pkfn 2148// __tid: tid param pushed on stack so can pass &tid to pkfn 2149// 2150// Temp. registers: 2151// 2152// s34: used to calculate the dynamic stack size 2153// s35: used as temporary for stack placement calculation 2154// s36: used as temporary for stack arguments 2155// s37: used as temporary for number of remaining pkfn parms 2156// s38: used to traverse p_argv array 2157// 2158// return: s0 (always 1/TRUE) 2159// 2160 2161__gtid = -4 2162__tid = -8 2163 2164// -- Begin __kmp_invoke_microtask 2165// mark_begin; 2166 .text 2167 .globl __kmp_invoke_microtask 2168 // A function requires 8 bytes align. 2169 .p2align 3 2170 .type __kmp_invoke_microtask,@function 2171__kmp_invoke_microtask: 2172 .cfi_startproc 2173 2174 // First, save fp and lr. VE stores them at caller stack frame. 2175 st %fp, 0(, %sp) 2176 st %lr, 8(, %sp) 2177 or %fp, 0, %sp 2178 .cfi_def_cfa %fp, 0 2179 .cfi_offset %lr, 8 2180 .cfi_offset %fp, 0 2181 2182 // Compute the dynamic stack size: 2183 // 2184 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them 2185 // by reference 2186 // - We need 8 bytes for whole arguments. We have two + 'argc' 2187 // arguments (condider >id and &tid). We need to reserve 2188 // (argc + 2) * 8 bytes. 2189 // - We need 176 bytes for RSA and others 2190 // 2191 // The total number of bytes is then (argc + 2) * 8 + 8 + 176. 2192 // 2193 // |------------------------------| 2194 // | return address of callee | 8(%fp) 2195 // |------------------------------| 2196 // | frame pointer of callee | 0(%fp) 2197 // |------------------------------| <------------------ %fp 2198 // | __tid / __gtid | -8(%fp) / -4(%fp) 2199 // |------------------------------| 2200 // | argc+2 for arguments | 176(%sp) 2201 // |------------------------------| 2202 // | RSA | 2203 // |------------------------------| 2204 // | return address | 2205 // |------------------------------| 2206 // | frame pointer | 2207 // |------------------------------| <------------------ %sp 2208 2209 adds.w.sx %s34, 2, %s3 2210 sll %s34, %s34, 3 2211 lea %s34, 184(, %s34) 2212 subs.l %sp, %sp, %s34 2213 2214 // Align the stack to 16 bytes. 2215 and %sp, -16, %sp 2216 2217 // Save pkfn. 2218 or %s12, 0, %s0 2219 2220 // Call host to allocate stack if it is necessary. 2221 brge.l %sp, %sl, .L_kmp_pass 2222 ld %s61, 24(, %tp) 2223 lea %s63, 0x13b 2224 shm.l %s63, 0(%s61) 2225 shm.l %sl, 8(%s61) 2226 shm.l %sp, 16(%s61) 2227 monc 2228 2229.L_kmp_pass: 2230 lea %s35, 176(, %sp) 2231 adds.w.sx %s37, 0, %s3 2232 or %s38, 0, %s4 2233 2234#if OMPT_SUPPORT 2235 // Save frame pointer into exit_frame. 2236 st %fp, 0(%s5) 2237#endif 2238 2239 // Prepare arguments for the pkfn function (first 8 using s0-s7 2240 // registers, but need to store stack also because of varargs). 2241 2242 stl %s1, __gtid(%fp) 2243 stl %s2, __tid(%fp) 2244 2245 adds.l %s0, __gtid, %fp 2246 st %s0, 0(, %s35) 2247 adds.l %s1, __tid, %fp 2248 st %s1, 8(, %s35) 2249 2250 breq.l 0, %s37, .L_kmp_call 2251 ld %s2, 0(, %s38) 2252 st %s2, 16(, %s35) 2253 2254 breq.l 1, %s37, .L_kmp_call 2255 ld %s3, 8(, %s38) 2256 st %s3, 24(, %s35) 2257 2258 breq.l 2, %s37, .L_kmp_call 2259 ld %s4, 16(, %s38) 2260 st %s4, 32(, %s35) 2261 2262 breq.l 3, %s37, .L_kmp_call 2263 ld %s5, 24(, %s38) 2264 st %s5, 40(, %s35) 2265 2266 breq.l 4, %s37, .L_kmp_call 2267 ld %s6, 32(, %s38) 2268 st %s6, 48(, %s35) 2269 2270 breq.l 5, %s37, .L_kmp_call 2271 ld %s7, 40(, %s38) 2272 st %s7, 56(, %s35) 2273 2274 breq.l 6, %s37, .L_kmp_call 2275 2276 // Prepare any additional argument passed through the stack. 2277 adds.l %s37, -6, %s37 2278 lea %s38, 48(, %s38) 2279 lea %s35, 64(, %s35) 2280.L_kmp_loop: 2281 ld %s36, 0(, %s38) 2282 st %s36, 0(, %s35) 2283 adds.l %s37, -1, %s37 2284 adds.l %s38, 8, %s38 2285 adds.l %s35, 8, %s35 2286 brne.l 0, %s37, .L_kmp_loop 2287 2288.L_kmp_call: 2289 // Call pkfn function. 2290 bsic %lr, (, %s12) 2291 2292 // Return value. 2293 lea %s0, 1 2294 2295 // Restore stack and return. 2296 or %sp, 0, %fp 2297 ld %lr, 8(, %sp) 2298 ld %fp, 0(, %sp) 2299 b.l.t (, %lr) 2300.Lfunc_end0: 2301 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 2302 .cfi_endproc 2303 2304// -- End __kmp_invoke_microtask 2305 2306#endif /* KMP_ARCH_VE */ 2307 2308#if KMP_ARCH_S390X 2309 2310//------------------------------------------------------------------------ 2311// 2312// typedef void (*microtask_t)(int *gtid, int *tid, ...); 2313// 2314// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 2315// void *p_argv[] 2316// #if OMPT_SUPPORT 2317// , 2318// void **exit_frame_ptr 2319// #endif 2320// ) { 2321// #if OMPT_SUPPORT 2322// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 2323// #endif 2324// 2325// (*pkfn)(>id, &tid, argv[0], ...); 2326// 2327// return 1; 2328// } 2329// 2330// Parameters: 2331// r2: pkfn 2332// r3: gtid 2333// r4: tid 2334// r5: argc 2335// r6: p_argv 2336// SP+160: exit_frame_ptr 2337// 2338// Locals: 2339// __gtid: gtid param pushed on stack so can pass >id to pkfn 2340// __tid: tid param pushed on stack so can pass &tid to pkfn 2341// 2342// Temp. registers: 2343// 2344// r0: used to fetch argv slots 2345// r7: used as temporary for number of remaining pkfn parms 2346// r8: argv 2347// r9: pkfn 2348// r10: stack size 2349// r11: previous fp 2350// r12: stack parameter area 2351// r13: argv slot 2352// 2353// return: r2 (always 1/TRUE) 2354// 2355 2356// -- Begin __kmp_invoke_microtask 2357// mark_begin; 2358 .text 2359 .globl __kmp_invoke_microtask 2360 .p2align 1 2361 .type __kmp_invoke_microtask,@function 2362__kmp_invoke_microtask: 2363 .cfi_startproc 2364 2365 stmg %r6,%r14,48(%r15) 2366 .cfi_offset %r6, -112 2367 .cfi_offset %r7, -104 2368 .cfi_offset %r8, -96 2369 .cfi_offset %r9, -88 2370 .cfi_offset %r10, -80 2371 .cfi_offset %r11, -72 2372 .cfi_offset %r12, -64 2373 .cfi_offset %r13, -56 2374 .cfi_offset %r14, -48 2375 .cfi_offset %r15, -40 2376 lgr %r11,%r15 2377 .cfi_def_cfa %r11, 160 2378 2379 // Compute the dynamic stack size: 2380 // 2381 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by 2382 // reference 2383 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' 2384 // function by register. Given that we have 5 of such registers (r[2-6]) 2385 // and two + 'argc' arguments (consider >id and &tid), we need to 2386 // reserve max(0, argc - 3)*8 extra bytes 2387 // 2388 // The total number of bytes is then max(0, argc - 3)*8 + 8 2389 2390 lgr %r10,%r5 2391 aghi %r10,-2 2392 jnm 0f 2393 lghi %r10,0 23940: 2395 sllg %r10,%r10,3 2396 lgr %r12,%r10 2397 aghi %r10,176 2398 sgr %r15,%r10 2399 agr %r12,%r15 2400 stg %r11,0(%r15) 2401 2402 lgr %r9,%r2 // pkfn 2403 2404#if OMPT_SUPPORT 2405 // Save frame pointer into exit_frame 2406 lg %r8,160(%r11) 2407 stg %r11,0(%r8) 2408#endif 2409 2410 // Prepare arguments for the pkfn function (first 5 using r2-r6 registers) 2411 2412 stg %r3,160(%r12) 2413 la %r2,164(%r12) // gid 2414 stg %r4,168(%r12) 2415 la %r3,172(%r12) // tid 2416 lgr %r8,%r6 // argv 2417 2418 // If argc > 0 2419 ltgr %r7,%r5 2420 jz 1f 2421 2422 lg %r4,0(%r8) // argv[0] 2423 aghi %r7,-1 2424 jz 1f 2425 2426 // If argc > 1 2427 lg %r5,8(%r8) // argv[1] 2428 aghi %r7,-1 2429 jz 1f 2430 2431 // If argc > 2 2432 lg %r6,16(%r8) // argv[2] 2433 aghi %r7,-1 2434 jz 1f 2435 2436 lghi %r13,0 // Index [n] 24372: 2438 lg %r0,24(%r13,%r8) // argv[2+n] 2439 stg %r0,160(%r13,%r15) // parm[2+n] 2440 aghi %r13,8 // Next 2441 aghi %r7,-1 2442 jnz 2b 2443 24441: 2445 basr %r14,%r9 // Call pkfn 2446 2447 // Restore stack and return 2448 2449 lgr %r15,%r11 2450 lmg %r6,%r14,48(%r15) 2451 lghi %r2,1 2452 br %r14 2453.Lfunc_end0: 2454 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 2455 .cfi_endproc 2456 2457// -- End __kmp_invoke_microtask 2458 2459#endif /* KMP_ARCH_S390X */ 2460 2461#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 2462#ifndef KMP_PREFIX_UNDERSCORE 2463# define KMP_PREFIX_UNDERSCORE(x) x 2464#endif 2465 .data 2466 COMMON .gomp_critical_user_, 32, 3 2467 .data 2468 .align 4 2469 .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) 2470KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): 2471 .4byte .gomp_critical_user_ 2472#ifdef __ELF__ 2473 .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4 2474#endif 2475#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */ 2476 2477#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \ 2478 KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || \ 2479 KMP_ARCH_S390X 2480#ifndef KMP_PREFIX_UNDERSCORE 2481# define KMP_PREFIX_UNDERSCORE(x) x 2482#endif 2483 .data 2484 COMMON .gomp_critical_user_, 32, 3 2485 .data 2486 .align 8 2487 .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) 2488KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): 2489 .8byte .gomp_critical_user_ 2490#ifdef __ELF__ 2491 .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 2492#endif 2493#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || 2494 KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || 2495 KMP_ARCH_S390X */ 2496 2497#if KMP_OS_LINUX 2498# if KMP_ARCH_ARM || KMP_ARCH_AARCH64 2499.section .note.GNU-stack,"",%progbits 2500# elif !KMP_ARCH_WASM 2501.section .note.GNU-stack,"",@progbits 2502# endif 2503#endif 2504 2505#if KMP_ARCH_WASM 2506.data 2507.global .gomp_critical_user_ 2508.global .gomp_critical_user_.var 2509.global .gomp_critical_user_.reduction.var 2510.global __kmp_unnamed_critical_addr 2511.gomp_critical_user_: 2512.zero 4 2513.size .gomp_critical_user_, 4 2514.gomp_critical_user_.var: 2515.zero 4 2516.size .gomp_critical_user_.var, 4 2517.gomp_critical_user_.reduction.var: 2518.zero 4 2519.size .gomp_critical_user_.reduction.var, 4 2520__kmp_unnamed_critical_addr: 2521 .4byte .gomp_critical_user_ 2522 .size __kmp_unnamed_critical_addr, 4 2523#endif 2524 2525#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) 2526GNU_PROPERTY_BTI_PAC 2527#endif 2528