//  z_Linux_asm.S:  - microtasking routines specifically
//                    written for Intel platforms running Linux* OS

//
////===----------------------------------------------------------------------===//
////
//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
//// See https://llvm.org/LICENSE.txt for license information.
//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
////
////===----------------------------------------------------------------------===//
//

// -----------------------------------------------------------------------
// macros
// -----------------------------------------------------------------------

#include "kmp_config.h"

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

# if KMP_MIC
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
// the round-robin HT mechanism, and therefore speeds up the issue rate of
// the other threads on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
// barrier time to increase greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
//.macro pause_op
//	mov    $100, %rax
//	delay  %rax
//.endm
# else
// pause_op expands to the raw bytes 0xf3,0x90.
# define pause_op   .byte 0xf3,0x90
# endif // KMP_MIC

// Per-OS helper macros used by every routine below:
//   KMP_PREFIX_UNDERSCORE(x)  exported symbol spelling for x
//   KMP_LABEL(x)              spelling of a routine-local label
//   ALIGN n                   alignment directive
//   PROC name                 aligned, global entry label (plus .cfi_startproc
//                             on the non-Darwin branch)
//   DEBUG_INFO name           routine trailer (.cfi_endproc/.type/.size on the
//                             non-Darwin branch, empty on Darwin)
//   KMP_CFI_*                 thin wrappers over .cfi_* (empty on Darwin)
# if KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
# define KMP_LABEL(x) L_##x             // form the name of label
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro
.macro ALIGN
        .align $0
.endmacro
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro
.macro PROC
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
// on OS X*)
# if KMP_MIC
# define KMP_LABEL(x) L_##x          // local label
# else
# define KMP_LABEL(x) .L_##x         // local label hidden from backtraces
# endif // KMP_MIC
// ALIGN size: the argument is a power of two exponent (.align 1<<size).
.macro ALIGN size
        .align 1<<(\size)
.endm
.macro DEBUG_INFO proc
        .cfi_endproc
// Not sure why we need .type and .size for the functions
        .align 16
        .type  \proc,@function
        .size  \proc,.-\proc
.endm
.macro PROC proc
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
        .cfi_startproc
.endm
.macro KMP_CFI_DEF_OFFSET sz
        .cfi_def_cfa_offset \sz
.endm
.macro KMP_CFI_OFFSET reg, sz
        .cfi_offset \reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
        .cfi_def_cfa_register \reg
.endm
.macro KMP_CFI_DEF reg, sz
        .cfi_def_cfa \reg,\sz
.endm
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)

// Same helper macro set as above, for AArch64 / ARM targets.
# if KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols
# define KMP_LABEL(x) L_##x             // form the name of label

.macro ALIGN
        .align $0
.endmacro

.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

.macro PROC
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# elif KMP_OS_WINDOWS
# define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Windows/ARM64 symbols
// Format labels so that they don't override function names in gdb's backtraces
# define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

.macro ALIGN size
        .align 1<<(\size)
.endm

.macro DEBUG_INFO proc
        ALIGN 2
.endm

.macro PROC proc
        ALIGN 2
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.endm
# else // KMP_OS_DARWIN || KMP_OS_WINDOWS
# define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
# define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

.macro ALIGN size
        .align 1<<(\size)
.endm

.macro DEBUG_INFO proc
        .cfi_endproc
// Not sure why we need .type and .size for the functions
        ALIGN 2
#if KMP_ARCH_ARM
        .type  \proc,%function
#else
        .type  \proc,@function
#endif
        .size  \proc,.-\proc
.endm

.macro PROC proc
        ALIGN 2
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
        .cfi_startproc
.endm
# endif // KMP_OS_DARWIN

#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)

// COMMON name, size, align_power: declare a common symbol.
// The third .comm operand differs per OS: omitted on Darwin, passed through
// unchanged on Windows, and expanded to 1<<align_power elsewhere.
.macro COMMON name, size, align_power
#if KMP_OS_DARWIN
        .comm \name, \size
#elif KMP_OS_WINDOWS
        .comm \name, \size, \align_power
#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS
        .comm \name, \size, (1<<(\align_power))
#endif
.endm

// -----------------------------------------------------------------------
// data
// -----------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT

// Support for unnamed common blocks.
//
// Because the symbol ".gomp_critical_user_" contains a ".", we have to
// put this stuff in assembly.
//
// __kmp_unnamed_critical_addr is a pointer-sized data word holding the
// address of the 32-byte common block .gomp_critical_user_.

# if KMP_ARCH_X86
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .long .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 4
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,4
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */

# if KMP_ARCH_X86_64
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .quad .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        // NOTE(review): ALIGN takes an exponent here, so this requests
        // 1<<8 = 256-byte alignment for an 8-byte object -- confirm intended.
        ALIGN 8
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .8byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,8
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */

#endif /* KMP_GOMP_COMPAT */


#if KMP_ARCH_X86 && !KMP_ARCH_PPC64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture
// running Linux* OS
// -----------------------------------------------------------------------
//
// All IA-32 routines below read their arguments from the stack:
// first argument at 4(%esp) on entry, second at 8(%esp), third at 12(%esp).

        .ident "Intel Corporation"
        .data
        ALIGN 4
// void
// __kmp_x86_pause( void );
//
// Executes pause_op and returns.

        .text
        PROC  __kmp_x86_pause

        pause_op
        ret

        DEBUG_INFO __kmp_x86_pause

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomic fetch-and-add: *p += d, returns the previous *p in %eax.

        PROC      __kmp_test_then_add32

        movl      4(%esp), %ecx         // "p"
        movl      8(%esp), %eax         // "d"
        lock
        xaddl     %eax,(%ecx)           // %eax <- old *p, *p <- old *p + d
        ret

        DEBUG_INFO __kmp_test_then_add32

//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%al
//
// NOTE(review): the prototype comment says kmp_int32 but only %al is written;
// the upper bytes of %eax are whatever they were on entry -- confirm the
// declared return type with the C header.
        PROC  __kmp_xchg_fixed8

        movl      4(%esp), %ecx    // "p"
        movb      8(%esp), %al     // "d"

        lock
        xchgb     %al,(%ecx)       // %al <- old *p, *p <- d
        ret

        DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
// return:     %ax
        PROC  __kmp_xchg_fixed16

        movl      4(%esp), %ecx    // "p"
        movw      8(%esp), %ax     // "d"

        lock
        xchgw     %ax,(%ecx)       // %ax <- old *p, *p <- d
        ret

        DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%eax
        PROC  __kmp_xchg_fixed32

        movl      4(%esp), %ecx    // "p"
        movl      8(%esp), %eax    // "d"

        lock
        xchgl     %eax,(%ecx)      // %eax <- old *p, *p <- d
        ret

        DEBUG_INFO __kmp_xchg_fixed32


// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Compare-and-swap: if *p == cv then *p = sv.  Returns 1 in %eax when the
// store happened, 0 otherwise.
        PROC  __kmp_compare_and_store8

        movl      4(%esp), %ecx         // "p"
        movb      8(%esp), %al          // "cv" (cmpxchg compares against %al)
        movb      12(%esp), %dl         // "sv"
        lock
        cmpxchgb  %dl,(%ecx)
        sete      %al           // if %al == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax      // clear the upper bits so %eax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store8

// kmp_int16
// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
//
// Same contract as __kmp_compare_and_store8, on a 16-bit location.
        PROC  __kmp_compare_and_store16

        movl      4(%esp), %ecx         // "p"
        movw      8(%esp), %ax          // "cv"
        movw      12(%esp), %dx         // "sv"
        lock
        cmpxchgw  %dx,(%ecx)
        sete      %al           // if %ax == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax      // clear the upper bits so %eax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store16

// kmp_int32
// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
//
// Same contract as __kmp_compare_and_store8, on a 32-bit location.
        PROC  __kmp_compare_and_store32

        movl      4(%esp), %ecx         // "p"
        movl      8(%esp), %eax         // "cv"
        movl      12(%esp), %edx        // "sv"
        lock
        cmpxchgl  %edx,(%ecx)
        sete      %al          // if %eax == (%ecx) set %al = 1 else set %al = 0
        and       $1, %eax     // clear the upper bits so %eax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store32

// kmp_int32
// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s );
//
// 64-bit compare-and-swap via cmpxchg8b.  A frame is set up, so the first
// argument is at 8(%ebp).  %ebx and %edi are saved and restored around use.
// Returns 1 in %eax when the store happened, 0 otherwise.
        PROC  __kmp_compare_and_store64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      8(%ebp), %edi         // "p"
        movl      12(%ebp), %eax        // "cv" low order word
        movl      16(%ebp), %edx        // "cv" high order word
        movl      20(%ebp), %ebx        // "sv" low order word
        movl      24(%ebp), %ecx        // "sv" high order word
        lock
        cmpxchg8b (%edi)
        sete      %al      // if %edx:eax == (%edi) set %al = 1 else set %al = 0
        and       $1, %eax // clear the upper bits so %eax is exactly 0 or 1
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store64

// kmp_int8
// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
//
// Compare-and-swap that returns the value cmpxchg leaves in %al: cv when the
// store happened, otherwise the value found in *p.
        PROC  __kmp_compare_and_store_ret8

        movl      4(%esp), %ecx         // "p"
        movb      8(%esp), %al          // "cv"
        movb      12(%esp), %dl         // "sv"
        lock
        cmpxchgb  %dl,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8

// kmp_int16
// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
//                               kmp_int16 sv);
//
// Compare-and-swap that returns the value cmpxchg leaves in %ax: cv when the
// store happened, otherwise the value found in *p.
        PROC  __kmp_compare_and_store_ret16

        movl      4(%esp), %ecx         // "p"
        movw      8(%esp), %ax          // "cv"
        movw      12(%esp), %dx         // "sv"
        lock
        cmpxchgw  %dx,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16

// kmp_int32
// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
//                               kmp_int32 sv);
//
// Same contract as __kmp_compare_and_store_ret16, on a 32-bit location;
// result in %eax.
        PROC  __kmp_compare_and_store_ret32

        movl      4(%esp), %ecx         // "p"
        movl      8(%esp), %eax         // "cv"
        movl      12(%esp), %edx        // "sv"
        lock
        cmpxchgl  %edx,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32

// kmp_int64
// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
//                               kmp_int64 sv);
//
// 64-bit variant via cmpxchg8b; result in %edx:%eax.  A frame is set up, so
// the first argument is at 8(%ebp).  %ebx and %edi are saved and restored.
        PROC  __kmp_compare_and_store_ret64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      8(%ebp), %edi         // "p"
        movl      12(%ebp), %eax        // "cv" low order word
        movl      16(%ebp), %edx        // "cv" high order word
        movl      20(%ebp), %ebx        // "sv" low order word
        movl      24(%ebp), %ecx        // "sv" high order word
        lock
        cmpxchg8b (%edi)
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores the 32-bit pattern of "data" into *addr and returns the
// previous contents of *addr as a float.
//
// parameters (after the frame is established):
// 	addr:	8(%ebp)
// 	data:	12(%ebp)
//
// return:	st(0)  (exactly one x87 stack entry is pushed)
//
// clobbers:	%eax, %ecx, flags unchanged by xchg
//
// The previous version read "addr" from 4(%ebp) and "data" from 8(%ebp);
// with a saved %ebp on the stack those slots are the return address and
// "addr" respectively.  It also returned a value loaded before the exchange
// (not atomic with it) and pushed two x87 entries while returning one.
        PROC  __kmp_xchg_real32

        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp                // scratch slot at -4(%ebp)

        movl    8(%ebp), %ecx           // "addr"
        movl    12(%ebp), %eax          // bit pattern of "data"

        lock
        xchgl   %eax, (%ecx)            // %eax <- old *addr, *addr <- data

        movl    %eax, -4(%ebp)          // move the old bits through memory ...
        flds    -4(%ebp)                // ... into st(0) as the return value

        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_xchg_real32

# endif /* !KMP_ASM_INTRINS */

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// IA-32 version.  Argument slots once the frame is established:
//	pkfn:           8(%ebp)
//	gtid:           12(%ebp)
//	tid:            16(%ebp)
//	argc:           20(%ebp)
//	p_argv:         24(%ebp)
//	exit_frame_ptr: 28(%ebp)   (OMPT_SUPPORT only)
//
// All pkfn arguments are pushed on the stack: p_argv[argc-1] first, down to
// p_argv[0], then &tid, then &gtid.  %esp is pre-adjusted so that it is
// 128-byte aligned once all of them have been pushed.

// -- Begin __kmp_invoke_microtask
// mark_begin;
        PROC  __kmp_invoke_microtask

        pushl %ebp
        KMP_CFI_DEF_OFFSET 8
        KMP_CFI_OFFSET ebp,-8
        movl %esp,%ebp          // establish the base pointer for this routine.
        KMP_CFI_REGISTER ebp
        subl $8,%esp            // allocate space for two local variables.
                                // These variables are:
                                //	argv: -4(%ebp)
                                //	temp: -8(%ebp)
                                //
        pushl %ebx              // save %ebx to use during this routine
                                // (lands at -12(%ebp), reloaded before return)
#if OMPT_SUPPORT
        movl 28(%ebp),%ebx      // get exit_frame address
        movl %ebp,(%ebx)        // save exit_frame
#endif

        movl 20(%ebp),%ebx      // Stack alignment - # args
        addl $2,%ebx            // #args +2  Always pass at least 2 args (gtid and tid)
        shll $2,%ebx            // Number of bytes used on stack: (#args+2)*4
        movl %esp,%eax          //
        subl %ebx,%eax          // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
        movl %eax,%ebx          // Save to %ebx
        andl $0xFFFFFF80,%eax   // mask off 7 bits
        subl %eax,%ebx          // Amount to subtract from %esp
        subl %ebx,%esp          // Prepare the stack ptr --
                                // now it will be aligned on 128-byte boundary at the call

        movl 24(%ebp),%eax      // copy from p_argv[]
        movl %eax,-4(%ebp)      // into the local variable *argv.

        movl 20(%ebp),%ebx      // argc is 20(%ebp)
        shll $2,%ebx            // %ebx = argc*4 = byte offset just past the last arg

KMP_LABEL(invoke_2):
        cmpl $0,%ebx            // any argv entries left to push?
        jg  KMP_LABEL(invoke_4)
        jmp KMP_LABEL(invoke_3)
        ALIGN 2
KMP_LABEL(invoke_4):
        movl -4(%ebp),%eax
        subl $4,%ebx                    // decrement argc.
        addl %ebx,%eax                  // index into argv.
        movl (%eax),%edx
        pushl %edx

        jmp KMP_LABEL(invoke_2)
        ALIGN 2
KMP_LABEL(invoke_3):
        leal 16(%ebp),%eax              // push & tid
        pushl %eax

        leal 12(%ebp),%eax              // push & gtid
        pushl %eax

        movl 8(%ebp),%ebx
        call *%ebx                      // call (*pkfn)();

        movl $1,%eax                    // return 1;

        movl -12(%ebp),%ebx             // restore %ebx
        leave
        KMP_CFI_DEF esp,4
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask


// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Returns the rdtsc result as left by the instruction in %edx:%eax.
        PROC  __kmp_hardware_timestamp
        rdtsc
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp

#endif /* KMP_ARCH_X86 */


#if KMP_ARCH_X86_64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------

// -- Machine type P
// mark_description "Intel Corporation";
        .ident "Intel Corporation"
// --   .file "z_Linux_asm.S"
        .data
        ALIGN 4

// To prevent getting our code into .data section .text added to every routine
// definition for x86_64.
//------------------------------------------------------------------------
# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomic fetch-and-add: *p += d, returns the previous *p.
//
// parameters:
// 	p:	%rdi
// 	d:	%esi
//
// return:	%eax
        .text
        PROC  __kmp_test_then_add32

        movl      %esi, %eax	// "d"
        lock
        xaddl     %eax,(%rdi)	// %eax <- old *p, *p <- old *p + d
        ret

        DEBUG_INFO __kmp_test_then_add32


//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// 64-bit variant of __kmp_test_then_add32.
//
// parameters:
// 	p:	%rdi
// 	d:	%rsi
//	return:	%rax
        .text
        PROC  __kmp_test_then_add64

        movq      %rsi, %rax	// "d"
        lock
        xaddq     %rax,(%rdi)	// %rax <- old *p, *p <- old *p + d
        ret

        DEBUG_INFO __kmp_test_then_add64


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomic exchange: stores d into *p and returns the previous *p.
//
// parameters:
// 	p:	%rdi
// 	d:	%sil
//
// return:	%al
        .text
        PROC  __kmp_xchg_fixed8

        movb      %sil, %al	// "d"

        lock
        xchgb     %al,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%si
// return:     %ax
        .text
        PROC  __kmp_xchg_fixed16

        movw      %si, %ax	// "d"

        lock
        xchgw     %ax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%esi
//
// return:	%eax
        .text
        PROC  __kmp_xchg_fixed32

        movl      %esi, %eax	// "d"

        lock
        xchgl     %eax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed32


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
// 	p:	%rdi
// 	d:	%rsi
// return:	%rax
        .text
        PROC  __kmp_xchg_fixed64

        movq      %rsi, %rax	// "d"

        lock
        xchgq     %rax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed64


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Compare-and-swap: if *p == cv then *p = sv.  Returns 1 when the store
// happened, 0 otherwise.
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store8

        movb      %sil, %al	// "cv"
        lock
        cmpxchgb  %dl,(%rdi)
        sete      %al           // if %al == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // clear the upper bits so %rax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store8


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// Same contract as __kmp_compare_and_store8, on a 16-bit location.
//
// parameters:
// 	p:	%rdi
// 	cv:	%si
//	sv:	%dx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store16

        movw      %si, %ax	// "cv"
        lock
        cmpxchgw  %dx,(%rdi)
        sete      %al           // if %ax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // clear the upper bits so %rax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store16


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Compare-and-swap: if *p == cv then *p = sv.  Returns 1 when the store
// happened, 0 otherwise.
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store32

        movl      %esi, %eax	// "cv"
        lock
        cmpxchgl  %edx,(%rdi)
        sete      %al           // if %eax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // clear the upper bits so %rax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store32


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// Same contract as __kmp_compare_and_store32, on a 64-bit location.
//
// parameters:
// 	p:	%rdi
// 	cv:	%rsi
//	sv:	%rdx
//	return:	%eax
        .text
        PROC  __kmp_compare_and_store64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)
        sete      %al           // if %rax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // clear the upper bits so %rax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store64

//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Compare-and-swap that returns what cmpxchg leaves in %al: cv when the
// store happened, otherwise the value found in *p.
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax  (only %al is written by this routine)
        .text
        PROC  __kmp_compare_and_store_ret8

        movb      %sil, %al	// "cv"
        lock
        cmpxchgb  %dl,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%si
//	sv:	%dx
//
// return:	%eax  (only %ax is written by this routine)
        .text
        PROC  __kmp_compare_and_store_ret16

        movw      %si, %ax	// "cv"
        lock
        cmpxchgw  %dx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store_ret32

        movl      %esi, %eax	// "cv"
        lock
        cmpxchgl  %edx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
// 	p:	%rdi
// 	cv:	%rsi
//	sv:	%rdx
//	return:	%rax
        .text
        PROC  __kmp_compare_and_store_ret64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64

# endif /* !KMP_ASM_INTRINS */


# if !KMP_MIC

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores the bit pattern of data into *addr and returns the
// previous contents.
//
// parameters:
// 	addr:	%rdi
// 	data:	%xmm0 (lower 4 bytes)
//
// return:	%xmm0 (lower 4 bytes)
        .text
        PROC  __kmp_xchg_real32

        movd    %xmm0, %eax     // load "data" to eax

        lock
        xchgl   %eax, (%rdi)

        movd    %eax, %xmm0     // load old value into return register

        ret

        DEBUG_INFO __kmp_xchg_real32


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// 64-bit variant of __kmp_xchg_real32.
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 8 bytes)
//      return: %xmm0 (lower 8 bytes)
        .text
        PROC  __kmp_xchg_real64

        movd    %xmm0, %rax     // load "data" to rax

        lock
        xchgq   %rax, (%rdi)

        movd    %rax, %xmm0     // load old value into return register
        ret

        DEBUG_INFO __kmp_xchg_real64


# endif /* !KMP_ASM_INTRINS */

# endif /* !KMP_MIC */

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// note: at call to pkfn must have %rsp 128-byte aligned for compiler
//
// parameters:
//      %rdi:  	pkfn
//	%esi:	gtid
//	%edx:	tid
//	%ecx:	argc
//	%r8:	p_argv
//	%r9:	&exit_frame
//
// locals:
//	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn
//	__tid:	tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//	%rax:	used all over the place
//	%rdx:	used in stack pointer alignment calculation
//	%r11:	used to traverse p_argv array
//	%rsi:	used as temporary for stack parameters
//		used as temporary for number of pkfn parms to push
//	%rbx:	used to hold pkfn address, and zero constant, callee-save
//
// return:	%eax 	(always 1/TRUE)
//
// NOTE(review): argc is documented as %ecx but is read as %rcx below; this
// relies on the upper 32 bits of %rcx being clean on entry -- confirm with
// the callers.
//
// Frame offsets of the two locals; they match the push order below
// (%rbx at -8, gtid at -16, tid at -24 relative to %rbp).
__gtid = -16
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        PROC  __kmp_invoke_microtask

        pushq   %rbp            // save base pointer
        KMP_CFI_DEF_OFFSET 16
        KMP_CFI_OFFSET rbp,-16
        movq    %rsp,%rbp       // establish the base pointer for this routine.
        KMP_CFI_REGISTER rbp

#if OMPT_SUPPORT
        movq    %rbp, (%r9)     // save exit_frame
#endif

        pushq   %rbx            // %rbx is callee-saved register
        pushq   %rsi            // Put gtid on stack so can pass &gtid to pkfn
        pushq   %rdx            // Put tid on stack so can pass &tid to pkfn

        movq    %rcx, %rax      // Stack alignment calculation begins; argc -> %rax
        movq    $0, %rbx        // constant for cmovs later
        subq    $4, %rax        // subtract four args passed in registers to pkfn
#if KMP_MIC
        js      KMP_LABEL(kmp_0)        // jump to movq
        jmp     KMP_LABEL(kmp_0_exit)   // jump ahead
KMP_LABEL(kmp_0):
        movq    %rbx, %rax      // zero negative value in %rax <- max(0, argc-4)
KMP_LABEL(kmp_0_exit):
#else
        cmovsq  %rbx, %rax      // zero negative value in %rax <- max(0, argc-4)
#endif // KMP_MIC

        movq    %rax, %rsi      // save max(0, argc-4) -> %rsi for later
        shlq    $3, %rax        // Number of bytes used on stack: max(0, argc-4)*8

        movq    %rsp, %rdx      //
        subq    %rax, %rdx      // %rsp-(max(0,argc-4)*8) -> %rdx --
                                // without align, stack ptr would be this
        movq    %rdx, %rax      // Save to %rax

        andq    $0xFFFFFFFFFFFFFF80, %rax  // mask off lower 7 bits (128 bytes align)
        subq    %rax, %rdx      // Amount to subtract from %rsp
        subq    %rdx, %rsp      // Prepare the stack ptr --
                                // now %rsp will align to 128-byte boundary at call site

                                // setup pkfn parameter reg and stack
        movq    %rcx, %rax      // argc -> %rax
        cmpq    $0, %rsi
        je      KMP_LABEL(kmp_invoke_pass_parms)  // jump ahead if no parms to push
        shlq    $3, %rcx        // argc*8 -> %rcx
        movq    %r8, %rdx       // p_argv -> %rdx
        addq    %rcx, %rdx      // &p_argv[argc] -> %rdx

        movq    %rsi, %rcx      // max (0, argc-4) -> %rcx

KMP_LABEL(kmp_invoke_push_parms):
        // push nth - 7th parms to pkfn on stack
        subq    $8, %rdx        // decrement p_argv pointer to previous parm
        movq    (%rdx), %rsi    // p_argv[%rcx-1] -> %rsi
        pushq   %rsi            // push p_argv[%rcx-1] onto stack (reverse order)
        subl    $1, %ecx

// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
//		if the name of the label that is an operand of this jecxz starts with a dot (".");
//	   Apple's linker does not support 1-byte length relocation;
//         Resolution: replace all .labelX entries with L_labelX.

        jecxz   KMP_LABEL(kmp_invoke_pass_parms)  // stop when four p_argv[] parms left
        jmp     KMP_LABEL(kmp_invoke_push_parms)
        ALIGN 3
KMP_LABEL(kmp_invoke_pass_parms):	// put 1st - 6th parms to pkfn in registers.
                                // order here is important to avoid trashing
                                // registers used for both input and output parms!
        movq    %rdi, %rbx      // pkfn -> %rbx
        leaq    __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
        leaq    __tid(%rbp), %rsi  // &tid -> %rsi (store 2nd parm to pkfn)

        movq    %r8, %r11       // p_argv -> %r11

// NOTE(review): in the cmov variant each cmovnsq names a memory source; the
// load may be issued even when the condition is false, so this presumably
// relies on p_argv being readable for four slots -- confirm.
#if KMP_MIC
        cmpq    $4, %rax        // argc >= 4?
        jns     KMP_LABEL(kmp_4)        // jump to movq
        jmp     KMP_LABEL(kmp_4_exit)   // jump ahead
KMP_LABEL(kmp_4):
        movq    24(%r11), %r9   // p_argv[3] -> %r9 (store 6th parm to pkfn)
KMP_LABEL(kmp_4_exit):

        cmpq    $3, %rax        // argc >= 3?
        jns     KMP_LABEL(kmp_3)        // jump to movq
        jmp     KMP_LABEL(kmp_3_exit)   // jump ahead
KMP_LABEL(kmp_3):
        movq    16(%r11), %r8   // p_argv[2] -> %r8 (store 5th parm to pkfn)
KMP_LABEL(kmp_3_exit):

        cmpq    $2, %rax        // argc >= 2?
        jns     KMP_LABEL(kmp_2)        // jump to movq
        jmp     KMP_LABEL(kmp_2_exit)   // jump ahead
KMP_LABEL(kmp_2):
        movq    8(%r11), %rcx   // p_argv[1] -> %rcx (store 4th parm to pkfn)
KMP_LABEL(kmp_2_exit):

        cmpq    $1, %rax        // argc >= 1?
        jns     KMP_LABEL(kmp_1)        // jump to movq
        jmp     KMP_LABEL(kmp_1_exit)   // jump ahead
KMP_LABEL(kmp_1):
        movq    (%r11), %rdx    // p_argv[0] -> %rdx (store 3rd parm to pkfn)
KMP_LABEL(kmp_1_exit):
#else
        cmpq    $4, %rax        // argc >= 4?
        cmovnsq 24(%r11), %r9   // p_argv[3] -> %r9 (store 6th parm to pkfn)

        cmpq    $3, %rax        // argc >= 3?
        cmovnsq 16(%r11), %r8   // p_argv[2] -> %r8 (store 5th parm to pkfn)

        cmpq    $2, %rax        // argc >= 2?
        cmovnsq 8(%r11), %rcx   // p_argv[1] -> %rcx (store 4th parm to pkfn)

        cmpq    $1, %rax        // argc >= 1?
        cmovnsq (%r11), %rdx    // p_argv[0] -> %rdx (store 3rd parm to pkfn)
#endif // KMP_MIC

        call    *%rbx           // call (*pkfn)();
        movq    $1, %rax        // move 1 into return register;

        movq    -8(%rbp), %rbx  // restore %rbx	using %rbp since %rsp was modified
        movq    %rbp, %rsp      // restore stack pointer
        popq    %rbp            // restore frame pointer
        KMP_CFI_DEF rsp,8
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Combines the two rdtsc halves into one 64-bit value in %rax
// (%rdx shifted left by 32, ORed with %rax).
        .text
        PROC  __kmp_hardware_timestamp
        rdtsc
        shlq    $32, %rdx
        orq     %rdx, %rax
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp

//------------------------------------------------------------------------
// FUNCTION __kmp_bsr32
//
// int
// __kmp_bsr32( int );
//
// Executes bsr on the argument in %edi and returns the result in %eax.
        .text
        PROC  __kmp_bsr32

        bsr    %edi,%eax
        ret

        DEBUG_INFO __kmp_bsr32

// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86_64 */

// '
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//      x0:     pkfn
//      w1:     gtid
//      w2:     tid
//      w3:     argc
//      x4:     p_argv
//      x5:     &exit_frame
//
// locals:
//      __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//      __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//       x8:    used to hold pkfn address
//       w9:    used as temporary for number of pkfn parms
//      x10:    used to traverse p_argv array
//      x11:    used as temporary for stack placement calculation
//      x12:    used as temporary for stack parameters
//      x19:    used to preserve exit_frame_ptr, callee-save
//
// return:      w0      (always 1/TRUE)
//

__gtid = 4
__tid = 8

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        .text
        PROC __kmp_invoke_microtask

        stp     x29, x30, [sp, #-16]!   // save frame pointer and link register
# if OMPT_SUPPORT
        // x19 carries exit_frame_ptr across the call. x20 is not otherwise
        // used in this function; storing the pair keeps sp a multiple of 16.
        stp     x19, x20, [sp, #-16]!
# endif
        mov     x29, sp

        // Reserve 16 * (1 + argc/2) bytes below x29. The area holds the
        // gtid/tid slots at its top and the stack-passed arguments at its
        // bottom, and its size keeps sp 16-byte aligned.
        orr     w9, wzr, #1
        add     w9, w9, w3, lsr #1      // w9 = 1 + argc/2
        sub     sp, sp, w9, uxtw #4     // sp -= 16 * w9
        mov     x11, sp                 // x11 = where the next stack argument goes

        mov     x8, x0                  // x8 = pkfn (x0 is reused for &gtid)
        str     w1, [x29, #-__gtid]
        str     w2, [x29, #-__tid]
        mov     w9, w3                  // w9 = argc
        mov     x10, x4                 // x10 = p_argv
# if OMPT_SUPPORT
        mov     x19, x5
        str     x29, [x19]              // *exit_frame_ptr = x29
# endif

        sub     x0, x29, #__gtid        // x0 = &gtid
        sub     x1, x29, #__tid         // x1 = &tid

        // p_argv[0..5] go to x2..x7. Each step leaves as soon as the argument
        // count is exhausted; x10 is advanced by the pre-indexed loads.
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x2, [x10]

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x3, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x4, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x5, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x6, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x7, [x10, #8]!

        // Remaining arguments are copied to the stack, starting at sp and
        // moving upward 8 bytes at a time.
KMP_LABEL(kmp_0):
        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x12, [x10, #8]!
        str     x12, [x11], #8
        b       KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
        blr     x8                      // call (*pkfn)()
        orr     w0, wzr, #1             // return value 1
        mov     sp, x29                 // release the dynamically sized area
# if OMPT_SUPPORT
        str     xzr, [x19]              // *exit_frame_ptr = 0
        ldp     x19, x20, [sp], #16
# endif
        ldp     x29, x30, [sp], #16
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_AARCH64 */

#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//      r0:     pkfn
//      r1:     gtid
//      r2:     tid
//      r3:     argc
//      r4(stack):      p_argv
//      r5(stack):      &exit_frame
//
// locals:
//      __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//      __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//       r4:    used to hold pkfn address
//       r5:    used as temporary for number of pkfn parms
//       r6:    used to traverse p_argv array
//       r7:    frame pointer (in some configurations)
//       r8:    used as temporary for stack placement calculation
//              and as pointer to base of callee saved area
//      r10:    used to preserve exit_frame_ptr, callee-save
//      r11:    frame pointer (in some configurations)
//      r12:    used as temporary for stack parameters
//
// return:      r0      (always 1/TRUE)
//

__gtid = 4
__tid = 8

// -- Begin  __kmp_invoke_microtask
// mark_begin;
        .text
        PROC __kmp_invoke_microtask

        // Pushing one extra register (r3) to keep the stack aligned
        // for when we call pkfn below
        push    {r3-r11,lr}
        // Load p_argv and &exit_frame
        // (ten words were just pushed, so the stack-passed parameters of this
        // function start at sp + 10*4)
        ldr     r4, [sp, #10*4]
# if OMPT_SUPPORT
        ldr     r5, [sp, #11*4]
# endif

        // FPOFF is the offset of the saved copy of the FP register inside the
        // block pushed above (r7 is the 5th word, r11 the 9th), so FP ends up
        // pointing at its own saved value.
# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS)
# define FP r7
# define FPOFF 4*4
# else
# define FP r11
# define FPOFF 8*4
# endif
        add     FP, sp, #FPOFF
# if OMPT_SUPPORT
        mov     r10, r5
        str     FP, [r10]               // *exit_frame_ptr = FP
# endif
        mov     r8, sp                  // r8 = base of the callee saved area

        // Calculate how much stack to allocate, in increments of 8 bytes.
        // We strictly need 4*(argc-2) bytes (2 arguments are passed in
        // registers) but allocate 4*argc for simplicity (to avoid needing
        // to handle the argc<2 cases). We align the number of bytes
        // allocated to 8 bytes, to keep the stack aligned. (Since we
        // already allocate more than enough, it's ok to round down
        // instead of up for the alignment.) We allocate another extra
        // 8 bytes for gtid and tid.
        mov     r5, #1
        add     r5, r5, r3, lsr #1      // r5 = 1 + argc/2
        sub     sp, sp, r5, lsl #3      // sp -= 8 * r5

        str     r1, [r8, #-__gtid]
        str     r2, [r8, #-__tid]
        mov     r5, r3                  // r5 = argc
        mov     r6, r4                  // r6 = p_argv
        mov     r4, r0                  // r4 = pkfn (r0 is reused for &gtid)

        // Prepare the first 2 parameters to pkfn - pointers to gtid and tid
        // in our stack frame.
        sub     r0, r8, #__gtid
        sub     r1, r8, #__tid

        mov     r8, sp                  // r8 = where the next stack argument goes

        // Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2
        cmp     r5, #0
        beq     KMP_LABEL(kmp_1)
        ldr     r2, [r6]

        subs    r5, r5, #1
        beq     KMP_LABEL(kmp_1)
        ldr     r3, [r6, #4]!

        // Loop, loading the rest of p_argv and writing the elements on the
        // stack.
KMP_LABEL(kmp_0):
        subs    r5, r5, #1
        beq     KMP_LABEL(kmp_1)
        ldr     r12, [r6, #4]!
        str     r12, [r8], #4
        b       KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
        blx     r4                      // call (*pkfn)()
        mov     r0, #1                  // return value 1

        // Recompute the value sp had right after the push above.
        sub     r4, FP, #FPOFF
        mov     sp, r4
# undef FP
# undef FPOFF

# if OMPT_SUPPORT
        mov     r1, #0
        str     r1, [r10]               // *exit_frame_ptr = 0
# endif
        pop     {r3-r11,pc}

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */

#if KMP_ARCH_PPC64

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//      r3:     pkfn
//      r4:     gtid
//      r5:     tid
//      r6:     argc
//      r7:     p_argv
//      r8:     &exit_frame
//
// return:      r3      (always 1/TRUE)
//
        .text
# if KMP_ARCH_PPC64_ELFv2
        .abiversion 2
# endif
        .globl  __kmp_invoke_microtask

# if KMP_ARCH_PPC64_ELFv2
        .p2align        4
# else
        .p2align        2
# endif

        .type   __kmp_invoke_microtask,@function

# if KMP_ARCH_PPC64_ELFv2
__kmp_invoke_microtask:
.Lfunc_begin0:
.Lfunc_gep0:
        addis 2, 12, .TOC.-.Lfunc_gep0@ha
        addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
        .localentry     __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
# else
        .section        .opd,"aw",@progbits
__kmp_invoke_microtask:
        .p2align        3
        .quad   .Lfunc_begin0
        .quad   .TOC.@tocbase
        .quad   0
        .text
.Lfunc_begin0:
# endif

// -- Begin __kmp_invoke_microtask
// mark_begin;

// We need to allocate a stack frame large enough to hold all of the parameters
// on the stack for the microtask plus what this function needs. That's 48
// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
// to save r30 to hold a copy of r8.

        .cfi_startproc
        mflr 0
        std 31, -8(1)           // save r31 just below the incoming r1
        std 0, 16(1)            // save the link register at 16(r1)

// This is unusual because normally we'd set r31 equal to r1 after the stack
// frame is established. In this case, however, we need to dynamically compute
// the stack frame size, and so we keep a direct copy of r1 to access our
// register save areas and restore the r1 value before returning.
        mr 31, 1
        .cfi_def_cfa_register r31
        .cfi_offset r31, -8
        .cfi_offset lr, 16

// Compute the size necessary for the local stack frame.
# if KMP_ARCH_PPC64_ELFv2
        li 12, 72
# else
        li 12, 88
# endif
        sldi 0, 6, 3            // r0 = 8 * argc
        add 12, 0, 12
        neg 12, 12              // r12 = -(fixed part + 8 * argc)

// We need to make sure that the stack frame stays aligned (to 16 bytes).
        li 0, -16
        and 12, 0, 12

// Establish the local stack frame.
        stdux 1, 1, 12          // store old r1 at the new r1 and update r1

# if OMPT_SUPPORT
        .cfi_offset r30, -16
        std 30, -16(31)
        std 1, 0(8)             // *exit_frame_ptr = r1
        mr 30, 8                // keep exit_frame_ptr in r30 across the call
# endif

// Store gtid and tid to the stack because they're passed by reference to the microtask.
        stw 4, -20(31)
        stw 5, -24(31)

        mr 12, 6                // r12 = argc
        mr 4, 7                 // r4 = p_argv

// p_argv[0..5] go to r5..r10; branch to the call as soon as argc is exhausted.
        cmpwi 0, 12, 1
        blt 0, .Lcall

        ld 5, 0(4)

        cmpwi 0, 12, 2
        blt 0, .Lcall

        ld 6, 8(4)

        cmpwi 0, 12, 3
        blt 0, .Lcall

        ld 7, 16(4)

        cmpwi 0, 12, 4
        blt 0, .Lcall

        ld 8, 24(4)

        cmpwi 0, 12, 5
        blt 0, .Lcall

        ld 9, 32(4)

        cmpwi 0, 12, 6
        blt 0, .Lcall

        ld 10, 40(4)

        cmpwi 0, 12, 7
        blt 0, .Lcall

// There are more than 6 microtask parameters, so we need to store the
// remainder to the stack.
        addi 12, 12, -6
        mtctr 12                // loop count = argc - 6

// These are set to 8 bytes before the first desired store address (we're using
// pre-increment loads and stores in the loop below). The parameter save area
// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
// 32 + 8*8 == 96 bytes above r1 for ELFv2.
        addi 4, 4, 40
# if KMP_ARCH_PPC64_ELFv2
        addi 12, 1, 88
# else
        addi 12, 1, 104
# endif

.Lnext:
        ldu 0, 8(4)
        stdu 0, 8(12)
        bdnz .Lnext

.Lcall:
# if KMP_ARCH_PPC64_ELFv2
        std 2, 24(1)            // save r2 for reload after the call
        mr 12, 3                // r12 = pkfn
# else
        std 2, 40(1)            // save r2 for reload after the call
// For ELFv1, we need to load the actual function address from the function descriptor.
        ld 12, 0(3)             // first doubleword: function address
        ld 2, 8(3)              // second doubleword -> r2
        ld 11, 16(3)            // third doubleword -> r11
# endif

        addi 3, 31, -20         // r3 = &gtid
        addi 4, 31, -24         // r4 = &tid

        mtctr 12
        bctrl                   // call (*pkfn)()
# if KMP_ARCH_PPC64_ELFv2
        ld 2, 24(1)
# else
        ld 2, 40(1)
# endif

# if OMPT_SUPPORT
        li 3, 0
        std 3, 0(30)            // *exit_frame_ptr = 0
# endif

        li 3, 1                 // return value 1

# if OMPT_SUPPORT
        ld 30, -16(31)
# endif

        mr 1, 31                // restore the incoming stack pointer
        ld 0, 16(1)
        ld 31, -8(1)
        mtlr 0
        blr

        .long   0
        .quad   0
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_PPC64 */

#if KMP_ARCH_RISCV64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   a0: pkfn
//   a1: gtid
//   a2: tid
//   a3: argc
//   a4: p_argv
//   a5: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp.
// registers:
//
//   t0: used to calculate the dynamic stack size / used to hold pkfn address
//   t1: used as temporary for stack placement calculation
//   t2: used as temporary for stack arguments
//   t3: used as temporary for number of remaining pkfn parms
//   t4: used to traverse p_argv array
//
// return: a0 (always 1/TRUE)
//

// Offsets of the gtid and tid slots relative to fp.
__gtid = -20
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        .p2align        1
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // First, save ra and fp
        addi    sp, sp, -16
        sd      ra, 8(sp)
        sd      fp, 0(sp)
        addi    fp, sp, 16      // fp = value of sp on entry
        .cfi_def_cfa    fp, 0
        .cfi_offset     ra, -8
        .cfi_offset     fp, -16

        // Compute the dynamic stack size:
        //
        // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
        //   reference
        // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
        //   function by register. Given that we have 8 of such registers (a[0-7])
        //   and two + 'argc' arguments (consider &gtid and &tid), we need to
        //   reserve max(0, argc - 6)*8 extra bytes
        //
        // The total number of bytes is then max(0, argc - 6)*8 + 8

        // Compute max(0, argc - 6) using the following bithack:
        // max(0, x) = x - (x & (x >> 31)), where x := argc - 6
        // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
        addi    t0, a3, -6      // t0 = x = argc - 6
        srai    t1, t0, 31      // t1 = x >> 31 (arithmetic)
        and     t1, t0, t1      // t1 = x & (x >> 31)
        sub     t0, t0, t1      // t0 = max(0, x)

        addi    t0, t0, 1       // one more 8-byte slot for gtid and tid

        slli    t0, t0, 3       // slots -> bytes
        sub     sp, sp, t0

        // Align the stack to 16 bytes
        andi    sp, sp, -16

        mv      t0, a0          // t0 = pkfn (a0 is reused for &gtid)
        mv      t3, a3          // t3 = argc
        mv      t4, a4          // t4 = p_argv

#if OMPT_SUPPORT
        // Save frame pointer into exit_frame
        sd      fp, 0(a5)
#endif

        // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)

        sw      a1, __gtid(fp)
        sw      a2, __tid(fp)

        addi    a0, fp, __gtid  // a0 = &gtid
        addi    a1, fp, __tid   // a1 = &tid

        // p_argv[0..5] go to a2..a7; leave as soon as the count reaches zero.
        beqz    t3, .L_kmp_3
        ld      a2, 0(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a3, 8(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a4, 16(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a5, 24(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a6, 32(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a7, 40(t4)

        // Prepare any additional argument passed through the stack
        addi    t4, t4, 48      // t4 = &p_argv[6]
        mv      t1, sp          // stack arguments are written upward from sp
        j       .L_kmp_2
.L_kmp_1:
        ld      t2, 0(t4)
        sd      t2, 0(t1)
        addi    t4, t4, 8
        addi    t1, t1, 8
.L_kmp_2:
        addi    t3, t3, -1
        bnez    t3, .L_kmp_1

.L_kmp_3:
        // Call pkfn function
        jalr    t0

        // Restore stack and return

        addi    a0, zero, 1     // return value 1

        addi    sp, fp, -16     // back to the ra/fp save area
        ld      fp, 0(sp)
        ld      ra, 8(sp)
        addi    sp, sp, 16
        ret
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End
// __kmp_invoke_microtask

#endif /* KMP_ARCH_RISCV64 */

#if KMP_ARCH_LOONGARCH64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   a0: pkfn
//   a1: gtid
//   a2: tid
//   a3: argc
//   a4: p_argv
//   a5: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp registers:
//
//   t0: used to calculate the dynamic stack size / used to hold pkfn address
//   t1: used as temporary for stack placement calculation
//   t2: used as temporary for stack arguments
//   t3: used as temporary for number of remaining pkfn parms
//   t4: used to traverse p_argv array
//
// return: a0 (always 1/TRUE)
//

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        .p2align        2
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // First, save ra and fp
        addi.d  $sp, $sp, -16
        st.d    $ra, $sp, 8
        st.d    $fp, $sp, 0
        addi.d  $fp, $sp, 16    // fp = value of sp on entry
        .cfi_def_cfa    22, 0
        .cfi_offset     1, -8
        .cfi_offset     22, -16

        // Compute the dynamic stack size:
        //
        // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
        //   reference
        // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
        //   function by register. Given that we have 8 of such registers (a[0-7])
        //   and two + 'argc' arguments (consider &gtid and &tid), we need to
        //   reserve max(0, argc - 6)*8 extra bytes
        //
        // The total number of bytes is then max(0, argc - 6)*8 + 8

        addi.d  $t0, $a3, -6    // t0 = argc - 6
        slt     $t1, $t0, $zero // t1 = 1 if argc - 6 is negative, else 0
        masknez $t0, $t0, $t1   // t0 = 0 if t1 is nonzero, i.e. max(0, argc - 6)
        addi.d  $t0, $t0, 1     // one more 8-byte slot for gtid and tid
        slli.d  $t0, $t0, 3     // slots -> bytes
        sub.d   $sp, $sp, $t0

        // Align the stack to 16 bytes
        bstrins.d $sp, $zero, 3, 0

        move    $t0, $a0        // t0 = pkfn (a0 is reused for &gtid)
        move    $t3, $a3        // t3 = argc
        move    $t4, $a4        // t4 = p_argv

#if OMPT_SUPPORT
        // Save frame pointer into exit_frame
        st.d    $fp, $a5, 0
#endif

        // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)

        // The gtid and tid locals live at fp - 20 and fp - 24.
        st.w    $a1, $fp, -20
        st.w    $a2, $fp, -24

        addi.d  $a0, $fp, -20   // a0 = &gtid
        addi.d  $a1, $fp, -24   // a1 = &tid

        // p_argv[0..5] go to a2..a7; leave as soon as the count reaches zero.
        beqz    $t3, .L_kmp_3
        ld.d    $a2, $t4, 0

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a3, $t4, 8

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a4, $t4, 16

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a5, $t4, 24

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a6, $t4, 32

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a7, $t4, 40

        // Prepare any additional argument passed through the stack
        addi.d  $t4, $t4, 48    // t4 = &p_argv[6]
        move    $t1, $sp        // stack arguments are written upward from sp
        b       .L_kmp_2
.L_kmp_1:
        ld.d    $t2, $t4, 0
        st.d    $t2, $t1, 0
        addi.d  $t4, $t4, 8
        addi.d  $t1, $t1, 8
.L_kmp_2:
        addi.d  $t3, $t3, -1
        bnez    $t3, .L_kmp_1

.L_kmp_3:
        // Call pkfn function
        jirl    $ra, $t0, 0

        // Restore stack and return

        addi.d  $a0, $zero, 1   // return value 1

        addi.d  $sp, $fp, -16   // back to the ra/fp save area
        ld.d    $fp, $sp, 0
        ld.d    $ra, $sp, 8
        addi.d  $sp, $sp, 16
        jr      $ra
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End __kmp_invoke_microtask

#endif /* KMP_ARCH_LOONGARCH64 */
// __kmp_unnamed_critical_addr for ARM and MIPS: a 4-byte data word holding
// the address of .gomp_critical_user_.
// NOTE(review): COMMON is a macro defined elsewhere in this file; its
// arguments here are presumably size (32) and alignment (3) - confirm
// against the macro definition.
#if KMP_ARCH_ARM || KMP_ARCH_MIPS
        .data
        COMMON .gomp_critical_user_, 32, 3
        .data
        .align 4
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
#ifdef __ELF__
        .size __kmp_unnamed_critical_addr,4
#endif
#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS */

// Same definition for the 64-bit targets listed below, using an 8-byte word.
// KMP_PREFIX_UNDERSCORE falls back to the identity mapping when no earlier
// section of this file has defined it.
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
        .data
        COMMON .gomp_critical_user_, 32, 3
        .data
        .align 8
        .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
        .8byte .gomp_critical_user_
#ifdef __ELF__
        .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
#endif
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
          KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 */

// Emit an empty .note.GNU-stack section on Linux. The section type is
// spelled %progbits for ARM and AArch64 and @progbits elsewhere; the ARM
// assembler treats @ as a comment character.
#if KMP_OS_LINUX
# if KMP_ARCH_ARM || KMP_ARCH_AARCH64
.section .note.GNU-stack,"",%progbits
# else
.section .note.GNU-stack,"",@progbits
# endif
#endif