//  z_Linux_asm.S:  - microtasking routines specifically
//                    written for Intel platforms running Linux* OS

//
////===----------------------------------------------------------------------===//
////
//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
//// See https://llvm.org/LICENSE.txt for license information.
//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
////
////===----------------------------------------------------------------------===//
//

// -----------------------------------------------------------------------
// macros
// -----------------------------------------------------------------------

#include "kmp_config.h"

// =======================================================================
// Helper macros for IA-32 / Intel(R) 64.
//
//   pause_op                 spin-wait hint (not defined when KMP_MIC is set)
//   KMP_PREFIX_UNDERSCORE(x) platform spelling of the global symbol x
//   KMP_LABEL(x)             platform spelling of a local label
//   ALIGN n                  alignment directive
//   PROC name                open a global routine
//   DEBUG_INFO name          close a routine opened with PROC
//   KMP_CFI_*                call-frame-information wrappers (empty on Darwin)
// =======================================================================
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

# if KMP_MIC
// On MIC, 'delay r16/r32/r64' is meant to replace 'pause'.  The delay
// operation takes the current thread out of the round-robin HT mechanism,
// which raises the issue rate of the other threads on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but makes the EPCC
// barrier time grow greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
// The macro below is intentionally left commented out:
//
//.macro pause_op
//	mov    $100, %rax
//	delay  %rax
//.endm
# else
// Raw opcode bytes for the 'pause' instruction.
#  define pause_op   .byte 0xf3,0x90
# endif // KMP_MIC

# if KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // OS X* symbols take an extra underscore
#  define KMP_LABEL(x) L_##x             // builds the name of a label
// The CFI wrappers expand to nothing on OS X*.
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro
// ALIGN: the argument is handed to .align unchanged.
.macro ALIGN
        .align $0
.endmacro
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro
// PROC: align, export and define the (underscore-prefixed) entry label.
.macro PROC
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) x  // Linux* OS symbols take no extra underscore
// Labels are spelled so that they don't override function names in gdb's
// backtraces.  The MIC assembler doesn't accept the .L syntax; a plain L
// works there (as well as on OS X*).
#  if KMP_MIC
#   define KMP_LABEL(x) L_##x         // local label
#  else
#   define KMP_LABEL(x) .L_##x        // local label hidden from backtraces
#  endif // KMP_MIC
// ALIGN size: align to 2**size.
.macro ALIGN size
        .align 1<<(\size)
.endm
// DEBUG_INFO proc: end the CFI region and emit ELF type/size for proc.
.macro DEBUG_INFO proc
        .cfi_endproc
// Not sure why we need .type and .size for the functions
        .align 16
        .type  \proc,@function
        .size  \proc,.-\proc
.endm
// PROC proc: align (ALIGN 4), export and define proc, then start its CFI region.
.macro PROC proc
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
        .cfi_startproc
.endm
// Thin wrappers around the .cfi_* directives.
.macro KMP_CFI_DEF_OFFSET sz
        .cfi_def_cfa_offset     \sz
.endm
.macro KMP_CFI_OFFSET reg, sz
        .cfi_offset     \reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
        .cfi_def_cfa_register   \reg
.endm
.macro KMP_CFI_DEF reg, sz
        .cfi_def_cfa    \reg,\sz
.endm
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

// =======================================================================
// Helper macros for ARM / AArch64 (Linux, Darwin, Windows).
// Same macro names as the x86 group above; CFI is only emitted in the
// non-Darwin, non-Windows flavour.
// =======================================================================
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)

# if KMP_OS_DARWIN
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // OS X* symbols take an extra underscore
#  define KMP_LABEL(x) L_##x             // builds the name of a label

.macro ALIGN
        .align $0
.endmacro

.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

.macro PROC
        ALIGN  4
        .globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# elif KMP_OS_WINDOWS
#  define KMP_PREFIX_UNDERSCORE(x) x  // Windows/ARM64 symbols take no extra underscore
// Labels are spelled so that they don't override function names in gdb's backtraces
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

.macro ALIGN size
        .align 1<<(\size)
.endm

.macro DEBUG_INFO proc
        ALIGN 2
.endm

.macro PROC proc
        ALIGN 2
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.endm
# else // KMP_OS_DARWIN || KMP_OS_WINDOWS
#  define KMP_PREFIX_UNDERSCORE(x) x  // Linux* OS symbols take no extra underscore
// Labels are spelled so that they don't override function names in gdb's backtraces
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

.macro ALIGN size
        .align 1<<(\size)
.endm

// DEBUG_INFO proc: end the CFI region and emit ELF type/size for proc.
// 32-bit ARM spells the symbol type %function, everything else @function.
.macro DEBUG_INFO proc
        .cfi_endproc
// Not sure why we need .type and .size for the functions
        ALIGN 2
#if KMP_ARCH_ARM
        .type  \proc,%function
#else
        .type  \proc,@function
#endif
        .size  \proc,.-\proc
.endm

.macro PROC proc
        ALIGN 2
        .globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
        .cfi_startproc
.endm
# endif // KMP_OS_DARWIN / KMP_OS_WINDOWS / other

# if KMP_OS_LINUX
// BTI and PAC gnu property note.
// GNU_PROPERTY(type, value) emits one entry into .note.gnu.property.
#  define NT_GNU_PROPERTY_TYPE_0 5
#  define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
#  define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
#  define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2

#  define GNU_PROPERTY(type, value)                                            \
    .pushsection .note.gnu.property, "a";                                      \
    .p2align 3;                                                                \
    .word 4;                                                                   \
    .word 16;                                                                  \
    .word NT_GNU_PROPERTY_TYPE_0;                                              \
    .asciz "GNU";                                                              \
    .word type;                                                                \
    .word 4;                                                                   \
    .word value;                                                               \
    .word 0;                                                                   \
    .popsection
# endif

// BTI_FLAG / PAC_FLAG mirror the compiler's branch-protection defaults.
# ifdef __ARM_FEATURE_BTI_DEFAULT
#  define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
# else
#  define BTI_FLAG 0
# endif
# if __ARM_FEATURE_PAC_DEFAULT & 3
#  define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
# else
#  define PAC_FLAG 0
# endif

// PACBTI_C / PACBTI_RET are the hint instructions the routines place at
// entry / before return; they are empty when neither feature is enabled.
# if (BTI_FLAG | PAC_FLAG) != 0
#  if PAC_FLAG != 0
#   define PACBTI_C hint #25
#   define PACBTI_RET hint #29
#  else
#   define PACBTI_C hint #34
#   define PACBTI_RET
#  endif
#  define GNU_PROPERTY_BTI_PAC                                                 \
    GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
# else
#  define PACBTI_C
#  define PACBTI_RET
#  define GNU_PROPERTY_BTI_PAC
# endif
#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)

// COMMON name, size, align_power: declare a common symbol.
//   Darwin  : alignment argument is dropped
//   Windows : align_power is passed through as given
//   other   : alignment is passed as 2**align_power
.macro COMMON name, size, align_power
#if KMP_OS_DARWIN
        .comm \name, \size
#elif KMP_OS_WINDOWS
        .comm \name, \size, \align_power
#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS
        .comm \name, \size, (1<<(\align_power))
#endif
.endm

// -----------------------------------------------------------------------
// data
// -----------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT

// Support for unnamed common blocks.
//
// The symbol ".gomp_critical_user_" contains a ".", so it has to be
// declared from assembly.  __kmp_unnamed_critical_addr is a pointer-sized
// object initialised with the address of that common block.

# if KMP_ARCH_X86
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .long .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 4
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,4
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */

# if KMP_ARCH_X86_64
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .quad .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
// NOTE(review): the non-Darwin ALIGN macro expands to .align 1<<(size), so
// "ALIGN 8" requests 256-byte alignment for an 8-byte object -- confirm
// that this is intended.
        ALIGN 8
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .8byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,8
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */

#endif /* KMP_GOMP_COMPAT */


#if KMP_ARCH_X86 && !KMP_ARCH_PPC64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture
// running Linux* OS
//
// All routines below take their arguments on the stack: on entry the
// first argument is at 4(%esp), the second at 8(%esp), and so on.
// -----------------------------------------------------------------------

        .ident "Intel Corporation"
        .data
        ALIGN 4

//------------------------------------------------------------------------
// void
// __kmp_x86_pause( void );
//
// Executes pause_op once and returns.

        .text
        PROC  __kmp_x86_pause

        pause_op
        ret

        DEBUG_INFO __kmp_x86_pause

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically adds d to *p.
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%eax  (value of *p before the add, as left by xadd)

        PROC  __kmp_test_then_add32

        movl      8(%esp), %eax         // d
        movl      4(%esp), %ecx         // p
        lock
        xaddl     %eax, (%ecx)          // *p += d; %eax = previous *p
        ret

        DEBUG_INFO __kmp_test_then_add32

//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int8
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically stores d into *p.
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%al  (previous *p; the rest of %eax is not written here)
        PROC  __kmp_xchg_fixed8

        movb      8(%esp), %al          // d
        movl      4(%esp), %ecx         // p

        lock
        xchgb     %al, (%ecx)           // swap; %al = previous *p
        ret

        DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// Atomically stores d into *p.
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%ax  (previous *p)
        PROC  __kmp_xchg_fixed16

        movw      8(%esp), %ax          // d
        movl      4(%esp), %ecx         // p

        lock
        xchgw     %ax, (%ecx)           // swap; %ax = previous *p
        ret

        DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically stores d into *p.
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%eax  (previous *p)
        PROC  __kmp_xchg_fixed32

        movl      8(%esp), %eax         // d
        movl      4(%esp), %ecx         // p

        lock
        xchgl     %eax, (%ecx)          // swap; %eax = previous *p
        ret

        DEBUG_INFO __kmp_xchg_fixed32


//------------------------------------------------------------------------
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// If *p == cv, atomically stores sv into *p.
//
// parameters:
// 	p:	4(%esp)
// 	cv:	8(%esp)
// 	sv:	12(%esp)
//
// return:	%eax = 1 if the store happened, 0 otherwise
        PROC  __kmp_compare_and_store8

        movb      12(%esp), %dl         // sv
        movb      8(%esp), %al          // cv
        movl      4(%esp), %ecx         // p
        lock
        cmpxchgb  %dl, (%ecx)
        sete      %al                   // %al = 1 when %al matched (%ecx), else 0
        and       $1, %eax              // clear the remaining bits: %eax is 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store8

//------------------------------------------------------------------------
// kmp_int16
// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
//
// 16-bit variant of __kmp_compare_and_store8; same stack layout.
//
// return:	%eax = 1 if the store happened, 0 otherwise
        PROC  __kmp_compare_and_store16

        movw      12(%esp), %dx         // sv
        movw      8(%esp), %ax          // cv
        movl      4(%esp), %ecx         // p
        lock
        cmpxchgw  %dx, (%ecx)
        sete      %al                   // %al = 1 when %ax matched (%ecx), else 0
        and       $1, %eax              // clear the remaining bits: %eax is 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store16

//------------------------------------------------------------------------
// kmp_int32
// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
//
// 32-bit variant of __kmp_compare_and_store8; same stack layout.
//
// return:	%eax = 1 if the store happened, 0 otherwise
        PROC  __kmp_compare_and_store32

        movl      12(%esp), %edx        // sv
        movl      8(%esp), %eax         // cv
        movl      4(%esp), %ecx         // p
        lock
        cmpxchgl  %edx, (%ecx)
        sete      %al                   // %al = 1 when %eax matched (%ecx), else 0
        and       $1, %eax              // clear the remaining bits: %eax is 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store32

//------------------------------------------------------------------------
// kmp_int32
// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s );
//
// 64-bit compare-and-store built on cmpxchg8b.  A frame is set up because
// the callee-saved %ebx and %edi are needed; after the prologue the
// arguments are addressed relative to %ebp:
// 	p:	8(%ebp)
// 	cv:	12(%ebp) low, 16(%ebp) high
// 	sv:	20(%ebp) low, 24(%ebp) high
//
// return:	%eax = 1 if the store happened, 0 otherwise
        PROC  __kmp_compare_and_store64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      8(%ebp), %edi         // p
        movl      20(%ebp), %ebx        // "sv" low order word
        movl      24(%ebp), %ecx        // "sv" high order word
        movl      12(%ebp), %eax        // "cv" low order word
        movl      16(%ebp), %edx        // "cv" high order word
        lock
        cmpxchg8b (%edi)
        sete      %al                   // %al = 1 when %edx:%eax matched (%edi), else 0
        and       $1, %eax              // clear the remaining bits: %eax is 0 or 1
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store64

//------------------------------------------------------------------------
// kmp_int8
// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
//
// Like __kmp_compare_and_store8, but returns whatever cmpxchg leaves in
// the accumulator instead of a 0/1 flag.
//
// return:	%al
        PROC  __kmp_compare_and_store_ret8

        movb      12(%esp), %dl         // sv
        movb      8(%esp), %al          // cv
        movl      4(%esp), %ecx         // p
        lock
        cmpxchgb  %dl, (%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8

//------------------------------------------------------------------------
// kmp_int16
// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
//                               kmp_int16 sv);
//
// return:	%ax  (accumulator as left by cmpxchg)
        PROC  __kmp_compare_and_store_ret16

        movw      12(%esp), %dx         // sv
        movw      8(%esp), %ax          // cv
        movl      4(%esp), %ecx         // p
        lock
        cmpxchgw  %dx, (%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16

//------------------------------------------------------------------------
// kmp_int32
// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
//                               kmp_int32 sv);
//
// return:	%eax  (accumulator as left by cmpxchg)
        PROC  __kmp_compare_and_store_ret32

        movl      12(%esp), %edx        // sv
        movl      8(%esp), %eax         // cv
        movl      4(%esp), %ecx         // p
        lock
        cmpxchgl  %edx, (%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32

//------------------------------------------------------------------------
// kmp_int64
// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
//                               kmp_int64 sv);
//
// Same frame and argument layout as __kmp_compare_and_store64.
//
// return:	%edx:%eax  (as left by cmpxchg8b)
        PROC  __kmp_compare_and_store_ret64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      8(%ebp), %edi         // p
        movl      20(%ebp), %ebx        // "sv" low order word
        movl      24(%ebp), %ecx        // "sv" high order word
        movl      12(%ebp), %eax        // "cv" low order word
        movl      16(%ebp), %edx        // "cv" high order word
        lock
        cmpxchg8b (%edi)
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores the 32-bit pattern of data into *addr and returns the
// value that was replaced.
//
// parameters (on entry):
// 	addr:	4(%esp)
// 	data:	8(%esp)
// parameters (after the prologue below):
// 	addr:	8(%ebp)
// 	data:	12(%ebp)
//
// return:	st(0) = previous *addr (the same 32 bits are also left in %eax)
        PROC  __kmp_xchg_real32

        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp                // -4(%ebp): scratch slot for the old value
        pushl   %esi                    // callee-saved, used to hold addr

        // With the saved %ebp at 0(%ebp) and the return address at 4(%ebp),
        // the first argument lives at 8(%ebp) and the second at 12(%ebp).
        movl    8(%ebp), %esi           // addr
        movl    12(%ebp), %eax          // data, as raw bits

        lock
        xchgl   %eax, (%esi)            // %eax = the value actually replaced

        // Move the exchanged value to the x87 stack through memory.  Exactly
        // one value is pushed, so st(0) is the only live x87 register on
        // return.
        movl    %eax, -4(%ebp)
        flds    -4(%ebp)

        popl    %esi
        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_xchg_real32

# endif /* !KMP_ASM_INTRINS */

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// Stack layout after the prologue:
// 	pkfn:		8(%ebp)
// 	gtid:		12(%ebp)
// 	tid:		16(%ebp)
// 	argc:		20(%ebp)
// 	p_argv:		24(%ebp)
// 	exit_frame_ptr:	28(%ebp)	(OMPT_SUPPORT only)
// 	argv (local):	-4(%ebp)
// 	temp (local):	-8(%ebp)
// 	saved %ebx:	-12(%ebp)

// -- Begin __kmp_invoke_microtask
// mark_begin;
        PROC  __kmp_invoke_microtask

        pushl   %ebp
        KMP_CFI_DEF_OFFSET 8
        KMP_CFI_OFFSET ebp,-8
        movl    %esp,%ebp               // establish the base pointer for this routine.
        KMP_CFI_REGISTER ebp
        subl    $8,%esp                 // allocate space for two local variables.
                                        // These variables are:
                                        //	argv: -4(%ebp)
                                        //	temp: -8(%ebp)
                                        //
        pushl   %ebx                    // save %ebx to use during this routine
                                        //
#if OMPT_SUPPORT
        movl    28(%ebp),%ebx           // get exit_frame address
        movl    %ebp,(%ebx)             // save exit_frame
#endif

        // Pre-adjust %esp so that, once (argc+2) words have been pushed,
        // the stack pointer sits on a 128-byte boundary at the call.
        movl    20(%ebp),%ebx           // Stack alignment - # args
        addl    $2,%ebx                 // #args +2  Always pass at least 2 args (gtid and tid)
        shll    $2,%ebx                 // Number of bytes used on stack: (#args+2)*4
        movl    %esp,%eax               //
        subl    %ebx,%eax               // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
        movl    %eax,%ebx               // Save to %ebx
        andl    $0xFFFFFF80,%eax        // mask off 7 bits
        subl    %eax,%ebx               // Amount to subtract from %esp
        subl    %ebx,%esp               // Prepare the stack ptr --
                                        // now it will be aligned on 128-byte boundary at the call

        movl    24(%ebp),%eax           // copy from p_argv[]
        movl    %eax,-4(%ebp)           // into the local variable *argv.

        movl    20(%ebp),%ebx           // argc is 20(%ebp)
        shll    $2,%ebx                 // %ebx = argc*4 = byte offset just past the last argument

        // Push argv[argc-1] ... argv[0]; %ebx counts down in steps of 4.
KMP_LABEL(invoke_2):
        cmpl    $0,%ebx
        jg      KMP_LABEL(invoke_4)
        jmp     KMP_LABEL(invoke_3)
        ALIGN 2
KMP_LABEL(invoke_4):
        movl    -4(%ebp),%eax
        subl    $4,%ebx                 // decrement argc.
        addl    %ebx,%eax               // index into argv.
        movl    (%eax),%edx
        pushl   %edx

        jmp     KMP_LABEL(invoke_2)
        ALIGN 2
KMP_LABEL(invoke_3):
        leal    16(%ebp),%eax           // push & tid
        pushl   %eax

        leal    12(%ebp),%eax           // push & gtid
        pushl   %eax

        movl    8(%ebp),%ebx
        call    *%ebx                   // call (*pkfn)();

        movl    $1,%eax                 // return 1;

        movl    -12(%ebp),%ebx          // restore %ebx
        leave
        KMP_CFI_DEF esp,4
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask


// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Reads the time-stamp counter. rdtsc leaves the 64-bit count in %edx:%eax,
// which is returned as-is.
        PROC  __kmp_hardware_timestamp
        rdtsc
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp

#endif /* KMP_ARCH_X86 */


#if KMP_ARCH_X86_64

// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------

// -- Machine type P
// mark_description "Intel Corporation";
        .ident "Intel Corporation"
// --   .file "z_Linux_asm.S"
        .data
        ALIGN 4

// To prevent getting our code into .data section .text added to every routine
// definition for x86_64.
//------------------------------------------------------------------------
# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically adds d to *p and returns the value *p held before the add
// (xadd leaves the old memory value in its register operand).
//
// parameters:
//      p:      %rdi
//      d:      %esi
//
// return:      %eax
        .text
        PROC  __kmp_test_then_add32

        movl      %esi, %eax    // "d"
        lock
        xaddl     %eax,(%rdi)
        ret

        DEBUG_INFO __kmp_test_then_add32


//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// 64-bit counterpart of __kmp_test_then_add32: atomically adds d to *p and
// returns the previous value of *p.
//
// parameters:
//      p:      %rdi
//      d:      %rsi
//      return: %rax
        .text
        PROC  __kmp_test_then_add64

        movq      %rsi, %rax    // "d"
        lock
        xaddq     %rax,(%rdi)
        ret

        DEBUG_INFO __kmp_test_then_add64


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int8
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically stores d to *p and returns the previous value of *p.
// Only %al is written; the upper bits of %eax are not defined by this
// routine.
//
// parameters:
//      p:      %rdi
//      d:      %sil
//
// return:      %al
        .text
        PROC  __kmp_xchg_fixed8

        movb      %sil, %al     // "d"

        lock                    // xchg with a memory operand is always locked;
                                // the explicit prefix is redundant but harmless
        xchgb     %al,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed8


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// Atomically stores d to *p and returns the previous value of *p.
// Only %ax is written.
//
// parameters:
//      p:      %rdi
//      d:      %si
//      return:     %ax
        .text
        PROC  __kmp_xchg_fixed16

        movw      %si, %ax      // "d"

        lock
        xchgw     %ax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed16


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically stores d to *p and returns the previous value of *p.
//
// parameters:
//      p:      %rdi
//      d:      %esi
//
// return:      %eax
        .text
        PROC  __kmp_xchg_fixed32

        movl      %esi, %eax    // "d"

        lock
        xchgl     %eax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed32


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// Atomically stores d to *p and returns the previous value of *p.
//
// parameters:
//      p:      %rdi
//      d:      %rsi
// return:      %rax
        .text
        PROC  __kmp_xchg_fixed64

        movq      %rsi, %rax    // "d"

        lock
        xchgq     %rax,(%rdi)
        ret

        DEBUG_INFO __kmp_xchg_fixed64


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomically stores sv to *p if *p == cv.
// Returns 1 if the store happened, 0 otherwise.
//
// parameters:
//      p:      %rdi
//      cv:     %esi    (low byte, %sil, is used)
//      sv:     %edx    (low byte, %dl, is used)
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store8

        movb      %sil, %al     // "cv"
        lock
        cmpxchgb  %dl,(%rdi)
        sete      %al           // if %al == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // clear everything but bit 0 so %rax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store8


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// Atomically stores sv to *p if *p == cv.
// Returns 1 if the store happened, 0 otherwise.
//
// parameters:
//      p:      %rdi
//      cv:     %si
//      sv:     %dx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store16

        movw      %si, %ax      // "cv"
        lock
        cmpxchgw  %dx,(%rdi)
        sete      %al           // if %ax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // clear everything but bit 0 so %rax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store16


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Atomically stores sv to *p if *p == cv.
// Returns 1 if the store happened, 0 otherwise.
//
// parameters:
//      p:      %rdi
//      cv:     %esi
//      sv:     %edx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store32

        movl      %esi, %eax    // "cv"
        lock
        cmpxchgl  %edx,(%rdi)
        sete      %al           // if %eax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // clear everything but bit 0 so %rax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store32


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %rsi
//      sv:     %rdx
//      return: %eax    (1 if sv was stored to *p, 0 otherwise)
        .text
        PROC  __kmp_compare_and_store64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)
        sete      %al           // if %rax == (%rdi) set %al = 1 else set %al = 0
        andq      $1, %rax      // clear everything but bit 0 so %rax is exactly 0 or 1
        ret

        DEBUG_INFO __kmp_compare_and_store64

//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomically stores sv to *p if *p == cv.
// Returns the value *p held before the operation (equal to cv on success).
//
// parameters:
//      p:      %rdi
//      cv:     %esi    (low byte, %sil, is used)
//      sv:     %edx    (low byte, %dl, is used)
//
// return:      %al     (upper bits of %eax are not written by this routine)
        .text
        PROC  __kmp_compare_and_store_ret8

        movb      %sil, %al     // "cv"
        lock
        cmpxchgb  %dl,(%rdi)    // on mismatch cmpxchg loads the current *p into %al
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
//      p:      %rdi
//      cv:     %si
//      sv:     %dx
//
// return:      %ax     (previous value of *p; upper bits of %eax are not written)
        .text
        PROC  __kmp_compare_and_store_ret16

        movw      %si, %ax      // "cv"
        lock
        cmpxchgw  %dx,(%rdi)    // on mismatch cmpxchg loads the current *p into %ax
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Atomically stores sv to *p if *p == cv.
// Returns the value *p held before the operation (equal to cv on success).
//
// parameters:
//      p:      %rdi
//      cv:     %esi
//      sv:     %edx
//
// return:      %eax
        .text
        PROC  __kmp_compare_and_store_ret32

        movl      %esi, %eax    // "cv"
        lock
        cmpxchgl  %edx,(%rdi)   // on mismatch cmpxchg loads the current *p into %eax
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32


//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// Atomically stores sv to *p if *p == cv.
// Returns the value *p held before the operation (equal to cv on success).
//
// parameters:
//      p:      %rdi
//      cv:     %rsi
//      sv:     %rdx
//      return: %rax
        .text
        PROC  __kmp_compare_and_store_ret64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)   // on mismatch cmpxchg loads the current *p into %rax
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64

# endif /* !KMP_ASM_INTRINS */


# if !KMP_MIC

# if !KMP_ASM_INTRINS

//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores data to *addr and returns the previous contents of
// *addr. The value is moved through %eax as a raw 32-bit pattern.
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 4 bytes)
//
// return:      %xmm0 (lower 4 bytes)
        .text
        PROC  __kmp_xchg_real32

        movd    %xmm0, %eax     // load "data" to eax

        lock
        xchgl %eax, (%rdi)

        movd    %eax, %xmm0     // load old value into return register

        ret

        DEBUG_INFO __kmp_xchg_real32


//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// Atomically stores data to *addr and returns the previous contents of
// *addr. The value is moved through %rax as a raw 64-bit pattern.
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 8 bytes)
//      return: %xmm0 (lower 8 bytes)
        .text
        PROC  __kmp_xchg_real64

        movd    %xmm0, %rax     // load "data" to rax

        lock
        xchgq  %rax, (%rdi)

        movd    %rax, %xmm0     // load old value into return register
        ret

        DEBUG_INFO __kmp_xchg_real64


# endif /* !KMP_ASM_INTRINS */

# endif /* !KMP_MIC */

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// note: at call to pkfn must have %rsp 128-byte aligned for compiler
//
// parameters:
//      %rdi:   pkfn
//      %esi:   gtid
//      %edx:   tid
//      %ecx:   argc
//      %r8:    p_argv
//      %r9:    &exit_frame
//
// locals:
//      __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//      __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//      %rax:   used all over the place
//      %rdx:   used in stack pointer alignment calculation
//      %r11:   used to traverse p_argv array
//      %rsi:   used as temporary for stack parameters
//              used as temporary for number of pkfn parms to push
//      %rbx:   used to hold pkfn address, and zero constant, callee-save
//
// return:      %eax    (always 1/TRUE)
//
// Frame layout after the three pushes below (offsets from %rbp):
//       -8: saved %rbx
//      -16: gtid  (__gtid)
//      -24: tid   (__tid)
__gtid = -16
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        PROC  __kmp_invoke_microtask

        pushq   %rbp            // save base pointer
        KMP_CFI_DEF_OFFSET 16
        KMP_CFI_OFFSET rbp,-16
        movq    %rsp,%rbp       // establish the base pointer for this routine.
        KMP_CFI_REGISTER rbp

#if OMPT_SUPPORT
        movq    %rbp, (%r9)     // save exit_frame
#endif

        pushq   %rbx            // %rbx is callee-saved register
        pushq   %rsi            // Put gtid on stack so can pass &gtid to pkfn
        pushq   %rdx            // Put tid on stack so can pass &tid to pkfn

        movq    %rcx, %rax      // Stack alignment calculation begins; argc -> %rax
        movq    $0, %rbx        // constant for cmovs later
        subq    $4, %rax        // subtract four args passed in registers to pkfn
#if KMP_MIC
        js      KMP_LABEL(kmp_0)        // jump to movq
        jmp     KMP_LABEL(kmp_0_exit)   // jump ahead
KMP_LABEL(kmp_0):
        movq    %rbx, %rax      // zero negative value in %rax <- max(0, argc-4)
KMP_LABEL(kmp_0_exit):
#else
        cmovsq  %rbx, %rax      // zero negative value in %rax <- max(0, argc-4)
#endif // KMP_MIC

        movq    %rax, %rsi      // save max(0, argc-4) -> %rsi for later
        shlq    $3, %rax        // Number of bytes used on stack: max(0, argc-4)*8

        movq    %rsp, %rdx      //
        subq    %rax, %rdx      // %rsp-(max(0,argc-4)*8) -> %rdx --
                                // without align, stack ptr would be this
        movq    %rdx, %rax      // Save to %rax

        andq    $0xFFFFFFFFFFFFFF80, %rax  // mask off lower 7 bits (128 bytes align)
        subq    %rax, %rdx      // Amount to subtract from %rsp
        subq    %rdx, %rsp      // Prepare the stack ptr --
                                // now %rsp will align to 128-byte boundary at call site

                                // setup pkfn parameter reg and stack
        movq    %rcx, %rax      // argc -> %rax
        cmpq    $0, %rsi
        je      KMP_LABEL(kmp_invoke_pass_parms)        // jump ahead if no parms to push
        shlq    $3, %rcx        // argc*8 -> %rcx
        movq    %r8, %rdx       // p_argv -> %rdx
        addq    %rcx, %rdx      // &p_argv[argc] -> %rdx

        movq    %rsi, %rcx      // max (0, argc-4) -> %rcx

KMP_LABEL(kmp_invoke_push_parms):
        // push nth - 7th parms to pkfn on stack
        // (%rdx walks down from &p_argv[argc]; %ecx counts the pushes left)
        subq    $8, %rdx        // decrement p_argv pointer to previous parm
        movq    (%rdx), %rsi    // p_argv[%rcx+3] -> %rsi
        pushq   %rsi            // push p_argv[%rcx+3] onto stack (reverse order)
        subl    $1, %ecx

// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
//              if the name of the label that is an operand of this jecxz starts with a dot (".");
//         Apple's linker does not support 1-byte length relocation;
//         Resolution: replace all .labelX entries with L_labelX.

        jecxz   KMP_LABEL(kmp_invoke_pass_parms)  // stop when four p_argv[] parms left
        jmp     KMP_LABEL(kmp_invoke_push_parms)
        ALIGN 3
KMP_LABEL(kmp_invoke_pass_parms):       // put 1st - 6th parms to pkfn in registers.
                                // order here is important to avoid trashing
                                // registers used for both input and output parms!
        movq    %rdi, %rbx      // pkfn -> %rbx
        leaq    __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
        leaq    __tid(%rbp), %rsi  // &tid -> %rsi (store 2nd parm to pkfn)
        // Check if argc is 0
        cmpq $0, %rax
        je KMP_LABEL(kmp_no_args) // Jump ahead

        movq    %r8, %r11       // p_argv -> %r11

#if KMP_MIC
        cmpq    $4, %rax        // argc >= 4?
        jns     KMP_LABEL(kmp_4)        // jump to movq
        jmp     KMP_LABEL(kmp_4_exit)   // jump ahead
KMP_LABEL(kmp_4):
        movq    24(%r11), %r9   // p_argv[3] -> %r9 (store 6th parm to pkfn)
KMP_LABEL(kmp_4_exit):

        cmpq    $3, %rax        // argc >= 3?
        jns     KMP_LABEL(kmp_3)        // jump to movq
        jmp     KMP_LABEL(kmp_3_exit)   // jump ahead
KMP_LABEL(kmp_3):
        movq    16(%r11), %r8   // p_argv[2] -> %r8 (store 5th parm to pkfn)
KMP_LABEL(kmp_3_exit):

        cmpq    $2, %rax        // argc >= 2?
        jns     KMP_LABEL(kmp_2)        // jump to movq
        jmp     KMP_LABEL(kmp_2_exit)   // jump ahead
KMP_LABEL(kmp_2):
        movq    8(%r11), %rcx   // p_argv[1] -> %rcx (store 4th parm to pkfn)
KMP_LABEL(kmp_2_exit):

        cmpq    $1, %rax        // argc >= 1?
        jns     KMP_LABEL(kmp_1)        // jump to movq
        jmp     KMP_LABEL(kmp_1_exit)   // jump ahead
KMP_LABEL(kmp_1):
        movq    (%r11), %rdx    // p_argv[0] -> %rdx (store 3rd parm to pkfn)
KMP_LABEL(kmp_1_exit):
#else
        // NOTE(review): a cmov with a memory source is generally understood to
        // read its operand even when the condition is false, so for
        // 1 <= argc < 4 these loads touch p_argv[argc..3] -- confirm callers
        // always back p_argv with at least four readable slots.
        cmpq    $4, %rax        // argc >= 4?
        cmovnsq 24(%r11), %r9   // p_argv[3] -> %r9 (store 6th parm to pkfn)

        cmpq    $3, %rax        // argc >= 3?
        cmovnsq 16(%r11), %r8   // p_argv[2] -> %r8 (store 5th parm to pkfn)

        cmpq    $2, %rax        // argc >= 2?
        cmovnsq 8(%r11), %rcx   // p_argv[1] -> %rcx (store 4th parm to pkfn)

        cmpq    $1, %rax        // argc >= 1?
        cmovnsq (%r11), %rdx    // p_argv[0] -> %rdx (store 3rd parm to pkfn)
#endif // KMP_MIC

KMP_LABEL(kmp_no_args):
        call    *%rbx           // call (*pkfn)();
        movq    $1, %rax        // move 1 into return register;

        movq    -8(%rbp), %rbx  // restore %rbx using %rbp since %rsp was modified
        movq    %rbp, %rsp      // restore stack pointer
        popq    %rbp            // restore frame pointer
        KMP_CFI_DEF rsp,8
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Reads the time-stamp counter and merges the two 32-bit halves that rdtsc
// leaves in %edx:%eax into a single 64-bit value in %rax.
        .text
        PROC  __kmp_hardware_timestamp
        rdtsc
        shlq    $32, %rdx       // high half into bits 63..32
        orq     %rdx, %rax      // combine with low half
        ret

        DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp

//------------------------------------------------------------------------
// FUNCTION __kmp_bsr32
//
// int
// __kmp_bsr32( int );
//
// Returns the bit index of the most significant set bit of the argument
// (%edi). bsr leaves its destination undefined when the source is 0, so the
// result is only meaningful for a non-zero argument.
        .text
        PROC  __kmp_bsr32

        bsr    %edi,%eax
        ret

        DEBUG_INFO __kmp_bsr32

// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86_64 */

// '
#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//      x0:     pkfn
//      w1:     gtid
//      w2:     tid
//      w3:     argc
//      x4:     p_argv
//      x5:     &exit_frame
//
// locals:
//      __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//      __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//       x8:    used to hold pkfn address
//       w9:    used as temporary for number of pkfn parms
//      x10:    used to traverse p_argv array
//      x11:    used as temporary for stack placement calculation
//      x12:    used as temporary for stack parameters
//      x19:    used to preserve exit_frame_ptr, callee-save
//
// return:      w0      (always 1/TRUE)
//

// Both locals are addressed as [x29, #-offset], i.e. just below the frame
// pointer inside the area reserved by the "sub sp" below.
__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        PROC __kmp_invoke_microtask
        PACBTI_C

        stp     x29, x30, [sp, #-16]!
# if OMPT_SUPPORT
        stp     x19, x20, [sp, #-16]!
# endif
        mov     x29, sp

        // Reserve 16 * (1 + argc/2) bytes: a multiple of 16 that holds the
        // gtid/tid locals plus every argument that is stored to the stack.
        orr     w9, wzr, #1             // w9 = 1
        add     w9, w9, w3, lsr #1      // w9 = 1 + argc/2
        sub     sp, sp, w9, uxtw #4     // sp -= w9 * 16
        mov     x11, sp                 // x11 = where the next stack arg is stored

        mov     x8, x0                  // pkfn -> x8 (x0 is reused for &gtid)
        str     w1, [x29, #-__gtid]
        str     w2, [x29, #-__tid]
        mov     w9, w3                  // argc -> w9 (remaining-parm counter)
        mov     x10, x4                 // p_argv -> x10
# if OMPT_SUPPORT
        mov     x19, x5
        str     x29, [x19]              // *exit_frame_ptr = frame address
# endif

        sub     x0, x29, #__gtid        // 1st parm to pkfn: &gtid
        sub     x1, x29, #__tid         // 2nd parm to pkfn: &tid

        // Load p_argv[0..5] into x2..x7, leaving as soon as the count runs out.
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x2, [x10]

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x3, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x4, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x5, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x6, [x10, #8]!

        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x7, [x10, #8]!

        // Remaining parms are copied to the stack area in increasing address
        // order, starting at sp.
KMP_LABEL(kmp_0):
        sub     w9, w9, #1
        cbz     w9, KMP_LABEL(kmp_1)
        ldr     x12, [x10, #8]!
        str     x12, [x11], #8
        b       KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
        blr     x8                      // call (*pkfn)()
        orr     w0, wzr, #1             // return 1
        mov     sp, x29                 // drop the reserved stack area
# if OMPT_SUPPORT
        str     xzr, [x19]              // *exit_frame_ptr = 0
        ldp     x19, x20, [sp], #16
# endif
        ldp     x29, x30, [sp], #16
        PACBTI_RET
        ret

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */

#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//      r0:     pkfn
//      r1:     gtid
//      r2:     tid
//      r3:     argc
//      r4(stack):      p_argv
//      r5(stack):      &exit_frame
//
// locals:
//      __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
//      __tid:  tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//       r4:    used to hold pkfn address
//       r5:    used as temporary for number of pkfn parms
//       r6:    used to traverse p_argv array
//       r7:    frame pointer (in some configurations)
//       r8:    used as temporary for stack placement calculation
//              and as pointer to base of callee saved area
//      r10:    used to preserve exit_frame_ptr, callee-save
//      r11:    frame pointer (in some configurations)
//      r12:    used as temporary for stack parameters
//
// return:      r0      (always 1/TRUE)
//

__gtid = 4
__tid = 8

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        PROC __kmp_invoke_microtask

        // Pushing one extra register (r3) to keep the stack aligned
        // for when we call pkfn below
        push    {r3-r11,lr}
        // Load p_argv and &exit_frame
        // (the fifth and sixth arguments sit just above the ten words
        // pushed above).
        ldr     r4, [sp, #10*4]
# if OMPT_SUPPORT
        ldr     r5, [sp, #11*4]
# endif

        // FP is the frame-pointer register for this configuration and FPOFF
        // is the offset of its saved copy inside the block pushed above
        // (r7 is the 5th word, r11 the 9th).
# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS)
# define FP r7
# define FPOFF 4*4
#else
# define FP r11
# define FPOFF 8*4
#endif
        add     FP, sp, #FPOFF
# if OMPT_SUPPORT
        // *exit_frame_ptr = FP
        mov     r10, r5
        str     FP, [r10]
# endif
        // r8 = base of the callee saved area; __gtid/__tid go right below it.
        mov     r8, sp

        // Calculate how much stack to allocate, in increments of 8 bytes.
        // We strictly need 4*(argc-2) bytes (2 arguments are passed in
        // registers) but allocate 4*argc for simplicity (to avoid needing
        // to handle the argc<2 cases). We align the number of bytes
        // allocated to 8 bytes, to keep the stack aligned. (Since we
        // already allocate more than enough, it's ok to round down
        // instead of up for the alignment.) We allocate another extra
        // 8 bytes for gtid and tid.
        mov     r5, #1
        add     r5, r5, r3, lsr #1
        sub     sp, sp, r5, lsl #3

        str     r1, [r8, #-__gtid]
        str     r2, [r8, #-__tid]
        // r5 = argc, r6 = p_argv, r4 = pkfn; this frees r0-r3 for the
        // arguments of pkfn.
        mov     r5, r3
        mov     r6, r4
        mov     r4, r0

        // Prepare the first 2 parameters to pkfn - pointers to gtid and tid
        // in our stack frame.
        sub     r0, r8, #__gtid
        sub     r1, r8, #__tid

        // From here on r8 is the write cursor for stack-passed arguments.
        mov     r8, sp

        // Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2
        cmp     r5, #0
        beq     KMP_LABEL(kmp_1)
        ldr     r2, [r6]

        subs    r5, r5, #1
        beq     KMP_LABEL(kmp_1)
        ldr     r3, [r6, #4]!

        // Loop, loading the rest of p_argv and writing the elements on the
        // stack.
KMP_LABEL(kmp_0):
        subs    r5, r5, #1
        beq     KMP_LABEL(kmp_1)
        ldr     r12, [r6, #4]!
        str     r12, [r8], #4
        b       KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
        blx     r4
        mov     r0, #1

        // Recompute the base of the callee saved area from the frame pointer
        // and drop the dynamically sized area.
        sub     r4, FP, #FPOFF
        mov     sp, r4
# undef FP
# undef FPOFF

# if OMPT_SUPPORT
        // *exit_frame_ptr = 0
        mov     r1, #0
        str     r1, [r10]
# endif
        pop     {r3-r11,pc}

        DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask

#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */

#if KMP_ARCH_PPC64

//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//      r3:     pkfn
//      r4:     gtid
//      r5:     tid
//      r6:     argc
//      r7:     p_argv
//      r8:     &exit_frame
//
// return:      r3      (always 1/TRUE)
//
        .text
# if KMP_ARCH_PPC64_ELFv2
        .abiversion 2
# endif
        .globl  __kmp_invoke_microtask

# if KMP_ARCH_PPC64_ELFv2
        .p2align        4
# else
        .p2align        2
# endif

        .type   __kmp_invoke_microtask,@function

# if KMP_ARCH_PPC64_ELFv2
__kmp_invoke_microtask:
.Lfunc_begin0:
.Lfunc_gep0:
        addis   2, 12, .TOC.-.Lfunc_gep0@ha
        addi    2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
        .localentry     __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
# else
        .section        .opd,"aw",@progbits
__kmp_invoke_microtask:
        .p2align        3
        .quad   .Lfunc_begin0
        .quad   .TOC.@tocbase
        .quad   0
        .text
.Lfunc_begin0:
# endif

// -- Begin __kmp_invoke_microtask
// mark_begin;

// We need to allocate a stack frame large enough to hold all of the parameters
// on the stack for the microtask plus what this function needs. That's 48
// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
// to save r30 to hold a copy of r8.

        .cfi_startproc
        mflr    0
        std     31, -8(1)
        std     0, 16(1)

// This is unusual because normally we'd set r31 equal to r1 after the stack
// frame is established. In this case, however, we need to dynamically compute
// the stack frame size, and so we keep a direct copy of r1 to access our
// register save areas and restore the r1 value before returning.
        mr      31, 1
        .cfi_def_cfa_register r31
        .cfi_offset r31, -8
        .cfi_offset lr, 16

// Compute the size necessary for the local stack frame.
// r12 = -(fixed part + 8*argc); the fixed part is 72 bytes (ELFv2) or 88
// bytes (ELFv1).
# if KMP_ARCH_PPC64_ELFv2
        li      12, 72
# else
        li      12, 88
# endif
        sldi    0, 6, 3
        add     12, 0, 12
        neg     12, 12

// We need to make sure that the stack frame stays aligned (to 16 bytes).
        li      0, -16
        and     12, 0, 12

// Establish the local stack frame.
        stdux   1, 1, 12

# if OMPT_SUPPORT
// Save r30, store the new r1 into *exit_frame_ptr, and keep exit_frame_ptr in
// r30 (r8 is reloaded below as a microtask argument).
        .cfi_offset r30, -16
        std     30, -16(31)
        std     1, 0(8)
        mr      30, 8
# endif

// Store gtid and tid to the stack because they're passed by reference to the microtask.
        stw     4, -20(31)
        stw     5, -24(31)

// r12 = argc, r4 = p_argv; r5-r10 are loaded with p_argv[0..5] below, leaving
// for .Lcall as soon as argc is exhausted.
        mr      12, 6
        mr      4, 7

        cmpwi   0, 12, 1
        blt     0, .Lcall

        ld      5, 0(4)

        cmpwi   0, 12, 2
        blt     0, .Lcall

        ld      6, 8(4)

        cmpwi   0, 12, 3
        blt     0, .Lcall

        ld      7, 16(4)

        cmpwi   0, 12, 4
        blt     0, .Lcall

        ld      8, 24(4)

        cmpwi   0, 12, 5
        blt     0, .Lcall

        ld      9, 32(4)

        cmpwi   0, 12, 6
        blt     0, .Lcall

        ld      10, 40(4)

        cmpwi   0, 12, 7
        blt     0, .Lcall

// There are more than 6 microtask parameters, so we need to store the
// remainder to the stack.
        addi    12, 12, -6
        mtctr   12

// These are set to 8 bytes before the first desired store address (we're using
// pre-increment loads and stores in the loop below). The parameter save area
// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
// 32 + 8*8 == 96 bytes above r1 for ELFv2.
        addi    4, 4, 40
# if KMP_ARCH_PPC64_ELFv2
        addi    12, 1, 88
# else
        addi    12, 1, 104
# endif

.Lnext:
        ldu     0, 8(4)
        stdu    0, 8(12)
        bdnz    .Lnext

.Lcall:
// Save our TOC pointer (r2) in the frame; it is reloaded after the call.
# if KMP_ARCH_PPC64_ELFv2
        std     2, 24(1)
        mr      12, 3
#else
        std     2, 40(1)
// For ELFv1, we need to load the actual function address from the function descriptor.
        ld      12, 0(3)
        ld      2, 8(3)
        ld      11, 16(3)
#endif

// First two microtask arguments: &gtid and &tid.
        addi    3, 31, -20
        addi    4, 31, -24

        mtctr   12
        bctrl
# if KMP_ARCH_PPC64_ELFv2
        ld      2, 24(1)
# else
        ld      2, 40(1)
# endif

# if OMPT_SUPPORT
// *exit_frame_ptr = 0
        li      3, 0
        std     3, 0(30)
# endif

// Return value is always 1.
        li      3, 1

# if OMPT_SUPPORT
        ld      30, -16(31)
# endif

// Restore r1 from the copy kept in r31, then reload lr and r31.
        mr      1, 31
        ld      0, 16(1)
        ld      31, -8(1)
        mtlr    0
        blr

        .long   0
        .quad   0
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_PPC64 */

#if KMP_ARCH_RISCV64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   a0: pkfn
//   a1: gtid
//   a2: tid
//   a3: argc
//   a4: p_argv
//   a5: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp. registers:
//
//   t0: used to calculate the dynamic stack size / used to hold pkfn address
//   t1: used as temporary for stack placement calculation
//   t2: used as temporary for stack arguments
//   t3: used as temporary for number of remaining pkfn parms
//   t4: used to traverse p_argv array
//
// return: a0 (always 1/TRUE)
//

__gtid = -20
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        .p2align        1
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // First, save ra and fp
        addi    sp, sp, -16
        sd      ra, 8(sp)
        sd      fp, 0(sp)
        addi    fp, sp, 16
        .cfi_def_cfa    fp, 0
        .cfi_offset     ra, -8
        .cfi_offset     fp, -16

        // Compute the dynamic stack size:
        //
        // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
        //   reference
        // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
        //   function by register. Given that we have 8 of such registers (a[0-7])
        //   and two + 'argc' arguments (consider &gtid and &tid), we need to
        //   reserve max(0, argc - 6)*8 extra bytes
        //
        // The total number of bytes is then max(0, argc - 6)*8 + 8

        // Compute max(0, argc - 6) using the following bithack:
        // max(0, x) = x - (x & (x >> 31)), where x := argc - 6
        // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
        addi    t0, a3, -6
        srai    t1, t0, 31
        and     t1, t0, t1
        sub     t0, t0, t1

        addi    t0, t0, 1

        slli    t0, t0, 3
        sub     sp, sp, t0

        // Align the stack to 16 bytes
        andi    sp, sp, -16

        // t0 = pkfn, t3 = argc, t4 = p_argv; a0-a7 are reloaded below with
        // the arguments for pkfn.
        mv      t0, a0
        mv      t3, a3
        mv      t4, a4

#if OMPT_SUPPORT
        // Save frame pointer into exit_frame
        sd      fp, 0(a5)
#endif

        // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)

        sw      a1, __gtid(fp)
        sw      a2, __tid(fp)

        addi    a0, fp, __gtid
        addi    a1, fp, __tid

        beqz    t3, .L_kmp_3
        ld      a2, 0(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a3, 8(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a4, 16(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a5, 24(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a6, 32(t4)

        addi    t3, t3, -1
        beqz    t3, .L_kmp_3
        ld      a7, 40(t4)

        // Prepare any additional argument passed through the stack
        // (t4 walks p_argv from element 6, t1 walks the stack area from sp).
        addi    t4, t4, 48
        mv      t1, sp
        j       .L_kmp_2
.L_kmp_1:
        ld      t2, 0(t4)
        sd      t2, 0(t1)
        addi    t4, t4, 8
        addi    t1, t1, 8
.L_kmp_2:
        addi    t3, t3, -1
        bnez    t3, .L_kmp_1

.L_kmp_3:
        // Call pkfn function
        jalr    t0

        // Restore stack and return

        addi    a0, zero, 1

        addi    sp, fp, -16
        ld      fp, 0(sp)
        ld      ra, 8(sp)
        addi    sp, sp, 16
        ret
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif /* KMP_ARCH_RISCV64 */

#if KMP_ARCH_LOONGARCH64

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   a0: pkfn
//   a1: gtid
//   a2: tid
//   a3: argc
//   a4: p_argv
//   a5: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp registers:
//
//   t0: used to calculate the dynamic stack size / used to hold pkfn address
//   t1: used as temporary for stack placement calculation
//   t2: used as temporary for stack arguments
//   t3: used as temporary for number of remaining pkfn parms
//   t4: used to traverse p_argv array
//
// return: a0 (always 1/TRUE)
//

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        .p2align        2
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // First, save ra and fp
        addi.d  $sp, $sp, -16
        st.d    $ra, $sp, 8
        st.d    $fp, $sp, 0
        addi.d  $fp, $sp, 16
        .cfi_def_cfa    22, 0
        .cfi_offset     1, -8
        .cfi_offset     22, -16

        // Compute the dynamic stack size:
        //
        // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
        //   reference
        // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
        //   function by register. Given that we have 8 of such registers (a[0-7])
        //   and two + 'argc' arguments (consider &gtid and &tid), we need to
        //   reserve max(0, argc - 6)*8 extra bytes
        //
        // The total number of bytes is then max(0, argc - 6)*8 + 8

        // t0 = max(0, argc - 6): t1 is set when argc - 6 is negative, and
        // masknez then replaces t0 with zero.
        addi.d  $t0, $a3, -6
        slt     $t1, $t0, $zero
        masknez $t0, $t0, $t1
        addi.d  $t0, $t0, 1
        slli.d  $t0, $t0, 3
        sub.d   $sp, $sp, $t0

        // Align the stack to 16 bytes
        bstrins.d $sp, $zero, 3, 0

        // t0 = pkfn, t3 = argc, t4 = p_argv; a0-a7 are reloaded below with
        // the arguments for pkfn.
        move    $t0, $a0
        move    $t3, $a3
        move    $t4, $a4

#if OMPT_SUPPORT
        // Save frame pointer into exit_frame
        st.d    $fp, $a5, 0
#endif

        // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)

        // gtid is kept at fp-20 and tid at fp-24.
        st.w    $a1, $fp, -20
        st.w    $a2, $fp, -24

        addi.d  $a0, $fp, -20
        addi.d  $a1, $fp, -24

        beqz    $t3, .L_kmp_3
        ld.d    $a2, $t4, 0

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a3, $t4, 8

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a4, $t4, 16

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a5, $t4, 24

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a6, $t4, 32

        addi.d  $t3, $t3, -1
        beqz    $t3, .L_kmp_3
        ld.d    $a7, $t4, 40

        // Prepare any additional argument passed through the stack
        // (t4 walks p_argv from element 6, t1 walks the stack area from sp).
        addi.d  $t4, $t4, 48
        move    $t1, $sp
        b       .L_kmp_2
.L_kmp_1:
        ld.d    $t2, $t4, 0
        st.d    $t2, $t1, 0
        addi.d  $t4, $t4, 8
        addi.d  $t1, $t1, 8
.L_kmp_2:
        addi.d  $t3, $t3, -1
        bnez    $t3, .L_kmp_1

.L_kmp_3:
        // Call pkfn function
        jirl    $ra, $t0, 0

        // Restore stack and return

        addi.d  $a0, $zero, 1

        addi.d  $sp, $fp, -16
        ld.d    $fp, $sp, 0
        ld.d    $ra, $sp, 8
        addi.d  $sp, $sp, 16
        jr      $ra
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End  __kmp_invoke_microtask

#endif
/* KMP_ARCH_LOONGARCH64 */

#if KMP_ARCH_VE

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   s0: pkfn
//   s1: gtid
//   s2: tid
//   s3: argc
//   s4: p_argv
//   s5: exit_frame_ptr
//
// Locals:
//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp. registers:
//
//  s12: holds pkfn while s0 is reused for the first pkfn argument
//  s34: used to calculate the dynamic stack size
//  s35: used as temporary for stack placement calculation
//  s36: used as temporary for stack arguments
//  s37: used as temporary for number of remaining pkfn parms
//  s38: used to traverse p_argv array
//
// return: s0 (always 1/TRUE)
//

// Offsets of the two 32-bit locals relative to %fp.
__gtid = -4
__tid = -8

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        // A function requires 8 bytes align.
        .p2align 3
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // First, save fp and lr. VE stores them at caller stack frame.
        st      %fp, 0(, %sp)
        st      %lr, 8(, %sp)
        or      %fp, 0, %sp             // fp = sp on entry
        .cfi_def_cfa %fp, 0
        .cfi_offset %lr, 8
        .cfi_offset %fp, 0

        // Compute the dynamic stack size:
        //
        // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them
        //   by reference
        // - We need 8 bytes for each argument. We have two + 'argc'
        //   arguments (consider &gtid and &tid). We need to reserve
        //   (argc + 2) * 8 bytes.
        // - We need 176 bytes for RSA and others
        //
        // The total number of bytes is then (argc + 2) * 8 + 8 + 176.
        //
        // |------------------------------|
        // | saved return address (%lr)   | 8(%fp)
        // |------------------------------|
        // | saved caller frame pointer   | 0(%fp)
        // |------------------------------| <------------------ %fp
        // | __tid / __gtid               | -8(%fp) / -4(%fp)
        // |------------------------------|
        // | argc+2 for arguments         | 176(%sp)
        // |------------------------------|
        // | RSA                          |
        // |------------------------------|
        // | return address               |
        // |------------------------------|
        // | frame pointer                |
        // |------------------------------| <------------------ %sp

        adds.w.sx %s34, 2, %s3          // s34 = argc + 2
        sll     %s34, %s34, 3           // slots -> bytes
        lea     %s34, 184(, %s34)       // + 8 (gtid/tid) + 176 (RSA and others)
        subs.l  %sp, %sp, %s34

        // Align the stack to 16 bytes.
        and     %sp, -16, %sp

        // Save pkfn.
        or      %s12, 0, %s0

        // Call host to allocate stack if it is necessary.
        // The request is skipped when the new sp is still >= the stack limit
        // in %sl.
        // NOTE(review): the 0x13b code and the 24(%tp) request block are taken
        // as-is from the original sequence; confirm their meaning against the
        // VE ABI before changing them.
        brge.l  %sp, %sl, .L_kmp_pass
        ld      %s61, 24(, %tp)
        lea     %s63, 0x13b
        shm.l   %s63, 0(%s61)
        shm.l   %sl, 8(%s61)
        shm.l   %sp, 16(%s61)
        monc

.L_kmp_pass:
        lea     %s35, 176(, %sp)        // s35 = start of outgoing argument area
        adds.w.sx %s37, 0, %s3          // s37 = argc (sign-extended)
        or      %s38, 0, %s4            // s38 = p_argv

#if OMPT_SUPPORT
        // Save frame pointer into exit_frame.
        st      %fp, 0(%s5)
#endif

        // Prepare arguments for the pkfn function (first 8 using s0-s7
        // registers, but need to store stack also because of varargs).

        stl     %s1, __gtid(%fp)
        stl     %s2, __tid(%fp)

        adds.l  %s0, __gtid, %fp        // s0 = &gtid
        st      %s0, 0(, %s35)
        adds.l  %s1, __tid, %fp         // s1 = &tid
        st      %s1, 8(, %s35)

        // Each argv entry is loaded into its argument register and mirrored
        // into its stack slot; stop as soon as argc entries have been handled.
        breq.l  0, %s37, .L_kmp_call
        ld      %s2, 0(, %s38)          // argv[0]
        st      %s2, 16(, %s35)

        breq.l  1, %s37, .L_kmp_call
        ld      %s3, 8(, %s38)          // argv[1]
        st      %s3, 24(, %s35)

        breq.l  2, %s37, .L_kmp_call
        ld      %s4, 16(, %s38)         // argv[2]
        st      %s4, 32(, %s35)

        breq.l  3, %s37, .L_kmp_call
        ld      %s5, 24(, %s38)         // argv[3]
        st      %s5, 40(, %s35)

        breq.l  4, %s37, .L_kmp_call
        ld      %s6, 32(, %s38)         // argv[4]
        st      %s6, 48(, %s35)

        breq.l  5, %s37, .L_kmp_call
        ld      %s7, 40(, %s38)         // argv[5]
        st      %s7, 56(, %s35)

        breq.l  6, %s37, .L_kmp_call

        // Prepare any additional argument passed through the stack.
        //
        // From here on argc > 6: s37 becomes the number of stack-only
        // arguments, s38 points at argv[6] and s35 at the ninth argument slot.
        adds.l  %s37, -6, %s37
        lea     %s38, 48(, %s38)
        lea     %s35, 64(, %s35)
.L_kmp_loop:
        ld      %s36, 0(, %s38)
        st      %s36, 0(, %s35)
        adds.l  %s37, -1, %s37
        adds.l  %s38, 8, %s38
        adds.l  %s35, 8, %s35
        brne.l  0, %s37, .L_kmp_loop

.L_kmp_call:
        // Call pkfn function.
        bsic    %lr, (, %s12)

        // Return value.
        lea     %s0, 1

        // Restore stack and return.
        or      %sp, 0, %fp             // drop the whole dynamic frame
        ld      %lr, 8(, %sp)
        ld      %fp, 0(, %sp)
        b.l.t   (, %lr)
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End __kmp_invoke_microtask

#endif /* KMP_ARCH_VE */

#if KMP_ARCH_S390X

//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
//                            void *p_argv[]
// #if OMPT_SUPPORT
//                            ,
//                            void **exit_frame_ptr
// #endif
//                            ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)(&gtid, &tid, argv[0], ...);
//
//   return 1;
// }
//
// Parameters:
//   r2: pkfn
//   r3: gtid
//   r4: tid
//   r5: argc
//   r6: p_argv
//   SP+160: exit_frame_ptr
//
// Locals:
//   __gtid:
//           gtid param pushed on stack so can pass &gtid to pkfn
//   __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp. registers:
//
//  r0: used to fetch argv slots
//  r7: used as temporary for number of remaining pkfn parms
//  r8: argv
//  r9: pkfn
//  r10: stack size
//  r11: entry value of r15 (used as frame pointer and CFA base)
//  r12: r15 + size of the outgoing stack-argument area; base for the
//       gtid/tid slots at 160(r12) and 168(r12)
//  r13: argv slot
//
// return: r2 (always 1/TRUE)
//

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
        .globl  __kmp_invoke_microtask
        .p2align 1
        .type   __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
        .cfi_startproc

        // Save r6-r15 into the register save area of the caller's frame.
        // r15 is stored as well so that the slot described by
        // '.cfi_offset %r15, -40' below (120(%r15) on entry) really holds the
        // entry stack pointer; r15 itself is restored from r11 on exit.
        stmg    %r6,%r15,48(%r15)
        .cfi_offset %r6, -112
        .cfi_offset %r7, -104
        .cfi_offset %r8, -96
        .cfi_offset %r9, -88
        .cfi_offset %r10, -80
        .cfi_offset %r11, -72
        .cfi_offset %r12, -64
        .cfi_offset %r13, -56
        .cfi_offset %r14, -48
        .cfi_offset %r15, -40
        lgr     %r11,%r15
        .cfi_def_cfa %r11, 160

        // Compute the dynamic stack size:
        //
        // - We need 16 bytes for storing 'gtid' and 'tid' (one 8-byte slot
        //   each), so we can pass them by reference
        // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
        //   function by register. Given that we have 5 of such registers (r[2-6])
        //   and two + 'argc' arguments (consider &gtid and &tid), max(0, argc - 3)
        //   slots are required. The code reserves max(0, argc - 2)*8 bytes, i.e.
        //   one spare slot as soon as argc >= 3
        // - We need 160 bytes below that: the outgoing arguments are written
        //   from 160(%r15) upwards (standard s390x frame header, see the ABI)
        //
        // The total number of bytes is then max(0, argc - 2)*8 + 176

        lgr     %r10,%r5
        aghi    %r10,-2                 // r10 = argc - 2
        jnm     0f                      // keep it unless negative
        lghi    %r10,0
0:
        sllg    %r10,%r10,3             // slots -> bytes
        lgr     %r12,%r10               // r12 = size of stack-argument area
        aghi    %r10,176
        sgr     %r15,%r10               // allocate the frame
        agr     %r12,%r15               // r12 = new r15 + stack-argument size
        stg     %r11,0(%r15)            // back chain -> entry r15

        lgr     %r9,%r2                 // pkfn

#if OMPT_SUPPORT
        // Save frame pointer into exit_frame
        lg      %r8,160(%r11)           // exit_frame_ptr (stack parameter)
        stg     %r11,0(%r8)
#endif

        // Prepare arguments for the pkfn function (first 5 using r2-r6 registers)

        // gtid and tid are stored as 64-bit values; s390x is big-endian, so
        // the 32-bit int that pkfn reads is the low-order word at offset +4.
        stg     %r3,160(%r12)
        la      %r2,164(%r12)           // &gtid
        stg     %r4,168(%r12)
        la      %r3,172(%r12)           // &tid
        lgr     %r8,%r6                 // argv (r6 is reused for argv[2] below)

        // If argc > 0
        ltgr    %r7,%r5                 // r7 = argc, sets CC for the test
        jz      1f

        lg      %r4,0(%r8)              // argv[0]
        aghi    %r7,-1
        jz      1f

        // If argc > 1
        lg      %r5,8(%r8)              // argv[1]
        aghi    %r7,-1
        jz      1f

        // If argc > 2
        lg      %r6,16(%r8)             // argv[2]
        aghi    %r7,-1
        jz      1f

        // argc > 3: copy the remaining r7 entries to the stack-argument area.
        lghi    %r13,0                  // Byte offset n*8
2:
        lg      %r0,24(%r13,%r8)        // argv[3+n]
        stg     %r0,160(%r13,%r15)      // stack parm[n]
        aghi    %r13,8                  // Next
        aghi    %r7,-1
        jnz     2b

1:
        basr    %r14,%r9                // Call pkfn

        // Restore stack and return

        lgr     %r15,%r11               // drop the whole dynamic frame
        lmg     %r6,%r14,48(%r15)
        lghi    %r2,1                   // return value: always 1
        br      %r14
.Lfunc_end0:
        .size   __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
        .cfi_endproc

// -- End __kmp_invoke_microtask

#endif /* KMP_ARCH_S390X */

// __kmp_unnamed_critical_addr holds the address of .gomp_critical_user_.
// 32-bit targets use a 4-byte pointer, 64-bit targets an 8-byte pointer.
// NOTE(review): COMMON is a macro defined outside this part of the file; its
// arguments are presumably (name, size, alignment) - confirm at the definition.

#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
        .data
        COMMON .gomp_critical_user_, 32, 3
        .data
        .align 4
        .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
        .4byte .gomp_critical_user_
#ifdef __ELF__
        .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4
#endif
#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */

#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||                  \
    KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||                \
    KMP_ARCH_S390X
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
        .data
        COMMON .gomp_critical_user_, 32, 3
        .data
        .align 8
        .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
        .8byte .gomp_critical_user_
#ifdef __ELF__
        .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
#endif
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
          KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||
          KMP_ARCH_S390X */

// Emit an empty .note.GNU-stack section on Linux. The section type is spelled
// %progbits for ARM/AArch64 and @progbits elsewhere; WebAssembly gets none.
#if KMP_OS_LINUX
# if KMP_ARCH_ARM || KMP_ARCH_AARCH64
.section .note.GNU-stack,"",%progbits
# elif !KMP_ARCH_WASM
.section .note.GNU-stack,"",@progbits
# endif
#endif

// WebAssembly: define the critical-section symbols directly as zero-filled
// 4-byte objects instead of going through COMMON.
#if KMP_ARCH_WASM
.data
.global .gomp_critical_user_
.global .gomp_critical_user_.var
.global .gomp_critical_user_.reduction.var
.global __kmp_unnamed_critical_addr
.gomp_critical_user_:
.zero 4
.size .gomp_critical_user_, 4
.gomp_critical_user_.var:
.zero 4
.size .gomp_critical_user_.var, 4
.gomp_critical_user_.reduction.var:
.zero 4
.size .gomp_critical_user_.reduction.var, 4
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .size __kmp_unnamed_critical_addr, 4
#endif

// GNU_PROPERTY_BTI_PAC is a macro defined outside this part of the file.
#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
GNU_PROPERTY_BTI_PAC
#endif