xref: /freebsd/contrib/llvm-project/openmp/runtime/src/z_Linux_asm.S (revision 62987288060ff68c817b7056815aa9fb8ba8ecd7)
10b57cec5SDimitry Andric//  z_Linux_asm.S:  - microtasking routines specifically
20b57cec5SDimitry Andric//                    written for Intel platforms running Linux* OS
30b57cec5SDimitry Andric
40b57cec5SDimitry Andric//
50b57cec5SDimitry Andric////===----------------------------------------------------------------------===//
60b57cec5SDimitry Andric////
70b57cec5SDimitry Andric//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
80b57cec5SDimitry Andric//// See https://llvm.org/LICENSE.txt for license information.
90b57cec5SDimitry Andric//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
100b57cec5SDimitry Andric////
110b57cec5SDimitry Andric////===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric//
130b57cec5SDimitry Andric
140b57cec5SDimitry Andric// -----------------------------------------------------------------------
150b57cec5SDimitry Andric// macros
160b57cec5SDimitry Andric// -----------------------------------------------------------------------
170b57cec5SDimitry Andric
180b57cec5SDimitry Andric#include "kmp_config.h"
190b57cec5SDimitry Andric
200b57cec5SDimitry Andric#if KMP_ARCH_X86 || KMP_ARCH_X86_64
210b57cec5SDimitry Andric
# if KMP_MIC
// Spin-wait hint for MIC: 'delay r16/r32/r64' is the intended instruction
// here, not 'pause'.  A delay takes the current thread out of the
// round-robin HT issue rotation, which raises the issue rate of the other
// threads sharing the core.
//
// Tuning notes for the delay count:
//   *   0 - fine for <= 2 threads per core, but EPCC barrier time grows
//           sharply with 3 or more threads per core.
//   * 100 - behaves well up to 4 threads per core, though slightly slower
//           than 0 when only 2 threads per core are in use.
//
// Still to be checked: oversubscription / more than 4 threads per core.  The
// delay may have to become a parameter the caller derives from the total
// number of threads and cores.
//
// NOTE(review): the macro is kept commented out, so nothing in this part of
// the file defines pause_op when KMP_MIC is set.
//
//.macro pause_op
//	mov    $100, %rax
//	delay  %rax
//.endm
# else
// Everywhere else the hint is the 'pause' encoding, emitted as raw bytes.
#  define pause_op   .byte 0xf3,0x90
# endif // KMP_MIC
450b57cec5SDimitry Andric
# if KMP_OS_DARWIN
// ---- OS X*: underscore-prefixed globals, L_ labels, no CFI, no .type/.size
#  define KMP_LABEL(x) L_##x             // form the name of label
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols

// ALIGN n: hands its argument to .align unchanged.
.macro ALIGN
	.align $0
.endmacro

// PROC name: aligns, exports and opens the (underscore-prefixed) entry point.
.macro PROC
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro

// DEBUG_INFO name: intentionally empty on OS X*.
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro

// The CFI helpers expand to nothing on OS X*.
.macro KMP_CFI_DEF
.endmacro
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
# else // KMP_OS_DARWIN
// ---- Linux* OS: plain symbol names, CFI and ELF symbol metadata
#  define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
// Labels are spelled so that they do not replace function names in gdb's
// backtraces.  The MIC assembler rejects the .L form; a bare L prefix works
// there (as it does on OS X*).
# if KMP_MIC
#  define KMP_LABEL(x) L_##x          // local label
# else
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces
# endif // KMP_MIC

// ALIGN n: the argument is a log2; it expands to .align (1 << n).
.macro ALIGN size
	.align 1<<(\size)
.endm

// PROC name: aligns, exports and opens the entry point and its CFI region.
.macro PROC proc
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
.endm

// DEBUG_INFO name: closes the CFI region opened by PROC, pads with .align 16,
// then tags the symbol as a function and sets its size.  The size is taken
// after the padding, so the padding is counted.
.macro DEBUG_INFO proc
	.cfi_endproc
	.align 16
	.type  \proc,@function
	.size  \proc,.-\proc
.endm

// Thin wrappers over the .cfi_* directives, so that the OS X* build can
// replace them with no-ops.
.macro KMP_CFI_DEF reg, sz
	.cfi_def_cfa	\reg,\sz
.endm
.macro KMP_CFI_DEF_OFFSET sz
	.cfi_def_cfa_offset	\sz
.endm
.macro KMP_CFI_OFFSET reg, sz
	.cfi_offset	\reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
	.cfi_def_cfa_register	\reg
.endm
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1100b57cec5SDimitry Andric
1110fca6ea1SDimitry Andric#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
1120b57cec5SDimitry Andric
# if KMP_OS_DARWIN
// ---- OS X*: underscore-prefixed globals, L_ labels, no CFI, no .type/.size
#  define KMP_LABEL(x) L_##x             // form the name of label
#  define KMP_PREFIX_UNDERSCORE(x) _##x  // extra underscore for OS X* symbols

// ALIGN n: hands its argument to .align unchanged.
.macro ALIGN
	.align $0
.endmacro

// PROC name: aligns, exports and opens the (underscore-prefixed) entry point.
.macro PROC
	ALIGN  4
	.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro

// DEBUG_INFO name: intentionally empty on OS X*.
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
   similar for OS X*.
*/
.endmacro
# elif KMP_OS_WINDOWS
// ---- Windows/ARM64: plain symbol names, .L_ labels, no CFI, no .type/.size
#  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Windows/ARM64 symbols
// Labels are spelled so that they do not replace function names in gdb's
// backtraces.
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

// ALIGN n: expands to .align (1 << n).
.macro ALIGN size
	.align 1<<(\size)
.endm

// PROC name: aligns, exports and opens the entry point.
.macro PROC proc
	ALIGN 2
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.endm

// DEBUG_INFO name: only re-aligns after the function body.
.macro DEBUG_INFO proc
	ALIGN 2
.endm
# else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS
// ---- Linux* OS: plain symbol names, CFI and ELF symbol metadata
#  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
// Labels are spelled so that they do not replace function names in gdb's
// backtraces.
#  define KMP_LABEL(x) .L_##x         // local label hidden from backtraces

// ALIGN n: expands to .align (1 << n).
.macro ALIGN size
	.align 1<<(\size)
.endm

// PROC name: aligns, exports and opens the entry point and its CFI region.
.macro PROC proc
	ALIGN 2
	.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
	.cfi_startproc
.endm

// DEBUG_INFO name: closes the CFI region opened by PROC, re-aligns, then tags
// the symbol as a function and sets its size (taken after the alignment).
// The 32-bit ARM build spells the type %function instead of @function.
.macro DEBUG_INFO proc
	.cfi_endproc
	ALIGN 2
#if KMP_ARCH_ARM
	.type  \proc,%function
#else
	.type  \proc,@function
#endif
	.size  \proc,.-\proc
.endm
# endif // KMP_OS_DARWIN / KMP_OS_WINDOWS / other
1780b57cec5SDimitry Andric
# if KMP_OS_LINUX
// BTI and PAC gnu property note.
//
// GNU_PROPERTY(type, value) appends one record to .note.gnu.property:
//   note header : name size 4, descriptor size 16, NT_GNU_PROPERTY_TYPE_0
//   note name   : "GNU" plus its terminating NUL
//   descriptor  : property type, data size 4, the value word, one zero word
// The section is switched with .pushsection/.popsection, so the caller's
// current section is left as it was.
#  define NT_GNU_PROPERTY_TYPE_0 5
#  define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
#  define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
#  define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2

#  define GNU_PROPERTY(type, value)                                            \
  .pushsection .note.gnu.property, "a";                                        \
  .p2align 3;                                                                  \
  .word 4, 16, NT_GNU_PROPERTY_TYPE_0;                                         \
  .asciz "GNU";                                                                \
  .word type, 4, value, 0;                                                     \
  .popsection
# endif // KMP_OS_LINUX
199*62987288SDimitry Andric
// Select the branch-protection features this file has to honour, based on
// the compiler's feature macros.
//
// NOTE(review): the GNU_PROPERTY_AARCH64_FEATURE_1_* names are only defined
// under KMP_OS_LINUX; on other systems the preprocessor evaluates them as 0
// in the conditionals below, which selects the empty definitions.
# ifdef __ARM_FEATURE_BTI_DEFAULT
#  define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
# else
#  define BTI_FLAG 0
# endif
# if __ARM_FEATURE_PAC_DEFAULT & 3
#  define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
# else
#  define PAC_FLAG 0
# endif

// PACBTI_C             - instruction placed at a function entry
// PACBTI_RET           - instruction placed before a return
// GNU_PROPERTY_BTI_PAC - emits the matching .note.gnu.property record
# if !(BTI_FLAG || PAC_FLAG)
// Neither feature requested: all three expand to nothing.
#  define PACBTI_C
#  define PACBTI_RET
#  define GNU_PROPERTY_BTI_PAC
# else
#  if PAC_FLAG
#   define PACBTI_C hint #25              // paciasp
#   define PACBTI_RET hint #29            // autiasp
#  else
#   define PACBTI_C hint #34              // bti c
#   define PACBTI_RET
#  endif
#  define GNU_PROPERTY_BTI_PAC \
    GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
# endif
2260fca6ea1SDimitry Andric#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
227bdd1243dSDimitry Andric
// COMMON name, size, align_power
//   Declares a common symbol `name` of `size` bytes.  `align_power` is the
//   log2 of the requested alignment; each platform's .comm gets it in a
//   different form:
//     OS X*   - no alignment operand is passed
//     Windows - align_power is passed through as given
//     other   - converted to (1 << align_power)
.macro COMMON name, size, align_power
#if KMP_OS_DARWIN
	.comm \name, \size
#elif KMP_OS_WINDOWS
	.comm \name, \size, \align_power
#else // neither OS X* nor Windows
	.comm \name, \size, (1<<(\align_power))
#endif
.endm
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric// -----------------------------------------------------------------------
2390b57cec5SDimitry Andric// data
2400b57cec5SDimitry Andric// -----------------------------------------------------------------------
2410b57cec5SDimitry Andric
#ifdef KMP_GOMP_COMPAT

// Support for unnamed common blocks.
//
// Because the symbol ".gomp_critical_user_" contains a ".", we have to
// put this stuff in assembly.
//
// Each variant below declares the 32-byte common block .gomp_critical_user_
// and a pointer-sized global, __kmp_unnamed_critical_addr, initialised with
// its address.
//
// On the Linux* OS paths the ALIGN macro takes the log2 of the alignment
// (it expands to .align 1<<n), so the pointer is aligned with ALIGN 2
// (4 bytes) on IA-32 and ALIGN 3 (8 bytes) on x86-64.

# if KMP_ARCH_X86
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .long .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 2                 // 1<<2 = 4 bytes, the size of the pointer
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .4byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,4
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */

# if KMP_ARCH_X86_64
#  if KMP_OS_DARWIN
        .data
        .comm .gomp_critical_user_,32
        .data
        .globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
        .quad .gomp_critical_user_
#  else /* Linux* OS */
        .data
        .comm .gomp_critical_user_,32,8
        .data
        ALIGN 3                 // 1<<3 = 8 bytes, the size of the pointer
        .global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
        .8byte .gomp_critical_user_
        .type __kmp_unnamed_critical_addr,@object
        .size __kmp_unnamed_critical_addr,8
#  endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */

#endif /* KMP_GOMP_COMPAT */
2920b57cec5SDimitry Andric
2930b57cec5SDimitry Andric
2940b57cec5SDimitry Andric#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
2950b57cec5SDimitry Andric
// -----------------------------------------------------------------------
// IA-32 architecture, Linux* OS: microtasking routines
// -----------------------------------------------------------------------

        .ident "Intel Corporation"
        .data
        ALIGN 4

//------------------------------------------------------------------------
// void
// __kmp_x86_pause( void );
//
// Executes one pause_op (the spin-wait hint selected at the top of the
// file) and returns.  Takes no arguments and uses no stack.
        .text
        PROC  __kmp_x86_pause

        pause_op
        ret

        DEBUG_INFO __kmp_x86_pause
3140b57cec5SDimitry Andric
3150b57cec5SDimitry Andric# if !KMP_ASM_INTRINS
3160b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomic fetch-and-add: *p += d, handing back the value *p held before.
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%eax
        PROC      __kmp_test_then_add32

        movl      8(%esp), %eax    // "d"
        movl      4(%esp), %ecx    // "p"
        lock
        xaddl     %eax,(%ecx)      // *p += d; %eax receives the old *p
        ret

        DEBUG_INFO __kmp_test_then_add32
3300b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int8
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically stores d into *p and hands back the byte that was there.
// Only %al is meaningful on return; the rest of %eax is not set here.
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%al
        PROC  __kmp_xchg_fixed8

        movb      8(%esp), %al     // "d"
        movl      4(%esp), %ecx    // "p"

        lock
        xchgb     %al,(%ecx)       // swap: %al <-> *p
        ret

        DEBUG_INFO __kmp_xchg_fixed8
3520b57cec5SDimitry Andric
3530b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// Atomically stores d into *p and hands back the 16-bit value that was
// there.  Only %ax is meaningful on return.
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%ax
        PROC  __kmp_xchg_fixed16

        movw      8(%esp), %ax     // "d"
        movl      4(%esp), %ecx    // "p"

        lock
        xchgw     %ax,(%ecx)       // swap: %ax <-> *p
        ret

        DEBUG_INFO __kmp_xchg_fixed16
3740b57cec5SDimitry Andric
3750b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically stores d into *p and hands back the 32-bit value that was
// there.
//
// parameters:
// 	p:	4(%esp)
// 	d:	8(%esp)
//
// return:	%eax
        PROC  __kmp_xchg_fixed32

        movl      8(%esp), %eax    // "d"
        movl      4(%esp), %ecx    // "p"

        lock
        xchgl     %eax,(%ecx)      // swap: %eax <-> *p
        ret

        DEBUG_INFO __kmp_xchg_fixed32
3970b57cec5SDimitry Andric
3980b57cec5SDimitry Andric
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomic compare-and-swap on a byte: if *p == cv, store sv into *p.
//
// parameters:
// 	p:	4(%esp)
// 	cv:	8(%esp)
// 	sv:	12(%esp)
//
// return:	%eax = 1 if the store happened, 0 otherwise
        PROC  __kmp_compare_and_store8

        movb      12(%esp), %dl     // "sv"
        movb      8(%esp), %al      // "cv" (cmpxchg compares against %al)
        movl      4(%esp), %ecx     // "p"
        lock
        cmpxchgb  %dl,(%ecx)
        sete      %al               // %al = 1 when *p matched cv, else 0
        movzbl    %al, %eax         // widen the 0/1 flag to the whole of %eax
        ret

        DEBUG_INFO __kmp_compare_and_store8
4130b57cec5SDimitry Andric
// kmp_int16
// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
//
// Atomic compare-and-swap on 16 bits: if *p == cv, store sv into *p.
//
// parameters:
// 	p:	4(%esp)
// 	cv:	8(%esp)
// 	sv:	12(%esp)
//
// return:	%eax = 1 if the store happened, 0 otherwise
        PROC  __kmp_compare_and_store16

        movw      12(%esp), %dx     // "sv"
        movw      8(%esp), %ax      // "cv" (cmpxchg compares against %ax)
        movl      4(%esp), %ecx     // "p"
        lock
        cmpxchgw  %dx,(%ecx)
        sete      %al               // %al = 1 when *p matched cv, else 0
        movzbl    %al, %eax         // widen the 0/1 flag to the whole of %eax
        ret

        DEBUG_INFO __kmp_compare_and_store16
4280b57cec5SDimitry Andric
// kmp_int32
// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
//
// Atomic compare-and-swap on 32 bits: if *p == cv, store sv into *p.
//
// parameters:
// 	p:	4(%esp)
// 	cv:	8(%esp)
// 	sv:	12(%esp)
//
// return:	%eax = 1 if the store happened, 0 otherwise
        PROC  __kmp_compare_and_store32

        movl      12(%esp), %edx    // "sv"
        movl      8(%esp), %eax     // "cv" (cmpxchg compares against %eax)
        movl      4(%esp), %ecx     // "p"
        lock
        cmpxchgl  %edx,(%ecx)
        sete      %al               // %al = 1 when *p matched cv, else 0
        movzbl    %al, %eax         // widen the 0/1 flag to the whole of %eax
        ret

        DEBUG_INFO __kmp_compare_and_store32
4430b57cec5SDimitry Andric
// kmp_int32
// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv);
//
// Atomic compare-and-swap on 64 bits via cmpxchg8b: if *p == cv, store sv
// into *p.  cmpxchg8b takes cv in %edx:%eax and sv in %ecx:%ebx, so %ebx
// (and %edi, used for p) are saved and restored around it.
//
// parameters (relative to %ebp once the frame is set up):
// 	p:	8(%ebp)
// 	cv:	12(%ebp) low word, 16(%ebp) high word
// 	sv:	20(%ebp) low word, 24(%ebp) high word
//
// return:	%eax = 1 if the store happened, 0 otherwise
        PROC  __kmp_compare_and_store64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      24(%ebp), %ecx        // "sv" high order word
        movl      20(%ebp), %ebx        // "sv" low order word
        movl      16(%ebp), %edx        // "cv" high order word
        movl      12(%ebp), %eax        // "cv" low order word
        movl      8(%ebp), %edi         // "p"
        lock
        cmpxchg8b (%edi)
        sete      %al                   // %al = 1 when *p matched cv, else 0
        movzbl    %al, %eax             // widen the 0/1 flag to all of %eax
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store64
4680b57cec5SDimitry Andric
// kmp_int8
// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
//
// Atomic compare-and-swap on a byte that reports the value found in *p
// rather than a success flag: if *p == cv, sv is stored.  Either way
// cmpxchg leaves the value *p held beforehand in %al.
//
// parameters:
// 	p:	4(%esp)
// 	cv:	8(%esp)
// 	sv:	12(%esp)
//
// return:	%al
        PROC  __kmp_compare_and_store_ret8

        movb      12(%esp), %dl     // "sv"
        movb      8(%esp), %al      // "cv"
        movl      4(%esp), %ecx     // "p"
        lock
        cmpxchgb  %dl,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8
4810b57cec5SDimitry Andric
// kmp_int16
// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
//                               kmp_int16 sv);
//
// Atomic compare-and-swap on 16 bits that reports the value found in *p
// rather than a success flag: if *p == cv, sv is stored.  Either way
// cmpxchg leaves the value *p held beforehand in %ax.
//
// parameters:
// 	p:	4(%esp)
// 	cv:	8(%esp)
// 	sv:	12(%esp)
//
// return:	%ax
        PROC  __kmp_compare_and_store_ret16

        movw      12(%esp), %dx     // "sv"
        movw      8(%esp), %ax      // "cv"
        movl      4(%esp), %ecx     // "p"
        lock
        cmpxchgw  %dx,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16
4950b57cec5SDimitry Andric
// kmp_int32
// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
//                               kmp_int32 sv);
//
// Atomic compare-and-swap on 32 bits that reports the value found in *p
// rather than a success flag: if *p == cv, sv is stored.  Either way
// cmpxchg leaves the value *p held beforehand in %eax.
//
// parameters:
// 	p:	4(%esp)
// 	cv:	8(%esp)
// 	sv:	12(%esp)
//
// return:	%eax
        PROC  __kmp_compare_and_store_ret32

        movl      12(%esp), %edx    // "sv"
        movl      8(%esp), %eax     // "cv"
        movl      4(%esp), %ecx     // "p"
        lock
        cmpxchgl  %edx,(%ecx)
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32
5090b57cec5SDimitry Andric
// kmp_int64
// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
//                               kmp_int64 sv);
//
// Atomic compare-and-swap on 64 bits via cmpxchg8b that reports the value
// found in *p rather than a success flag: if *p == cv, sv is stored.
// Either way cmpxchg8b leaves the value *p held beforehand in %edx:%eax.
// %ebx and %edi are saved and restored because the instruction needs sv in
// %ecx:%ebx and p is kept in %edi.
//
// parameters (relative to %ebp once the frame is set up):
// 	p:	8(%ebp)
// 	cv:	12(%ebp) low word, 16(%ebp) high word
// 	sv:	20(%ebp) low word, 24(%ebp) high word
//
// return:	%edx:%eax
        PROC  __kmp_compare_and_store_ret64

        pushl     %ebp
        movl      %esp, %ebp
        pushl     %ebx
        pushl     %edi
        movl      24(%ebp), %ecx        // "sv" high order word
        movl      20(%ebp), %ebx        // "sv" low order word
        movl      16(%ebp), %edx        // "cv" high order word
        movl      12(%ebp), %eax        // "cv" low order word
        movl      8(%ebp), %edi         // "p"
        lock
        cmpxchg8b (%edi)
        popl      %edi
        popl      %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64
5330b57cec5SDimitry Andric
5340b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores the bit pattern of data into *addr and returns the
// value *addr held before, as a float.
//
// parameters (on entry / once the frame is set up):
// 	addr:	4(%esp)  /  8(%ebp)
// 	data:	8(%esp)  /  12(%ebp)
//
// return:	st(0) - the old value is loaded onto the x87 stack with flds,
//		leaving exactly one x87 entry for the caller
//
// Inside the frame 4(%ebp) is the return address, so the arguments are
// read from 8(%ebp) and 12(%ebp).  The returned value is the one handed
// back by the xchg itself, so it is the value the exchange replaced.
        PROC  __kmp_xchg_real32

        pushl   %ebp
        movl    %esp, %ebp
        subl    $4, %esp                // scratch slot at -4(%ebp)

        movl    8(%ebp), %ecx           // "addr"
        movl    12(%ebp), %eax          // "data" as raw 32 bits

        lock
        xchgl   %eax, (%ecx)            // *addr = data; %eax = old bits

        movl    %eax, -4(%ebp)          // move the old bits through memory
        flds    -4(%ebp)                // return old value in st(0)

        movl    %ebp, %esp
        popl    %ebp
        ret

        DEBUG_INFO __kmp_xchg_real32
5730b57cec5SDimitry Andric
5740b57cec5SDimitry Andric# endif /* !KMP_ASM_INTRINS */
5750b57cec5SDimitry Andric
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// parameters (all on the stack, offsets valid once %ebp is set up):
//	 8(%ebp):	pkfn
//	12(%ebp):	gtid
//	16(%ebp):	tid
//	20(%ebp):	argc
//	24(%ebp):	p_argv
//	28(%ebp):	exit_frame_ptr (only read when OMPT_SUPPORT)
//
// locals:
//	 -4(%ebp):	argv, a copy of p_argv
//	 -8(%ebp):	temp (allocated, never referenced)
//	-12(%ebp):	saved %ebx
//
// return:	%eax (always 1)

// -- Begin __kmp_invoke_microtask
// mark_begin;
	PROC  __kmp_invoke_microtask

	pushl %ebp
	KMP_CFI_DEF_OFFSET 8
	KMP_CFI_OFFSET ebp,-8
	movl %esp,%ebp		// establish the base pointer for this routine.
	KMP_CFI_REGISTER ebp
	subl $8,%esp		// allocate space for two local variables.
				// These variables are:
				//	argv: -4(%ebp)
				//	temp: -8(%ebp)
				//
	pushl %ebx		// save %ebx to use during this routine;
				// it ends up at -12(%ebp)
#if OMPT_SUPPORT
	movl 28(%ebp),%ebx	// get exit_frame address
	movl %ebp,(%ebx)	// save exit_frame
#endif

	// Pre-adjust %esp so that, once the argc+2 argument words have been
	// pushed, %esp sits on a 128-byte boundary at the call.
	movl 20(%ebp),%ebx	// Stack alignment - # args
	addl $2,%ebx		// #args +2  Always pass at least 2 args (gtid and tid)
	shll $2,%ebx		// Number of bytes used on stack: (#args+2)*4
	movl %esp,%eax		//
	subl %ebx,%eax		// %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
	movl %eax,%ebx		// Save to %ebx
	andl $0xFFFFFF80,%eax	// clear the low 7 bits (round down to 128)
	subl %eax,%ebx		// Amount to subtract from %esp
	subl %ebx,%esp		// Prepare the stack ptr --
				//   now it will be aligned on 128-byte boundary at the call

	movl 24(%ebp),%eax	// copy from p_argv[]
	movl %eax,-4(%ebp)	// into the local variable *argv.

	movl 20(%ebp),%ebx	// argc is 20(%ebp)
	shll $2,%ebx		// %ebx = byte offset just past the last argv entry

	// Push argv[argc-1] ... argv[0], i.e. in reverse order.
KMP_LABEL(invoke_2):
	testl %ebx,%ebx
	jle KMP_LABEL(invoke_3)		// no entries left to push

	movl -4(%ebp),%eax
	subl $4,%ebx			// step back one argv entry.
	addl %ebx,%eax			// index into argv.
	pushl (%eax)

	jmp KMP_LABEL(invoke_2)
	ALIGN 2
KMP_LABEL(invoke_3):
	leal 16(%ebp),%eax		// push & tid
	pushl %eax

	leal 12(%ebp),%eax		// push & gtid
	pushl %eax

	movl 8(%ebp),%ebx
	call *%ebx			// call (*pkfn)();

	movl $1,%eax			// return 1;

	movl -12(%ebp),%ebx		// restore %ebx (addressed via %ebp, %esp was realigned)
	leave
	KMP_CFI_DEF esp,4
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask
6650b57cec5SDimitry Andric
6660b57cec5SDimitry Andric
//------------------------------------------------------------------------
// kmp_uint64
// __kmp_hardware_timestamp(void)
//
// Reads the time-stamp counter. rdtsc leaves the 64-bit count in
// %edx:%eax (high:low) and the routine returns immediately with the
// value still in that register pair.
	PROC  __kmp_hardware_timestamp
	rdtsc				// %edx:%eax <- time-stamp counter
	ret

	DEBUG_INFO __kmp_hardware_timestamp
// -- End  __kmp_hardware_timestamp
6750b57cec5SDimitry Andric
6760b57cec5SDimitry Andric#endif /* KMP_ARCH_X86 */
6770b57cec5SDimitry Andric
6780b57cec5SDimitry Andric
6790b57cec5SDimitry Andric#if KMP_ARCH_X86_64
6800b57cec5SDimitry Andric
// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------

// -- Machine type P
// mark_description "Intel Corporation";
	.ident "Intel Corporation"
// --	.file "z_Linux_asm.S"
	.data
	ALIGN 4

// The .data directive above leaves the data section selected. Each x86_64
// routine definition below therefore starts with its own .text directive,
// so that no code ends up in the .data section.
//------------------------------------------------------------------------
6960b57cec5SDimitry Andric# if !KMP_ASM_INTRINS
6970b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically performs *p += d and returns the value *p held before the
// addition.
//
// parameters:
// 	p:	%rdi
// 	d:	%esi
//
// return:	%eax
        .text
        PROC  __kmp_test_then_add32

        lock
        xaddl     %esi,(%rdi)	// *p += d; %esi <- previous *p
        movl      %esi, %eax	// previous value is the result
        ret

        DEBUG_INFO __kmp_test_then_add32
7180b57cec5SDimitry Andric
7190b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// Atomically performs *p += d and returns the value *p held before the
// addition.
//
// parameters:
// 	p:	%rdi
// 	d:	%rsi
//
// return:	%rax
        .text
        PROC  __kmp_test_then_add64

        lock
        xaddq     %rsi,(%rdi)	// *p += d; %rsi <- previous *p
        movq      %rsi, %rax	// previous value is the result
        ret

        DEBUG_INFO __kmp_test_then_add64
7390b57cec5SDimitry Andric
7400b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int8
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// Atomically stores d into *p and returns the byte that was there before.
//
// parameters:
// 	p:	%rdi
// 	d:	%sil
//
// return:	%al
        .text
        PROC  __kmp_xchg_fixed8

        movl      %esi, %eax	// "d" -> %al; the full 32-bit move avoids a
				// partial-register write, only %al is used

        lock
        xchgb     %al,(%rdi)	// %al <- old *p, *p <- d
        ret

        DEBUG_INFO __kmp_xchg_fixed8
7620b57cec5SDimitry Andric
7630b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// Atomically stores d into *p and returns the 16-bit value that was there
// before.
//
// parameters:
// 	p:	%rdi
// 	d:	%si
//
// return:	%ax
        .text
        PROC  __kmp_xchg_fixed16

        movl      %esi, %eax	// "d" -> %ax; the full 32-bit move avoids a
				// partial-register write, only %ax is used

        lock
        xchgw     %ax,(%rdi)	// %ax <- old *p, *p <- d
        ret

        DEBUG_INFO __kmp_xchg_fixed16
7840b57cec5SDimitry Andric
7850b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// Atomically stores d into *p and returns the value that was there before.
//
// parameters:
// 	p:	%rdi
// 	d:	%esi
//
// return:	%eax
        .text
        PROC  __kmp_xchg_fixed32

        lock
        xchgl     %esi,(%rdi)	// %esi <- old *p, *p <- d
        movl      %esi, %eax	// old value is the result
        ret

        DEBUG_INFO __kmp_xchg_fixed32
8070b57cec5SDimitry Andric
8080b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// Atomically stores d into *p and returns the value that was there before.
//
// parameters:
// 	p:	%rdi
// 	d:	%rsi
//
// return:	%rax
        .text
        PROC  __kmp_xchg_fixed64

        lock
        xchgq     %rsi,(%rdi)	// %rsi <- old *p, *p <- d
        movq      %rsi, %rax	// old value is the result
        ret

        DEBUG_INFO __kmp_xchg_fixed64
8290b57cec5SDimitry Andric
8300b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomic compare-and-swap on a byte: if *p == cv then *p = sv.
// Returns 1 when the store happened, 0 otherwise.
//
// parameters:
// 	p:	%rdi
// 	cv:	%sil
//	sv:	%dl
//
// return:	%eax (0 or 1)
        .text
        PROC  __kmp_compare_and_store8

        movl      %esi, %eax	// "cv" -> %al (cmpxchgb compares against %al only)
        lock
        cmpxchgb  %dl,(%rdi)
        sete      %al           // if %al == (%rdi) set %al = 1 else set %al = 0
        movzbl    %al, %eax     // zero-extend the 0/1 flag into the return register
        ret

        DEBUG_INFO __kmp_compare_and_store8
8540b57cec5SDimitry Andric
8550b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// Atomic compare-and-swap on a 16-bit value: if *p == cv then *p = sv.
// Returns 1 when the store happened, 0 otherwise.
//
// parameters:
// 	p:	%rdi
// 	cv:	%si
//	sv:	%dx
//
// return:	%eax (0 or 1)
        .text
        PROC  __kmp_compare_and_store16

        movl      %esi, %eax	// "cv" -> %ax (cmpxchgw compares against %ax only)
        lock
        cmpxchgw  %dx,(%rdi)
        sete      %al           // if %ax == (%rdi) set %al = 1 else set %al = 0
        movzbl    %al, %eax     // zero-extend the 0/1 flag into the return register
        ret

        DEBUG_INFO __kmp_compare_and_store16
8790b57cec5SDimitry Andric
8800b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Atomic compare-and-swap on a 32-bit value: if *p == cv then *p = sv.
// Returns 1 when the store happened, 0 otherwise.
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax (0 or 1)
        .text
        PROC  __kmp_compare_and_store32

        movl      %esi, %eax	// "cv"
        lock
        cmpxchgl  %edx,(%rdi)
        sete      %al           // if %eax == (%rdi) set %al = 1 else set %al = 0
        movzbl    %al, %eax     // zero-extend the 0/1 flag into the return register
        ret

        DEBUG_INFO __kmp_compare_and_store32
9040b57cec5SDimitry Andric
9050b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// Atomic compare-and-swap on a 64-bit value: if *p == cv then *p = sv.
// Returns 1 when the store happened, 0 otherwise.
//
// parameters:
// 	p:	%rdi
// 	cv:	%rsi
//	sv:	%rdx
//
// return:	%eax (0 or 1)
        .text
        PROC  __kmp_compare_and_store64

        movq      %rsi, %rax    // "cv"
        lock
        cmpxchgq  %rdx,(%rdi)
        sete      %al           // if %rax == (%rdi) set %al = 1 else set %al = 0
        movzbl    %al, %eax     // zero-extend the 0/1 flag into the return register
        ret

        DEBUG_INFO __kmp_compare_and_store64
9280b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// Atomic compare-and-swap on a byte: if *p == cv then *p = sv.
// Returns the value *p held before the operation (equal to cv exactly
// when the store happened).
//
// parameters:
// 	p:	%rdi
// 	cv:	%sil
//	sv:	%dl
//
// return:	%al
        .text
        PROC  __kmp_compare_and_store_ret8

        movl      %esi, %eax	// "cv" -> %al; the full 32-bit move avoids a
				// partial-register write, only %al is used
        lock
        cmpxchgb  %dl,(%rdi)	// on mismatch %al <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret8
9500b57cec5SDimitry Andric
9510b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// Atomic compare-and-swap on a 16-bit value: if *p == cv then *p = sv.
// Returns the value *p held before the operation (equal to cv exactly
// when the store happened).
//
// parameters:
// 	p:	%rdi
// 	cv:	%si
//	sv:	%dx
//
// return:	%ax
        .text
        PROC  __kmp_compare_and_store_ret16

        movl      %esi, %eax	// "cv" -> %ax; the full 32-bit move avoids a
				// partial-register write, only %ax is used
        lock
        cmpxchgw  %dx,(%rdi)	// on mismatch %ax <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret16
9730b57cec5SDimitry Andric
9740b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// Atomic compare-and-swap on a 32-bit value: if *p == cv then *p = sv.
// The result is whatever *p held before the operation.
//
// parameters:
// 	p:	%rdi
// 	cv:	%esi
//	sv:	%edx
//
// return:	%eax
        .text
        PROC  __kmp_compare_and_store_ret32

        movl      %esi, %eax		// expected value "cv" goes in %eax
        lock cmpxchgl  %edx,(%rdi)	// on mismatch %eax <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret32
9960b57cec5SDimitry Andric
9970b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// Atomic compare-and-swap on a 64-bit value: if *p == cv then *p = sv.
// The result is whatever *p held before the operation.
//
// parameters:
// 	p:	%rdi
// 	cv:	%rsi
//	sv:	%rdx
//
// return:	%rax
        .text
        PROC  __kmp_compare_and_store_ret64

        movq      %rsi, %rax    	// expected value "cv" goes in %rax
        lock cmpxchgq  %rdx,(%rdi)	// on mismatch %rax <- current *p
        ret

        DEBUG_INFO __kmp_compare_and_store_ret64
10180b57cec5SDimitry Andric
10190b57cec5SDimitry Andric# endif /* !KMP_ASM_INTRINS */
10200b57cec5SDimitry Andric
10210b57cec5SDimitry Andric
10220b57cec5SDimitry Andric# if !KMP_MIC
10230b57cec5SDimitry Andric
10240b57cec5SDimitry Andric# if !KMP_ASM_INTRINS
10250b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// Atomically stores data into *addr and returns the previous contents.
// The 32-bit pattern is routed through %eax because xchg works on
// general-purpose registers.
//
// parameters:
// 	addr:	%rdi
// 	data:	%xmm0 (lower 4 bytes)
//
// return:	%xmm0 (lower 4 bytes)
        .text
        PROC  __kmp_xchg_real32

        movd    %xmm0, %eax		// bit pattern of "data" -> %eax
        lock xchgl   %eax, (%rdi)	// %eax <- old *addr, *addr <- data
        movd    %eax, %xmm0		// old value into the return register
        ret

        DEBUG_INFO __kmp_xchg_real32
10500b57cec5SDimitry Andric
10510b57cec5SDimitry Andric
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// Atomically stores data into *addr and returns the previous contents.
// The 64-bit pattern is routed through %rax because xchg works on
// general-purpose registers.
//
// parameters:
//      addr:   %rdi
//      data:   %xmm0 (lower 8 bytes)
//
// return:      %xmm0 (lower 8 bytes)
        .text
        PROC  __kmp_xchg_real64

        movd    %xmm0, %rax		// bit pattern of "data" -> %rax
        lock xchgq   %rax, (%rdi)	// %rax <- old *addr, *addr <- data
        movd    %rax, %xmm0		// old value into the return register
        ret

        DEBUG_INFO __kmp_xchg_real64
10740b57cec5SDimitry Andric
10750b57cec5SDimitry Andric
# endif /* !KMP_ASM_INTRINS */

# endif /* !KMP_MIC */
10790b57cec5SDimitry Andric
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, argv[0], ... );
//   return 1;
// }
//
// note: at call to pkfn must have %rsp 128-byte aligned for compiler
//
// parameters:
//      %rdi:  	pkfn
//	%esi:	gtid
//	%edx:	tid
//	%ecx:	argc (32-bit; sign-extended to %rcx before use)
//	%r8:	p_argv
//	%r9:	&exit_frame
//
// locals:
//	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn
//	__tid:	tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
//	%rax:	used all over the place
//	%rdx:	used in stack pointer alignment calculation
//	%r11:	used to traverse p_argv array
//	%rsi:	used as temporary for stack parameters
//		used as temporary for number of pkfn parms to push
//	%rbx:	used to hold pkfn address, and zero constant, callee-save
//
// return:	%eax 	(always 1/TRUE)
__gtid = -16
__tid = -24

// -- Begin __kmp_invoke_microtask
// mark_begin;
        .text
	PROC  __kmp_invoke_microtask

	pushq 	%rbp		// save base pointer
	KMP_CFI_DEF_OFFSET 16
	KMP_CFI_OFFSET rbp,-16
	movq 	%rsp,%rbp	// establish the base pointer for this routine.
	KMP_CFI_REGISTER rbp

#if OMPT_SUPPORT
	movq	%rbp, (%r9)	// save exit_frame
#endif

	pushq 	%rbx		// %rbx is callee-saved register; saved at -8(%rbp)
	pushq	%rsi		// Put gtid on stack so can pass &gtid to pkfn
	pushq	%rdx		// Put tid on stack so can pass &tid to pkfn

	movslq	%ecx, %rcx	// argc is passed as a 32-bit int, so bits 63:32
				// of %rcx are unspecified on entry; sign-extend
				// before doing 64-bit arithmetic on it
	movq	%rcx, %rax	// Stack alignment calculation begins; argc -> %rax
	movq	$0, %rbx	// constant for cmovs later
	subq	$4, %rax	// subtract four args passed in registers to pkfn
#if KMP_MIC
	js	KMP_LABEL(kmp_0)	// jump to movq
	jmp	KMP_LABEL(kmp_0_exit)	// jump ahead
KMP_LABEL(kmp_0):
	movq	%rbx, %rax	// zero negative value in %rax <- max(0, argc-4)
KMP_LABEL(kmp_0_exit):
#else
	cmovsq	%rbx, %rax	// zero negative value in %rax <- max(0, argc-4)
#endif // KMP_MIC

	movq	%rax, %rsi	// save max(0, argc-4) -> %rsi for later
	shlq 	$3, %rax	// Number of bytes used on stack: max(0, argc-4)*8

	movq 	%rsp, %rdx	//
	subq 	%rax, %rdx	// %rsp-(max(0,argc-4)*8) -> %rdx --
				// without align, stack ptr would be this
	movq 	%rdx, %rax	// Save to %rax

	andq 	$0xFFFFFFFFFFFFFF80, %rax  // mask off lower 7 bits (128 bytes align)
	subq 	%rax, %rdx	// Amount to subtract from %rsp
	subq 	%rdx, %rsp	// Prepare the stack ptr --
				// now %rsp will align to 128-byte boundary at call site

				// setup pkfn parameter reg and stack
	movq	%rcx, %rax	// argc -> %rax
	cmpq	$0, %rsi
	je	KMP_LABEL(kmp_invoke_pass_parms)	// jump ahead if no parms to push
	shlq	$3, %rcx	// argc*8 -> %rcx
	movq 	%r8, %rdx	// p_argv -> %rdx
	addq	%rcx, %rdx	// &p_argv[argc] -> %rdx

	movq	%rsi, %rcx	// max (0, argc-4) -> %rcx

KMP_LABEL(kmp_invoke_push_parms):
	// push nth - 7th parms to pkfn on stack
	subq	$8, %rdx	// decrement p_argv pointer to previous parm
	movq	(%rdx), %rsi	// p_argv[%rcx-1] -> %rsi
	pushq	%rsi		// push p_argv[%rcx-1] onto stack (reverse order)
	subl	$1, %ecx

// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
//		if the name of the label that is an operand of this jecxz starts with a dot (".");
//	   Apple's linker does not support 1-byte length relocation;
//         Resolution: replace all .labelX entries with L_labelX.

	jecxz   KMP_LABEL(kmp_invoke_pass_parms)  // stop when four p_argv[] parms left
	jmp	KMP_LABEL(kmp_invoke_push_parms)
	ALIGN 3
KMP_LABEL(kmp_invoke_pass_parms):	// put 1st - 6th parms to pkfn in registers.
				// order here is important to avoid trashing
				// registers used for both input and output parms!
	movq	%rdi, %rbx	// pkfn -> %rbx
	leaq	__gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
	leaq	__tid(%rbp), %rsi  // &tid -> %rsi (store 2nd parm to pkfn)

	movq	%r8, %r11	// p_argv -> %r11 (%r8 itself is overwritten below)

	// Load p_argv[0..3] into the remaining argument registers, but only
	// the entries that exist. Explicit branches are used instead of
	// cmov-from-memory because a cmov with a memory source performs the
	// load even when the condition is false, which would read p_argv
	// slots beyond argc (and a NULL p_argv when argc is 0).
	cmpq	$1, %rax	// argc >= 1?
	jl	KMP_LABEL(kmp_invoke_call)
	movq	(%r11), %rdx	// p_argv[0] -> %rdx (store 3rd parm to pkfn)

	cmpq	$2, %rax	// argc >= 2?
	jl	KMP_LABEL(kmp_invoke_call)
	movq	8(%r11), %rcx	// p_argv[1] -> %rcx (store 4th parm to pkfn)

	cmpq	$3, %rax	// argc >= 3?
	jl	KMP_LABEL(kmp_invoke_call)
	movq	16(%r11), %r8	// p_argv[2] -> %r8 (store 5th parm to pkfn)

	cmpq	$4, %rax	// argc >= 4?
	jl	KMP_LABEL(kmp_invoke_call)
	movq	24(%r11), %r9	// p_argv[3] -> %r9 (store 6th parm to pkfn)

KMP_LABEL(kmp_invoke_call):
	call	*%rbx		// call (*pkfn)();
	movq	$1, %rax	// move 1 into return register;

	movq	-8(%rbp), %rbx	// restore %rbx	using %rbp since %rsp was modified
	movq 	%rbp, %rsp	// restore stack pointer
	popq 	%rbp		// restore frame pointer
	KMP_CFI_DEF rsp,8
	ret

	DEBUG_INFO __kmp_invoke_microtask
// -- End  __kmp_invoke_microtask
12600b57cec5SDimitry Andric
12610b57cec5SDimitry Andric// kmp_uint64
12620b57cec5SDimitry Andric// __kmp_hardware_timestamp(void) -- returns the time-stamp counter read by rdtsc as one 64-bit value
12630b57cec5SDimitry Andric        .text
12640b57cec5SDimitry Andric	PROC  __kmp_hardware_timestamp
12650b57cec5SDimitry Andric	rdtsc			// EDX:EAX = time-stamp counter (high half in %edx, low half in %eax)
12660b57cec5SDimitry Andric	shlq    $32, %rdx	// move the high half into bits 63:32 of %rdx
12670b57cec5SDimitry Andric	orq     %rdx, %rax	// %rax = (high << 32) | low
12680b57cec5SDimitry Andric	ret			// result is returned in %rax
12690b57cec5SDimitry Andric
12700b57cec5SDimitry Andric	DEBUG_INFO __kmp_hardware_timestamp
12710b57cec5SDimitry Andric// -- End  __kmp_hardware_timestamp
12720b57cec5SDimitry Andric
12730b57cec5SDimitry Andric//------------------------------------------------------------------------
12740b57cec5SDimitry Andric// FUNCTION __kmp_bsr32
12750b57cec5SDimitry Andric// Returns the bit index of the most significant set bit of the 32-bit argument.
12760b57cec5SDimitry Andric// int
12770b57cec5SDimitry Andric// __kmp_bsr32( int );
12780b57cec5SDimitry Andric        .text
12790b57cec5SDimitry Andric        PROC  __kmp_bsr32
12800b57cec5SDimitry Andric
12810b57cec5SDimitry Andric        bsr    %edi,%eax	// %eax = index of the highest set bit in %edi (1st argument); the bsr result is undefined when %edi is 0
12820b57cec5SDimitry Andric        ret			// index is returned in %eax
12830b57cec5SDimitry Andric
12840b57cec5SDimitry Andric        DEBUG_INFO __kmp_bsr32
12850b57cec5SDimitry Andric
12860b57cec5SDimitry Andric// -----------------------------------------------------------------------
12870b57cec5SDimitry Andric#endif /* KMP_ARCH_X86_64 */
12880b57cec5SDimitry Andric
12890b57cec5SDimitry Andric// '
12900fca6ea1SDimitry Andric#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
12910b57cec5SDimitry Andric
12920b57cec5SDimitry Andric//------------------------------------------------------------------------
12930b57cec5SDimitry Andric// int
12940b57cec5SDimitry Andric// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
12950b57cec5SDimitry Andric//                         int gtid, int tid,
1296489b1cf2SDimitry Andric//                         int argc, void *p_argv[]
1297489b1cf2SDimitry Andric// #if OMPT_SUPPORT
1298489b1cf2SDimitry Andric//                         ,
1299489b1cf2SDimitry Andric//                         void **exit_frame_ptr
1300489b1cf2SDimitry Andric// #endif
1301489b1cf2SDimitry Andric//                       ) {
1302489b1cf2SDimitry Andric// #if OMPT_SUPPORT
1303489b1cf2SDimitry Andric//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1304489b1cf2SDimitry Andric// #endif
1305489b1cf2SDimitry Andric//
13060b57cec5SDimitry Andric//   (*pkfn)( & gtid, & tid, argv[0], ... );
1307489b1cf2SDimitry Andric//
1308489b1cf2SDimitry Andric// // FIXME: This is done at call-site and can be removed here.
1309489b1cf2SDimitry Andric// #if OMPT_SUPPORT
1310489b1cf2SDimitry Andric//   *exit_frame_ptr = 0;
1311489b1cf2SDimitry Andric// #endif
1312489b1cf2SDimitry Andric//
13130b57cec5SDimitry Andric//   return 1;
13140b57cec5SDimitry Andric// }
13150b57cec5SDimitry Andric//
13160b57cec5SDimitry Andric// parameters:
13170b57cec5SDimitry Andric//	x0:	pkfn
13180b57cec5SDimitry Andric//	w1:	gtid
13190b57cec5SDimitry Andric//	w2:	tid
13200b57cec5SDimitry Andric//	w3:	argc
13210b57cec5SDimitry Andric//	x4:	p_argv
13220b57cec5SDimitry Andric//	x5:	&exit_frame
13230b57cec5SDimitry Andric//
13240b57cec5SDimitry Andric// locals:
13250b57cec5SDimitry Andric//	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn
13260b57cec5SDimitry Andric//	__tid:	tid parm pushed on stack so can pass &tid to pkfn
13270b57cec5SDimitry Andric//
13280b57cec5SDimitry Andric// reg temps:
13290b57cec5SDimitry Andric//	 x8:	used to hold pkfn address
13300b57cec5SDimitry Andric//	 w9:	used as temporary for number of pkfn parms
13310b57cec5SDimitry Andric//	x10:	used to traverse p_argv array
13320b57cec5SDimitry Andric//	x11:	used as temporary for stack placement calculation
13330b57cec5SDimitry Andric//	x12:	used as temporary for stack parameters
13340b57cec5SDimitry Andric//	x19:	used to preserve exit_frame_ptr, callee-save
13350b57cec5SDimitry Andric//
13360b57cec5SDimitry Andric// return:	w0	(always 1/TRUE)
13370b57cec5SDimitry Andric//
13380b57cec5SDimitry Andric
13390b57cec5SDimitry Andric__gtid = 4	// gtid is spilled 4 bytes below x29
13400b57cec5SDimitry Andric__tid = 8	// tid is spilled 8 bytes below x29
13410b57cec5SDimitry Andric// AArch64 trampoline: spills gtid/tid, loads up to six p_argv entries into x2-x7, copies any remaining entries to the stack, calls pkfn and returns 1.
13420b57cec5SDimitry Andric// -- Begin __kmp_invoke_microtask
13430b57cec5SDimitry Andric// mark_begin;
13440b57cec5SDimitry Andric	.text
13450b57cec5SDimitry Andric	PROC __kmp_invoke_microtask
1346*62987288SDimitry Andric	PACBTI_C
13470b57cec5SDimitry Andric
13480b57cec5SDimitry Andric	stp	x29, x30, [sp, #-16]!	// push caller frame pointer and link register
13490b57cec5SDimitry Andric# if OMPT_SUPPORT
13500b57cec5SDimitry Andric	stp	x19, x20, [sp, #-16]!	// push x19 (used below for exit_frame_ptr) together with x20
13510b57cec5SDimitry Andric# endif
13520b57cec5SDimitry Andric	mov	x29, sp			// x29 = base for the locals and for restoring sp after the call
13530b57cec5SDimitry Andric
13540b57cec5SDimitry Andric	orr	w9, wzr, #1		// w9 = 1
13550b57cec5SDimitry Andric	add	w9, w9, w3, lsr #1	// w9 = 1 + argc/2
13560b57cec5SDimitry Andric	sub	sp, sp, w9, uxtw #4	// reserve 16*(1 + argc/2) bytes, so sp moves in multiples of 16
13570b57cec5SDimitry Andric	mov	x11, sp			// x11 = cursor for outgoing stack parameters
13580b57cec5SDimitry Andric
13590b57cec5SDimitry Andric	mov	x8, x0			// x8 = pkfn, since x0 is reused for &gtid
13600b57cec5SDimitry Andric	str	w1, [x29, #-__gtid]	// spill gtid so its address can be passed
13610b57cec5SDimitry Andric	str	w2, [x29, #-__tid]	// spill tid so its address can be passed
13620b57cec5SDimitry Andric	mov	w9, w3			// w9 = argc, counted down while p_argv is consumed
13630b57cec5SDimitry Andric	mov	x10, x4			// x10 = p_argv, since x4 is reused as an outgoing parameter
13640b57cec5SDimitry Andric# if OMPT_SUPPORT
13650b57cec5SDimitry Andric	mov	x19, x5			// keep exit_frame_ptr in callee-saved x19 across the call
13660b57cec5SDimitry Andric	str	x29, [x19]		// *exit_frame_ptr = x29
13670b57cec5SDimitry Andric# endif
13680b57cec5SDimitry Andric
13690b57cec5SDimitry Andric	sub	x0, x29, #__gtid	// 1st pkfn parameter: &gtid
13700b57cec5SDimitry Andric	sub	x1, x29, #__tid		// 2nd pkfn parameter: &tid
13710b57cec5SDimitry Andric
13720b57cec5SDimitry Andric	cbz	w9, KMP_LABEL(kmp_1)	// argc == 0: no further parameters
13730b57cec5SDimitry Andric	ldr	x2, [x10]		// x2 = p_argv[0]
13740b57cec5SDimitry Andric
13750b57cec5SDimitry Andric	sub	w9, w9, #1
13760b57cec5SDimitry Andric	cbz	w9, KMP_LABEL(kmp_1)	// argc == 1
13770b57cec5SDimitry Andric	ldr	x3, [x10, #8]!		// x3 = p_argv[1]; pre-index writeback advances x10
13780b57cec5SDimitry Andric
13790b57cec5SDimitry Andric	sub	w9, w9, #1
13800b57cec5SDimitry Andric	cbz	w9, KMP_LABEL(kmp_1)	// argc == 2
13810b57cec5SDimitry Andric	ldr	x4, [x10, #8]!		// x4 = p_argv[2]
13820b57cec5SDimitry Andric
13830b57cec5SDimitry Andric	sub	w9, w9, #1
13840b57cec5SDimitry Andric	cbz	w9, KMP_LABEL(kmp_1)	// argc == 3
13850b57cec5SDimitry Andric	ldr	x5, [x10, #8]!		// x5 = p_argv[3]
13860b57cec5SDimitry Andric
13870b57cec5SDimitry Andric	sub	w9, w9, #1
13880b57cec5SDimitry Andric	cbz	w9, KMP_LABEL(kmp_1)	// argc == 4
13890b57cec5SDimitry Andric	ldr	x6, [x10, #8]!		// x6 = p_argv[4]
13900b57cec5SDimitry Andric
13910b57cec5SDimitry Andric	sub	w9, w9, #1
13920b57cec5SDimitry Andric	cbz	w9, KMP_LABEL(kmp_1)	// argc == 5
13930b57cec5SDimitry Andric	ldr	x7, [x10, #8]!		// x7 = p_argv[5], the last register parameter
13940b57cec5SDimitry Andric
13950b57cec5SDimitry AndricKMP_LABEL(kmp_0):	// loop: copy p_argv[6] and later entries to the stack area
13960b57cec5SDimitry Andric	sub	w9, w9, #1
13970b57cec5SDimitry Andric	cbz	w9, KMP_LABEL(kmp_1)	// no entries left
13980b57cec5SDimitry Andric	ldr	x12, [x10, #8]!		// x12 = next p_argv entry
13990b57cec5SDimitry Andric	str	x12, [x11], #8		// store it at the cursor, then advance the cursor
14000b57cec5SDimitry Andric	b	KMP_LABEL(kmp_0)
14010b57cec5SDimitry AndricKMP_LABEL(kmp_1):	// all parameters are in place
14020b57cec5SDimitry Andric	blr	x8			// call (*pkfn)(&gtid, &tid, ...)
14030b57cec5SDimitry Andric	orr	w0, wzr, #1		// return value = 1
14040b57cec5SDimitry Andric	mov	sp, x29			// discard the dynamically sized area
14050b57cec5SDimitry Andric# if OMPT_SUPPORT
14060b57cec5SDimitry Andric	str	xzr, [x19]		// *exit_frame_ptr = 0
14070b57cec5SDimitry Andric	ldp	x19, x20, [sp], #16	// restore x19/x20
14080b57cec5SDimitry Andric# endif
14090b57cec5SDimitry Andric	ldp	x29, x30, [sp], #16	// restore frame pointer and link register
1410*62987288SDimitry Andric	PACBTI_RET
14110b57cec5SDimitry Andric	ret
14120b57cec5SDimitry Andric
14130b57cec5SDimitry Andric	DEBUG_INFO __kmp_invoke_microtask
14140b57cec5SDimitry Andric// -- End  __kmp_invoke_microtask
14150b57cec5SDimitry Andric
14160fca6ea1SDimitry Andric#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */
1417bdd1243dSDimitry Andric
1418bdd1243dSDimitry Andric#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM
1419bdd1243dSDimitry Andric
1420bdd1243dSDimitry Andric//------------------------------------------------------------------------
1421bdd1243dSDimitry Andric// int
1422bdd1243dSDimitry Andric// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
1423bdd1243dSDimitry Andric//                         int gtid, int tid,
1424bdd1243dSDimitry Andric//                         int argc, void *p_argv[]
1425bdd1243dSDimitry Andric// #if OMPT_SUPPORT
1426bdd1243dSDimitry Andric//                         ,
1427bdd1243dSDimitry Andric//                         void **exit_frame_ptr
1428bdd1243dSDimitry Andric// #endif
1429bdd1243dSDimitry Andric//                       ) {
1430bdd1243dSDimitry Andric// #if OMPT_SUPPORT
1431bdd1243dSDimitry Andric//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1432bdd1243dSDimitry Andric// #endif
1433bdd1243dSDimitry Andric//
1434bdd1243dSDimitry Andric//   (*pkfn)( & gtid, & tid, argv[0], ... );
1435bdd1243dSDimitry Andric//
1436bdd1243dSDimitry Andric// // FIXME: This is done at call-site and can be removed here.
1437bdd1243dSDimitry Andric// #if OMPT_SUPPORT
1438bdd1243dSDimitry Andric//   *exit_frame_ptr = 0;
1439bdd1243dSDimitry Andric// #endif
1440bdd1243dSDimitry Andric//
1441bdd1243dSDimitry Andric//   return 1;
1442bdd1243dSDimitry Andric// }
1443bdd1243dSDimitry Andric//
1444bdd1243dSDimitry Andric// parameters:
1445bdd1243dSDimitry Andric//	r0:	pkfn
1446bdd1243dSDimitry Andric//	r1:	gtid
1447bdd1243dSDimitry Andric//	r2:	tid
1448bdd1243dSDimitry Andric//	r3:	argc
1449bdd1243dSDimitry Andric//	r4(stack):	p_argv
1450bdd1243dSDimitry Andric//	r5(stack):	&exit_frame
1451bdd1243dSDimitry Andric//
1452bdd1243dSDimitry Andric// locals:
1453bdd1243dSDimitry Andric//	__gtid:	gtid parm pushed on stack so can pass &gtid to pkfn
1454bdd1243dSDimitry Andric//	__tid:	tid parm pushed on stack so can pass &tid to pkfn
1455bdd1243dSDimitry Andric//
1456bdd1243dSDimitry Andric// reg temps:
1457bdd1243dSDimitry Andric//	 r4:	used to hold pkfn address
1458bdd1243dSDimitry Andric//	 r5:	used as temporary for number of pkfn parms
1459bdd1243dSDimitry Andric//	 r6:	used to traverse p_argv array
1460bdd1243dSDimitry Andric//	 r7:	frame pointer (in some configurations)
1461bdd1243dSDimitry Andric//	 r8:	used as temporary for stack placement calculation
1462bdd1243dSDimitry Andric//	 	and as pointer to base of callee saved area
1463bdd1243dSDimitry Andric//	r12:	used as temporary for stack parameters
1464bdd1243dSDimitry Andric//	r10:	used to preserve exit_frame_ptr, callee-save
1465bdd1243dSDimitry Andric//	r11:	frame pointer (in some configurations)
1466bdd1243dSDimitry Andric//
1467bdd1243dSDimitry Andric// return:	r0	(always 1/TRUE)
1468bdd1243dSDimitry Andric//
1469bdd1243dSDimitry Andric
1470bdd1243dSDimitry Andric__gtid = 4	// gtid is spilled 4 bytes below the register save area
1471bdd1243dSDimitry Andric__tid = 8	// tid is spilled 8 bytes below the register save area
1472bdd1243dSDimitry Andric// 32-bit ARM trampoline: spills gtid/tid, loads up to two p_argv entries into r2/r3, copies any remaining entries to the stack, calls pkfn and returns 1.
1473bdd1243dSDimitry Andric// -- Begin __kmp_invoke_microtask
1474bdd1243dSDimitry Andric// mark_begin;
1475bdd1243dSDimitry Andric	.text
1476bdd1243dSDimitry Andric	PROC __kmp_invoke_microtask
1477bdd1243dSDimitry Andric
1478bdd1243dSDimitry Andric	// Pushing one extra register (r3) to keep the stack aligned
1479bdd1243dSDimitry Andric	// for when we call pkfn below
1480bdd1243dSDimitry Andric	push	{r3-r11,lr}	// ten words pushed, so the incoming stack arguments start at sp + 10*4
1481bdd1243dSDimitry Andric	// Load p_argv and &exit_frame
14821ac55f4cSDimitry Andric	ldr	r4, [sp, #10*4]	// r4 = p_argv (first incoming stack word)
14831ac55f4cSDimitry Andric# if OMPT_SUPPORT
14841ac55f4cSDimitry Andric	ldr	r5, [sp, #11*4]	// r5 = exit_frame_ptr (second incoming stack word)
14851ac55f4cSDimitry Andric# endif
1486bdd1243dSDimitry Andric
1487bdd1243dSDimitry Andric# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS)
1488bdd1243dSDimitry Andric# define FP r7
1489bdd1243dSDimitry Andric# define FPOFF 4*4
1490bdd1243dSDimitry Andric#else
1491bdd1243dSDimitry Andric# define FP r11
1492bdd1243dSDimitry Andric# define FPOFF 8*4
1493bdd1243dSDimitry Andric#endif
1494bdd1243dSDimitry Andric	add	FP, sp, #FPOFF	// FP = address of the slot where this register was just pushed (r7 at word 4, r11 at word 8)
1495bdd1243dSDimitry Andric# if OMPT_SUPPORT
1496bdd1243dSDimitry Andric	mov	r10, r5		// keep exit_frame_ptr in r10 across the call
1497bdd1243dSDimitry Andric	str	FP, [r10]	// *exit_frame_ptr = FP
1498bdd1243dSDimitry Andric# endif
1499bdd1243dSDimitry Andric	mov	r8, sp		// r8 = base of the register save area; gtid/tid live just below it
1500bdd1243dSDimitry Andric
1501bdd1243dSDimitry Andric	// Calculate how much stack to allocate, in increments of 8 bytes.
1502bdd1243dSDimitry Andric	// We strictly need 4*(argc-2) bytes (2 arguments are passed in
1503bdd1243dSDimitry Andric	// registers) but allocate 4*argc for simplicity (to avoid needing
1504bdd1243dSDimitry Andric	// to handle the argc<2 cases). We align the number of bytes
1505bdd1243dSDimitry Andric	// allocated to 8 bytes, to keep the stack aligned. (Since we
1506bdd1243dSDimitry Andric	// already allocate more than enough, it's ok to round down
1507bdd1243dSDimitry Andric	// instead of up for the alignment.) We allocate another extra
1508bdd1243dSDimitry Andric	// 8 bytes for gtid and tid.
1509bdd1243dSDimitry Andric	mov	r5, #1
1510bdd1243dSDimitry Andric	add	r5, r5, r3, lsr #1	// r5 = 1 + argc/2
1511bdd1243dSDimitry Andric	sub	sp, sp, r5, lsl #3	// reserve 8*(1 + argc/2) bytes
1512bdd1243dSDimitry Andric
1513bdd1243dSDimitry Andric	str	r1, [r8, #-__gtid]	// spill gtid so its address can be passed
1514bdd1243dSDimitry Andric	str	r2, [r8, #-__tid]	// spill tid so its address can be passed
1515bdd1243dSDimitry Andric	mov	r5, r3		// r5 = argc, counted down while p_argv is consumed
1516bdd1243dSDimitry Andric	mov	r6, r4		// r6 = p_argv
1517bdd1243dSDimitry Andric	mov	r4, r0		// r4 = pkfn, since r0 is reused for &gtid
1518bdd1243dSDimitry Andric
1519bdd1243dSDimitry Andric	// Prepare the first 2 parameters to pkfn - pointers to gtid and tid
1520bdd1243dSDimitry Andric	// in our stack frame.
1521bdd1243dSDimitry Andric	sub	r0, r8, #__gtid
1522bdd1243dSDimitry Andric	sub	r1, r8, #__tid
1523bdd1243dSDimitry Andric
1524bdd1243dSDimitry Andric	mov	r8, sp		// r8 now becomes the cursor for outgoing stack parameters
1525bdd1243dSDimitry Andric
1526bdd1243dSDimitry Andric	// Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2
1527bdd1243dSDimitry Andric	cmp	r5, #0
1528bdd1243dSDimitry Andric	beq	KMP_LABEL(kmp_1)	// argc == 0: no further parameters
1529bdd1243dSDimitry Andric	ldr	r2, [r6]		// r2 = p_argv[0]
1530bdd1243dSDimitry Andric
1531bdd1243dSDimitry Andric	subs	r5, r5, #1
1532bdd1243dSDimitry Andric	beq	KMP_LABEL(kmp_1)	// argc == 1
1533bdd1243dSDimitry Andric	ldr	r3, [r6, #4]!		// r3 = p_argv[1]; pre-index writeback advances r6
1534bdd1243dSDimitry Andric
1535bdd1243dSDimitry Andric	// Loop, loading the rest of p_argv and writing the elements on the
1536bdd1243dSDimitry Andric	// stack.
1537bdd1243dSDimitry AndricKMP_LABEL(kmp_0):
1538bdd1243dSDimitry Andric	subs	r5, r5, #1
1539bdd1243dSDimitry Andric	beq	KMP_LABEL(kmp_1)	// no entries left
1540bdd1243dSDimitry Andric	ldr	r12, [r6, #4]!		// r12 = next p_argv entry
1541bdd1243dSDimitry Andric	str	r12, [r8], #4		// store it at the cursor, then advance the cursor
1542bdd1243dSDimitry Andric	b	KMP_LABEL(kmp_0)
1543bdd1243dSDimitry AndricKMP_LABEL(kmp_1):
1544bdd1243dSDimitry Andric	blx	r4		// call (*pkfn)(&gtid, &tid, ...)
1545bdd1243dSDimitry Andric	mov	r0, #1		// return value = 1
1546bdd1243dSDimitry Andric
1547bdd1243dSDimitry Andric	sub	r4, FP, #FPOFF	// recompute the sp value that was current right after the push
1548bdd1243dSDimitry Andric	mov	sp, r4		// discard the dynamically sized area
1549bdd1243dSDimitry Andric# undef FP
1550bdd1243dSDimitry Andric# undef FPOFF
1551bdd1243dSDimitry Andric
1552bdd1243dSDimitry Andric# if OMPT_SUPPORT
1553bdd1243dSDimitry Andric	mov	r1, #0
1554bdd1243dSDimitry Andric	str	r1, [r10]	// *exit_frame_ptr = 0
1555bdd1243dSDimitry Andric# endif
1556bdd1243dSDimitry Andric	pop	{r3-r11,pc}	// restore the saved registers and return by loading the saved lr into pc
1557bdd1243dSDimitry Andric
1558bdd1243dSDimitry Andric	DEBUG_INFO __kmp_invoke_microtask
1559bdd1243dSDimitry Andric// -- End  __kmp_invoke_microtask
1560bdd1243dSDimitry Andric
15610fca6ea1SDimitry Andric#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */
15620b57cec5SDimitry Andric
15630b57cec5SDimitry Andric#if KMP_ARCH_PPC64
15640b57cec5SDimitry Andric
15650b57cec5SDimitry Andric//------------------------------------------------------------------------
15660b57cec5SDimitry Andric// int
15670b57cec5SDimitry Andric// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
15680b57cec5SDimitry Andric//                         int gtid, int tid,
1569489b1cf2SDimitry Andric//                         int argc, void *p_argv[]
1570489b1cf2SDimitry Andric// #if OMPT_SUPPORT
1571489b1cf2SDimitry Andric//                         ,
1572489b1cf2SDimitry Andric//                         void **exit_frame_ptr
1573489b1cf2SDimitry Andric// #endif
1574489b1cf2SDimitry Andric//                       ) {
1575489b1cf2SDimitry Andric// #if OMPT_SUPPORT
1576489b1cf2SDimitry Andric//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1577489b1cf2SDimitry Andric// #endif
1578489b1cf2SDimitry Andric//
15790b57cec5SDimitry Andric//   (*pkfn)( & gtid, & tid, argv[0], ... );
1580489b1cf2SDimitry Andric//
1581489b1cf2SDimitry Andric// // FIXME: This is done at call-site and can be removed here.
1582489b1cf2SDimitry Andric// #if OMPT_SUPPORT
1583489b1cf2SDimitry Andric//   *exit_frame_ptr = 0;
1584489b1cf2SDimitry Andric// #endif
1585489b1cf2SDimitry Andric//
15860b57cec5SDimitry Andric//   return 1;
15870b57cec5SDimitry Andric// }
15880b57cec5SDimitry Andric//
15890b57cec5SDimitry Andric// parameters:
15900b57cec5SDimitry Andric//	r3:	pkfn
15910b57cec5SDimitry Andric//	r4:	gtid
15920b57cec5SDimitry Andric//	r5:	tid
15930b57cec5SDimitry Andric//	r6:	argc
15940b57cec5SDimitry Andric//	r7:	p_argv
15950b57cec5SDimitry Andric//	r8:	&exit_frame
15960b57cec5SDimitry Andric//
15970b57cec5SDimitry Andric// return:	r3	(always 1/TRUE)
15980b57cec5SDimitry Andric//
15990b57cec5SDimitry Andric	.text
16000b57cec5SDimitry Andric# if KMP_ARCH_PPC64_ELFv2
16010b57cec5SDimitry Andric	.abiversion 2
16020b57cec5SDimitry Andric# endif
16030b57cec5SDimitry Andric	.globl	__kmp_invoke_microtask
16040b57cec5SDimitry Andric
16050b57cec5SDimitry Andric# if KMP_ARCH_PPC64_ELFv2
16060b57cec5SDimitry Andric	.p2align	4
16070b57cec5SDimitry Andric# else
16080b57cec5SDimitry Andric	.p2align	2
16090b57cec5SDimitry Andric# endif
16100b57cec5SDimitry Andric
16110b57cec5SDimitry Andric	.type	__kmp_invoke_microtask,@function
16120b57cec5SDimitry Andric
16130b57cec5SDimitry Andric# if KMP_ARCH_PPC64_ELFv2
16140b57cec5SDimitry Andric__kmp_invoke_microtask:
16150b57cec5SDimitry Andric.Lfunc_begin0:
16160b57cec5SDimitry Andric.Lfunc_gep0:
16170b57cec5SDimitry Andric	addis 2, 12, .TOC.-.Lfunc_gep0@ha	// global entry: r2 = r12 + high part of (.TOC. - .Lfunc_gep0)
16180b57cec5SDimitry Andric	addi 2, 2, .TOC.-.Lfunc_gep0@l	// add the low part; r2 is the TOC base when r12 holds the entry address
16190b57cec5SDimitry Andric.Lfunc_lep0:
16200b57cec5SDimitry Andric	.localentry	__kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
16210b57cec5SDimitry Andric# else
16220b57cec5SDimitry Andric	.section	.opd,"aw",@progbits
16230b57cec5SDimitry Andric__kmp_invoke_microtask:
16240b57cec5SDimitry Andric	.p2align	3
16250b57cec5SDimitry Andric	.quad	.Lfunc_begin0	// function descriptor word 0: code address
16260b57cec5SDimitry Andric	.quad	.TOC.@tocbase	// function descriptor word 1: TOC base
16270b57cec5SDimitry Andric	.quad	0		// function descriptor word 2
16280b57cec5SDimitry Andric	.text
16290b57cec5SDimitry Andric.Lfunc_begin0:
16300b57cec5SDimitry Andric# endif
16310b57cec5SDimitry Andric
16320b57cec5SDimitry Andric// -- Begin __kmp_invoke_microtask
16330b57cec5SDimitry Andric// mark_begin;
16340b57cec5SDimitry Andric// PPC64 trampoline: spills gtid/tid, loads up to six p_argv entries into r5-r10, copies any remaining entries to the stack, calls pkfn and returns 1.
16350b57cec5SDimitry Andric// We need to allocate a stack frame large enough to hold all of the parameters
16360b57cec5SDimitry Andric// on the stack for the microtask plus what this function needs. That's 48
16370b57cec5SDimitry Andric// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
16380b57cec5SDimitry Andric// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
16390b57cec5SDimitry Andric// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
16400b57cec5SDimitry Andric// to save r30 to hold a copy of r8.
16410b57cec5SDimitry Andric
16420b57cec5SDimitry Andric	.cfi_startproc
16430b57cec5SDimitry Andric	mflr 0		// r0 = return address
16440b57cec5SDimitry Andric	std 31, -8(1)	// save r31 just below the incoming stack pointer
16450b57cec5SDimitry Andric	std 0, 16(1)	// save the return address at offset 16 of the caller frame
16460b57cec5SDimitry Andric
16470b57cec5SDimitry Andric// This is unusual because normally we'd set r31 equal to r1 after the stack
16480b57cec5SDimitry Andric// frame is established. In this case, however, we need to dynamically compute
16490b57cec5SDimitry Andric// the stack frame size, and so we keep a direct copy of r1 to access our
16500b57cec5SDimitry Andric// register save areas and restore the r1 value before returning.
16510b57cec5SDimitry Andric	mr 31, 1
16520b57cec5SDimitry Andric	.cfi_def_cfa_register r31
16530b57cec5SDimitry Andric	.cfi_offset r31, -8
16540b57cec5SDimitry Andric	.cfi_offset lr, 16
16550b57cec5SDimitry Andric
16560b57cec5SDimitry Andric// Compute the size necessary for the local stack frame.
16570b57cec5SDimitry Andric# if KMP_ARCH_PPC64_ELFv2
16580b57cec5SDimitry Andric	li 12, 72	// fixed part: 32 + 8*2 + 8 + 8 + 8
16590b57cec5SDimitry Andric# else
16600b57cec5SDimitry Andric	li 12, 88	// fixed part: 48 + 8*2 + 8 + 8 + 8
16610b57cec5SDimitry Andric# endif
16620b57cec5SDimitry Andric	sldi 0, 6, 3	// r0 = argc * 8
16630b57cec5SDimitry Andric	add 12, 0, 12	// r12 = fixed part + 8*argc
16640b57cec5SDimitry Andric	neg 12, 12	// negate: the frame grows toward lower addresses
16650b57cec5SDimitry Andric
1666e8d8bef9SDimitry Andric// We need to make sure that the stack frame stays aligned (to 16 bytes).
16670b57cec5SDimitry Andric	li 0, -16
16680b57cec5SDimitry Andric	and 12, 0, 12	// clear the low four bits of the negative size (rounds the frame size up to a multiple of 16)
16690b57cec5SDimitry Andric
16700b57cec5SDimitry Andric// Establish the local stack frame.
16710b57cec5SDimitry Andric	stdux 1, 1, 12	// store the old r1 at r1 + r12 and update r1 to that address
16720b57cec5SDimitry Andric
16730b57cec5SDimitry Andric# if OMPT_SUPPORT
16740b57cec5SDimitry Andric	.cfi_offset r30, -16
16750b57cec5SDimitry Andric	std 30, -16(31)	// save r30 before reusing it
16760b57cec5SDimitry Andric	std 1, 0(8)	// *exit_frame_ptr = new r1
16770b57cec5SDimitry Andric	mr 30, 8	// keep exit_frame_ptr in r30 across the call (r8 is reused as a parameter)
16780b57cec5SDimitry Andric# endif
16790b57cec5SDimitry Andric
16800b57cec5SDimitry Andric// Store gtid and tid to the stack because they're passed by reference to the microtask.
16810b57cec5SDimitry Andric	stw 4, -20(31)	// gtid
16820b57cec5SDimitry Andric	stw 5, -24(31)	// tid
16830b57cec5SDimitry Andric
16840b57cec5SDimitry Andric	mr 12, 6	// r12 = argc (r6 is reused as a parameter)
16850b57cec5SDimitry Andric	mr 4, 7		// r4 = p_argv (r7 is reused as a parameter)
16860b57cec5SDimitry Andric
16870b57cec5SDimitry Andric	cmpwi 0, 12, 1
16880b57cec5SDimitry Andric	blt	 0, .Lcall	// argc < 1: no further parameters
16890b57cec5SDimitry Andric
16900b57cec5SDimitry Andric	ld 5, 0(4)	// r5 = p_argv[0]
16910b57cec5SDimitry Andric
16920b57cec5SDimitry Andric	cmpwi 0, 12, 2
16930b57cec5SDimitry Andric	blt	 0, .Lcall
16940b57cec5SDimitry Andric
16950b57cec5SDimitry Andric	ld 6, 8(4)	// r6 = p_argv[1]
16960b57cec5SDimitry Andric
16970b57cec5SDimitry Andric	cmpwi 0, 12, 3
16980b57cec5SDimitry Andric	blt	 0, .Lcall
16990b57cec5SDimitry Andric
17000b57cec5SDimitry Andric	ld 7, 16(4)	// r7 = p_argv[2]
17010b57cec5SDimitry Andric
17020b57cec5SDimitry Andric	cmpwi 0, 12, 4
17030b57cec5SDimitry Andric	blt	 0, .Lcall
17040b57cec5SDimitry Andric
17050b57cec5SDimitry Andric	ld 8, 24(4)	// r8 = p_argv[3]
17060b57cec5SDimitry Andric
17070b57cec5SDimitry Andric	cmpwi 0, 12, 5
17080b57cec5SDimitry Andric	blt	 0, .Lcall
17090b57cec5SDimitry Andric
17100b57cec5SDimitry Andric	ld 9, 32(4)	// r9 = p_argv[4]
17110b57cec5SDimitry Andric
17120b57cec5SDimitry Andric	cmpwi 0, 12, 6
17130b57cec5SDimitry Andric	blt	 0, .Lcall
17140b57cec5SDimitry Andric
17150b57cec5SDimitry Andric	ld 10, 40(4)	// r10 = p_argv[5], the last register parameter
17160b57cec5SDimitry Andric
17170b57cec5SDimitry Andric	cmpwi 0, 12, 7
17180b57cec5SDimitry Andric	blt	 0, .Lcall
17190b57cec5SDimitry Andric
17200b57cec5SDimitry Andric// There are more than 6 microtask parameters, so we need to store the
17210b57cec5SDimitry Andric// remainder to the stack.
17220b57cec5SDimitry Andric	addi 12, 12, -6
17230b57cec5SDimitry Andric	mtctr 12	// CTR = argc - 6 = number of entries to copy
17240b57cec5SDimitry Andric
17250b57cec5SDimitry Andric// These are set to 8 bytes before the first desired store address (we're using
17260b57cec5SDimitry Andric// pre-increment loads and stores in the loop below). The parameter save area
17270b57cec5SDimitry Andric// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
17280b57cec5SDimitry Andric// 32 + 8*8 == 96 bytes above r1 for ELFv2.
17290b57cec5SDimitry Andric	addi 4, 4, 40	// r4 = &p_argv[5]
17300b57cec5SDimitry Andric# if KMP_ARCH_PPC64_ELFv2
17310b57cec5SDimitry Andric	addi 12, 1, 88
17320b57cec5SDimitry Andric# else
17330b57cec5SDimitry Andric	addi 12, 1, 104
17340b57cec5SDimitry Andric# endif
17350b57cec5SDimitry Andric
17360b57cec5SDimitry Andric.Lnext:
17370b57cec5SDimitry Andric	ldu 0, 8(4)	// r0 = next p_argv entry; r4 advances by 8
17380b57cec5SDimitry Andric	stdu 0, 8(12)	// store it to the next stack slot; r12 advances by 8
17390b57cec5SDimitry Andric	bdnz .Lnext	// decrement CTR and loop while it is nonzero
17400b57cec5SDimitry Andric
17410b57cec5SDimitry Andric.Lcall:
17420b57cec5SDimitry Andric# if KMP_ARCH_PPC64_ELFv2
17430b57cec5SDimitry Andric	std 2, 24(1)	// save our TOC pointer in the frame
17440b57cec5SDimitry Andric	mr 12, 3	// r12 = pkfn entry address
17450b57cec5SDimitry Andric#else
17460b57cec5SDimitry Andric	std 2, 40(1)	// save our TOC pointer in the frame
17470b57cec5SDimitry Andric// For ELFv1, we need to load the actual function address from the function descriptor.
17480b57cec5SDimitry Andric	ld 12, 0(3)	// descriptor word 0: code address
17490b57cec5SDimitry Andric	ld 2, 8(3)	// descriptor word 1: TOC pointer for pkfn
17500b57cec5SDimitry Andric	ld 11, 16(3)	// descriptor word 2 (environment pointer in the ELFv1 ABI)
17510b57cec5SDimitry Andric#endif
17520b57cec5SDimitry Andric
17530b57cec5SDimitry Andric	addi 3, 31, -20	// 1st pkfn parameter: &gtid
17540b57cec5SDimitry Andric	addi 4, 31, -24	// 2nd pkfn parameter: &tid
17550b57cec5SDimitry Andric
17560b57cec5SDimitry Andric	mtctr 12
17570b57cec5SDimitry Andric	bctrl		// call (*pkfn)(&gtid, &tid, ...)
17580b57cec5SDimitry Andric# if KMP_ARCH_PPC64_ELFv2
17590b57cec5SDimitry Andric	ld 2, 24(1)	// restore our TOC pointer
17600b57cec5SDimitry Andric# else
17610b57cec5SDimitry Andric	ld 2, 40(1)	// restore our TOC pointer
17620b57cec5SDimitry Andric# endif
17630b57cec5SDimitry Andric
17640b57cec5SDimitry Andric# if OMPT_SUPPORT
17650b57cec5SDimitry Andric	li 3, 0
17660b57cec5SDimitry Andric	std 3, 0(30)	// *exit_frame_ptr = 0
17670b57cec5SDimitry Andric# endif
17680b57cec5SDimitry Andric
17690b57cec5SDimitry Andric	li 3, 1		// return value = 1
17700b57cec5SDimitry Andric
17710b57cec5SDimitry Andric# if OMPT_SUPPORT
17720b57cec5SDimitry Andric	ld 30, -16(31)	// restore r30
17730b57cec5SDimitry Andric# endif
17740b57cec5SDimitry Andric
17750b57cec5SDimitry Andric	mr 1, 31	// restore the incoming stack pointer, releasing the frame
17760b57cec5SDimitry Andric	ld 0, 16(1)	// reload the saved return address
17770b57cec5SDimitry Andric	ld 31, -8(1)	// restore r31
17780b57cec5SDimitry Andric	mtlr 0
17790b57cec5SDimitry Andric	blr
17800b57cec5SDimitry Andric
17810b57cec5SDimitry Andric	.long	0
17820b57cec5SDimitry Andric	.quad	0
17830b57cec5SDimitry Andric.Lfunc_end0:
17840b57cec5SDimitry Andric	.size	__kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
17850b57cec5SDimitry Andric	.cfi_endproc
17860b57cec5SDimitry Andric
17870b57cec5SDimitry Andric// -- End  __kmp_invoke_microtask
17880b57cec5SDimitry Andric
17890b57cec5SDimitry Andric#endif /* KMP_ARCH_PPC64 */
17900b57cec5SDimitry Andric
1791489b1cf2SDimitry Andric#if KMP_ARCH_RISCV64
1792489b1cf2SDimitry Andric
1793489b1cf2SDimitry Andric//------------------------------------------------------------------------
1794489b1cf2SDimitry Andric//
1795489b1cf2SDimitry Andric// typedef void (*microtask_t)(int *gtid, int *tid, ...);
1796489b1cf2SDimitry Andric//
1797489b1cf2SDimitry Andric// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
1798489b1cf2SDimitry Andric//                            void *p_argv[]
1799489b1cf2SDimitry Andric// #if OMPT_SUPPORT
1800489b1cf2SDimitry Andric//                            ,
1801489b1cf2SDimitry Andric//                            void **exit_frame_ptr
1802489b1cf2SDimitry Andric// #endif
1803489b1cf2SDimitry Andric//                            ) {
1804489b1cf2SDimitry Andric// #if OMPT_SUPPORT
1805489b1cf2SDimitry Andric//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1806489b1cf2SDimitry Andric// #endif
1807489b1cf2SDimitry Andric//
1808489b1cf2SDimitry Andric//   (*pkfn)(&gtid, &tid, argv[0], ...);
1809489b1cf2SDimitry Andric//
1810489b1cf2SDimitry Andric//   return 1;
1811489b1cf2SDimitry Andric// }
1812489b1cf2SDimitry Andric//
1813489b1cf2SDimitry Andric// Parameters:
1814489b1cf2SDimitry Andric//   a0: pkfn
1815489b1cf2SDimitry Andric//   a1: gtid
1816489b1cf2SDimitry Andric//   a2: tid
1817489b1cf2SDimitry Andric//   a3: argc
1818489b1cf2SDimitry Andric//   a4: p_argv
1819489b1cf2SDimitry Andric//   a5: exit_frame_ptr
1820489b1cf2SDimitry Andric//
1821489b1cf2SDimitry Andric// Locals:
1822489b1cf2SDimitry Andric//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
1823489b1cf2SDimitry Andric//   __tid: tid param pushed on stack so can pass &tid to pkfn
1824489b1cf2SDimitry Andric//
1825489b1cf2SDimitry Andric// Temp. registers:
1826489b1cf2SDimitry Andric//
1827489b1cf2SDimitry Andric//  t0: used to calculate the dynamic stack size / used to hold pkfn address
1828489b1cf2SDimitry Andric//  t1: used as temporary for stack placement calculation
1829489b1cf2SDimitry Andric//  t2: used as temporary for stack arguments
1830489b1cf2SDimitry Andric//  t3: used as temporary for number of remaining pkfn parms
1831489b1cf2SDimitry Andric//  t4: used to traverse p_argv array
1832489b1cf2SDimitry Andric//
1833489b1cf2SDimitry Andric// return: a0 (always 1/TRUE)
1834489b1cf2SDimitry Andric//
1835489b1cf2SDimitry Andric
1836489b1cf2SDimitry Andric__gtid = -20
1837489b1cf2SDimitry Andric__tid = -24
1838489b1cf2SDimitry Andric
1839489b1cf2SDimitry Andric// -- Begin __kmp_invoke_microtask
1840489b1cf2SDimitry Andric// mark_begin;
1841489b1cf2SDimitry Andric	.text
1842489b1cf2SDimitry Andric	.globl	__kmp_invoke_microtask
1843489b1cf2SDimitry Andric	.p2align	1
1844489b1cf2SDimitry Andric	.type	__kmp_invoke_microtask,@function
1845489b1cf2SDimitry Andric__kmp_invoke_microtask:
1846489b1cf2SDimitry Andric	.cfi_startproc
1847489b1cf2SDimitry Andric
1848489b1cf2SDimitry Andric	// First, save ra and fp
1849489b1cf2SDimitry Andric	addi	sp, sp, -16
1850489b1cf2SDimitry Andric	sd	ra, 8(sp)
1851489b1cf2SDimitry Andric	sd	fp, 0(sp)
1852489b1cf2SDimitry Andric	addi	fp, sp, 16
1853489b1cf2SDimitry Andric	.cfi_def_cfa	fp, 0
1854489b1cf2SDimitry Andric	.cfi_offset	ra, -8
1855489b1cf2SDimitry Andric	.cfi_offset	fp, -16
1856489b1cf2SDimitry Andric
1857489b1cf2SDimitry Andric	// Compute the dynamic stack size:
1858489b1cf2SDimitry Andric	//
1859489b1cf2SDimitry Andric	// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
1860489b1cf2SDimitry Andric	//   reference
1861489b1cf2SDimitry Andric	// - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
1862489b1cf2SDimitry Andric	//   function by register. Given that we have 8 of such registers (a[0-7])
1863489b1cf2SDimitry Andric	//   and two + 'argc' arguments (consider &gtid and &tid), we need to
1864489b1cf2SDimitry Andric	//   reserve max(0, argc - 6)*8 extra bytes
1865489b1cf2SDimitry Andric	//
1866489b1cf2SDimitry Andric	// The total number of bytes is then max(0, argc - 6)*8 + 8
1867489b1cf2SDimitry Andric
1868489b1cf2SDimitry Andric	// Compute max(0, argc - 6) using the following bithack:
1869489b1cf2SDimitry Andric	// max(0, x) = x - (x & (x >> 31)), where x := argc - 6
1870489b1cf2SDimitry Andric	// Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
1871489b1cf2SDimitry Andric	addi	t0, a3, -6
1872489b1cf2SDimitry Andric	srai	t1, t0, 31
1873489b1cf2SDimitry Andric	and	t1, t0, t1
1874489b1cf2SDimitry Andric	sub	t0, t0, t1
1875489b1cf2SDimitry Andric
1876489b1cf2SDimitry Andric	addi	t0, t0, 1
1877489b1cf2SDimitry Andric
1878489b1cf2SDimitry Andric	slli	t0, t0, 3
1879489b1cf2SDimitry Andric	sub	sp, sp, t0
1880489b1cf2SDimitry Andric
1881489b1cf2SDimitry Andric	// Align the stack to 16 bytes
1882489b1cf2SDimitry Andric	andi	sp, sp, -16
1883489b1cf2SDimitry Andric
1884489b1cf2SDimitry Andric	mv	t0, a0
1885489b1cf2SDimitry Andric	mv	t3, a3
1886489b1cf2SDimitry Andric	mv	t4, a4
1887489b1cf2SDimitry Andric
1888489b1cf2SDimitry Andric#if OMPT_SUPPORT
1889489b1cf2SDimitry Andric	// Save frame pointer into exit_frame
1890489b1cf2SDimitry Andric	sd	fp, 0(a5)
1891489b1cf2SDimitry Andric#endif
1892489b1cf2SDimitry Andric
1893489b1cf2SDimitry Andric	// Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
1894489b1cf2SDimitry Andric
1895489b1cf2SDimitry Andric	sw	a1, __gtid(fp)
1896489b1cf2SDimitry Andric	sw	a2, __tid(fp)
1897489b1cf2SDimitry Andric
1898489b1cf2SDimitry Andric	addi	a0, fp, __gtid
1899489b1cf2SDimitry Andric	addi	a1, fp, __tid
1900489b1cf2SDimitry Andric
1901489b1cf2SDimitry Andric	beqz	t3, .L_kmp_3
1902489b1cf2SDimitry Andric	ld	a2, 0(t4)
1903489b1cf2SDimitry Andric
1904489b1cf2SDimitry Andric	addi	t3, t3, -1
1905489b1cf2SDimitry Andric	beqz	t3, .L_kmp_3
1906489b1cf2SDimitry Andric	ld	a3, 8(t4)
1907489b1cf2SDimitry Andric
1908489b1cf2SDimitry Andric	addi	t3, t3, -1
1909489b1cf2SDimitry Andric	beqz	t3, .L_kmp_3
1910489b1cf2SDimitry Andric	ld	a4, 16(t4)
1911489b1cf2SDimitry Andric
1912489b1cf2SDimitry Andric	addi	t3, t3, -1
1913489b1cf2SDimitry Andric	beqz	t3, .L_kmp_3
1914489b1cf2SDimitry Andric	ld	a5, 24(t4)
1915489b1cf2SDimitry Andric
1916489b1cf2SDimitry Andric	addi	t3, t3, -1
1917489b1cf2SDimitry Andric	beqz	t3, .L_kmp_3
1918489b1cf2SDimitry Andric	ld	a6, 32(t4)
1919489b1cf2SDimitry Andric
1920489b1cf2SDimitry Andric	addi	t3, t3, -1
1921489b1cf2SDimitry Andric	beqz	t3, .L_kmp_3
1922489b1cf2SDimitry Andric	ld	a7, 40(t4)
1923489b1cf2SDimitry Andric
1924489b1cf2SDimitry Andric	// Prepare any additional argument passed through the stack
1925489b1cf2SDimitry Andric	addi	t4, t4, 48
1926489b1cf2SDimitry Andric	mv	t1, sp
1927489b1cf2SDimitry Andric	j .L_kmp_2
1928489b1cf2SDimitry Andric.L_kmp_1:
1929489b1cf2SDimitry Andric	ld	t2, 0(t4)
1930489b1cf2SDimitry Andric	sd	t2, 0(t1)
1931489b1cf2SDimitry Andric	addi	t4, t4, 8
1932489b1cf2SDimitry Andric	addi	t1, t1, 8
1933489b1cf2SDimitry Andric.L_kmp_2:
1934489b1cf2SDimitry Andric	addi	t3, t3, -1
1935489b1cf2SDimitry Andric	bnez	t3, .L_kmp_1
1936489b1cf2SDimitry Andric
1937489b1cf2SDimitry Andric.L_kmp_3:
1938489b1cf2SDimitry Andric	// Call pkfn function
1939489b1cf2SDimitry Andric	jalr	t0
1940489b1cf2SDimitry Andric
1941489b1cf2SDimitry Andric	// Restore stack and return
1942489b1cf2SDimitry Andric
1943489b1cf2SDimitry Andric	addi	a0, zero, 1
1944489b1cf2SDimitry Andric
1945489b1cf2SDimitry Andric	addi	sp, fp, -16
1946489b1cf2SDimitry Andric	ld	fp, 0(sp)
1947489b1cf2SDimitry Andric	ld	ra, 8(sp)
1948489b1cf2SDimitry Andric	addi	sp, sp, 16
1949489b1cf2SDimitry Andric	ret
1950489b1cf2SDimitry Andric.Lfunc_end0:
1951489b1cf2SDimitry Andric	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
1952489b1cf2SDimitry Andric	.cfi_endproc
1953489b1cf2SDimitry Andric
1954489b1cf2SDimitry Andric// -- End  __kmp_invoke_microtask
1955489b1cf2SDimitry Andric
1956489b1cf2SDimitry Andric#endif /* KMP_ARCH_RISCV64 */
1957489b1cf2SDimitry Andric
1958bdd1243dSDimitry Andric#if KMP_ARCH_LOONGARCH64
1959bdd1243dSDimitry Andric
1960bdd1243dSDimitry Andric//------------------------------------------------------------------------
1961bdd1243dSDimitry Andric//
1962bdd1243dSDimitry Andric// typedef void (*microtask_t)(int *gtid, int *tid, ...);
1963bdd1243dSDimitry Andric//
1964bdd1243dSDimitry Andric// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
1965bdd1243dSDimitry Andric//                            void *p_argv[]
1966bdd1243dSDimitry Andric// #if OMPT_SUPPORT
1967bdd1243dSDimitry Andric//                            ,
1968bdd1243dSDimitry Andric//                            void **exit_frame_ptr
1969bdd1243dSDimitry Andric// #endif
1970bdd1243dSDimitry Andric//                            ) {
1971bdd1243dSDimitry Andric// #if OMPT_SUPPORT
1972bdd1243dSDimitry Andric//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
1973bdd1243dSDimitry Andric// #endif
1974bdd1243dSDimitry Andric//
1975bdd1243dSDimitry Andric//   (*pkfn)(&gtid, &tid, argv[0], ...);
1976bdd1243dSDimitry Andric//
1977bdd1243dSDimitry Andric//   return 1;
1978bdd1243dSDimitry Andric// }
1979bdd1243dSDimitry Andric//
1980bdd1243dSDimitry Andric// Parameters:
1981bdd1243dSDimitry Andric//   a0: pkfn
1982bdd1243dSDimitry Andric//   a1: gtid
1983bdd1243dSDimitry Andric//   a2: tid
1984bdd1243dSDimitry Andric//   a3: argc
1985bdd1243dSDimitry Andric//   a4: p_argv
1986bdd1243dSDimitry Andric//   a5: exit_frame_ptr
1987bdd1243dSDimitry Andric//
1988bdd1243dSDimitry Andric// Locals:
1989bdd1243dSDimitry Andric//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
1990bdd1243dSDimitry Andric//   __tid: tid param pushed on stack so can pass &tid to pkfn
1991bdd1243dSDimitry Andric//
1992bdd1243dSDimitry Andric// Temp registers:
1993bdd1243dSDimitry Andric//
1994bdd1243dSDimitry Andric//  t0: used to calculate the dynamic stack size / used to hold pkfn address
1995bdd1243dSDimitry Andric//  t1: used as temporary for stack placement calculation
1996bdd1243dSDimitry Andric//  t2: used as temporary for stack arguments
1997bdd1243dSDimitry Andric//  t3: used as temporary for number of remaining pkfn parms
1998bdd1243dSDimitry Andric//  t4: used to traverse p_argv array
1999bdd1243dSDimitry Andric//
2000bdd1243dSDimitry Andric// return: a0 (always 1/TRUE)
2001bdd1243dSDimitry Andric//
2002bdd1243dSDimitry Andric
2003bdd1243dSDimitry Andric// -- Begin __kmp_invoke_microtask
2004bdd1243dSDimitry Andric// mark_begin;
2005bdd1243dSDimitry Andric	.text
2006bdd1243dSDimitry Andric	.globl	__kmp_invoke_microtask
2007bdd1243dSDimitry Andric	.p2align	2
2008bdd1243dSDimitry Andric	.type	__kmp_invoke_microtask,@function
2009bdd1243dSDimitry Andric__kmp_invoke_microtask:
2010bdd1243dSDimitry Andric	.cfi_startproc
2011bdd1243dSDimitry Andric
2012bdd1243dSDimitry Andric	// First, save ra and fp
2013bdd1243dSDimitry Andric	addi.d	$sp, $sp, -16
2014bdd1243dSDimitry Andric	st.d	$ra, $sp, 8
2015bdd1243dSDimitry Andric	st.d	$fp, $sp, 0
2016bdd1243dSDimitry Andric	addi.d	$fp, $sp, 16
2017bdd1243dSDimitry Andric	.cfi_def_cfa	22, 0
2018bdd1243dSDimitry Andric	.cfi_offset	1, -8
2019bdd1243dSDimitry Andric	.cfi_offset	22, -16
2020bdd1243dSDimitry Andric
2021bdd1243dSDimitry Andric	// Compute the dynamic stack size:
2022bdd1243dSDimitry Andric	//
2023bdd1243dSDimitry Andric	// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
2024bdd1243dSDimitry Andric	//   reference
2025bdd1243dSDimitry Andric	// - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
2026bdd1243dSDimitry Andric	//   function by register. Given that we have 8 of such registers (a[0-7])
2027bdd1243dSDimitry Andric	//   and two + 'argc' arguments (consider &gtid and &tid), we need to
2028bdd1243dSDimitry Andric	//   reserve max(0, argc - 6)*8 extra bytes
2029bdd1243dSDimitry Andric	//
2030bdd1243dSDimitry Andric	// The total number of bytes is then max(0, argc - 6)*8 + 8
2031bdd1243dSDimitry Andric
2032bdd1243dSDimitry Andric	addi.d  $t0, $a3, -6
2033bdd1243dSDimitry Andric	slt  $t1, $t0, $zero
2034bdd1243dSDimitry Andric	masknez  $t0, $t0, $t1
2035bdd1243dSDimitry Andric	addi.d  $t0, $t0, 1
2036bdd1243dSDimitry Andric	slli.d	$t0, $t0, 3
2037bdd1243dSDimitry Andric	sub.d	$sp, $sp, $t0
2038bdd1243dSDimitry Andric
2039bdd1243dSDimitry Andric	// Align the stack to 16 bytes
2040bdd1243dSDimitry Andric	bstrins.d $sp, $zero, 3, 0
2041bdd1243dSDimitry Andric
2042bdd1243dSDimitry Andric	move	$t0, $a0
2043bdd1243dSDimitry Andric	move	$t3, $a3
2044bdd1243dSDimitry Andric	move	$t4, $a4
2045bdd1243dSDimitry Andric
2046bdd1243dSDimitry Andric#if OMPT_SUPPORT
2047bdd1243dSDimitry Andric	// Save frame pointer into exit_frame
2048bdd1243dSDimitry Andric	st.d	$fp, $a5, 0
2049bdd1243dSDimitry Andric#endif
2050bdd1243dSDimitry Andric
2051bdd1243dSDimitry Andric	// Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
2052bdd1243dSDimitry Andric
2053bdd1243dSDimitry Andric	st.w	$a1, $fp, -20
2054bdd1243dSDimitry Andric	st.w	$a2, $fp, -24
2055bdd1243dSDimitry Andric
2056bdd1243dSDimitry Andric	addi.d	$a0, $fp, -20
2057bdd1243dSDimitry Andric	addi.d	$a1, $fp, -24
2058bdd1243dSDimitry Andric
2059bdd1243dSDimitry Andric	beqz	$t3, .L_kmp_3
2060bdd1243dSDimitry Andric	ld.d	$a2, $t4, 0
2061bdd1243dSDimitry Andric
2062bdd1243dSDimitry Andric	addi.d	$t3, $t3, -1
2063bdd1243dSDimitry Andric	beqz	$t3, .L_kmp_3
2064bdd1243dSDimitry Andric	ld.d	$a3, $t4, 8
2065bdd1243dSDimitry Andric
2066bdd1243dSDimitry Andric	addi.d	$t3, $t3, -1
2067bdd1243dSDimitry Andric	beqz	$t3, .L_kmp_3
2068bdd1243dSDimitry Andric	ld.d	$a4, $t4, 16
2069bdd1243dSDimitry Andric
2070bdd1243dSDimitry Andric	addi.d	$t3, $t3, -1
2071bdd1243dSDimitry Andric	beqz	$t3, .L_kmp_3
2072bdd1243dSDimitry Andric	ld.d	$a5, $t4, 24
2073bdd1243dSDimitry Andric
2074bdd1243dSDimitry Andric	addi.d	$t3, $t3, -1
2075bdd1243dSDimitry Andric	beqz	$t3, .L_kmp_3
2076bdd1243dSDimitry Andric	ld.d	$a6, $t4, 32
2077bdd1243dSDimitry Andric
2078bdd1243dSDimitry Andric	addi.d	$t3, $t3, -1
2079bdd1243dSDimitry Andric	beqz	$t3, .L_kmp_3
2080bdd1243dSDimitry Andric	ld.d	$a7, $t4, 40
2081bdd1243dSDimitry Andric
2082bdd1243dSDimitry Andric	// Prepare any additional argument passed through the stack
2083bdd1243dSDimitry Andric	addi.d	$t4, $t4, 48
2084bdd1243dSDimitry Andric	move	$t1, $sp
2085bdd1243dSDimitry Andric	b .L_kmp_2
2086bdd1243dSDimitry Andric.L_kmp_1:
2087bdd1243dSDimitry Andric	ld.d	$t2, $t4, 0
2088bdd1243dSDimitry Andric	st.d	$t2, $t1, 0
2089bdd1243dSDimitry Andric	addi.d	$t4, $t4, 8
2090bdd1243dSDimitry Andric	addi.d	$t1, $t1, 8
2091bdd1243dSDimitry Andric.L_kmp_2:
2092bdd1243dSDimitry Andric	addi.d	$t3, $t3, -1
2093bdd1243dSDimitry Andric	bnez	$t3, .L_kmp_1
2094bdd1243dSDimitry Andric
2095bdd1243dSDimitry Andric.L_kmp_3:
2096bdd1243dSDimitry Andric	// Call pkfn function
2097bdd1243dSDimitry Andric	jirl	$ra, $t0, 0
2098bdd1243dSDimitry Andric
2099bdd1243dSDimitry Andric	// Restore stack and return
2100bdd1243dSDimitry Andric
2101bdd1243dSDimitry Andric	addi.d	$a0, $zero, 1
2102bdd1243dSDimitry Andric
2103bdd1243dSDimitry Andric	addi.d	$sp, $fp, -16
2104bdd1243dSDimitry Andric	ld.d	$fp, $sp, 0
2105bdd1243dSDimitry Andric	ld.d	$ra, $sp, 8
2106bdd1243dSDimitry Andric	addi.d	$sp, $sp, 16
2107bdd1243dSDimitry Andric	jr $ra
2108bdd1243dSDimitry Andric.Lfunc_end0:
2109bdd1243dSDimitry Andric	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
2110bdd1243dSDimitry Andric	.cfi_endproc
2111bdd1243dSDimitry Andric
2112bdd1243dSDimitry Andric// -- End  __kmp_invoke_microtask
2113bdd1243dSDimitry Andric
2114bdd1243dSDimitry Andric#endif /* KMP_ARCH_LOONGARCH64 */
2115bdd1243dSDimitry Andric
21165f757f3fSDimitry Andric#if KMP_ARCH_VE
21175f757f3fSDimitry Andric
21185f757f3fSDimitry Andric//------------------------------------------------------------------------
21195f757f3fSDimitry Andric//
21205f757f3fSDimitry Andric// typedef void (*microtask_t)(int *gtid, int *tid, ...);
21215f757f3fSDimitry Andric//
21225f757f3fSDimitry Andric// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
21235f757f3fSDimitry Andric//                            void *p_argv[]
21245f757f3fSDimitry Andric// #if OMPT_SUPPORT
21255f757f3fSDimitry Andric//                            ,
21265f757f3fSDimitry Andric//                            void **exit_frame_ptr
21275f757f3fSDimitry Andric// #endif
21285f757f3fSDimitry Andric//                            ) {
21295f757f3fSDimitry Andric// #if OMPT_SUPPORT
21305f757f3fSDimitry Andric//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
21315f757f3fSDimitry Andric// #endif
21325f757f3fSDimitry Andric//
21335f757f3fSDimitry Andric//   (*pkfn)(&gtid, &tid, argv[0], ...);
21345f757f3fSDimitry Andric//
21355f757f3fSDimitry Andric//   return 1;
21365f757f3fSDimitry Andric// }
21375f757f3fSDimitry Andric//
21385f757f3fSDimitry Andric// Parameters:
21395f757f3fSDimitry Andric//   s0: pkfn
21405f757f3fSDimitry Andric//   s1: gtid
21415f757f3fSDimitry Andric//   s2: tid
21425f757f3fSDimitry Andric//   s3: argc
21435f757f3fSDimitry Andric//   s4: p_argv
21445f757f3fSDimitry Andric//   s5: exit_frame_ptr
21455f757f3fSDimitry Andric//
21465f757f3fSDimitry Andric// Locals:
21475f757f3fSDimitry Andric//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
21485f757f3fSDimitry Andric//   __tid: tid param pushed on stack so can pass &tid to pkfn
21495f757f3fSDimitry Andric//
21505f757f3fSDimitry Andric// Temp. registers:
21515f757f3fSDimitry Andric//
21525f757f3fSDimitry Andric//  s34: used to calculate the dynamic stack size
21535f757f3fSDimitry Andric//  s35: used as temporary for stack placement calculation
21545f757f3fSDimitry Andric//  s36: used as temporary for stack arguments
21555f757f3fSDimitry Andric//  s37: used as temporary for number of remaining pkfn parms
21565f757f3fSDimitry Andric//  s38: used to traverse p_argv array
21575f757f3fSDimitry Andric//
21585f757f3fSDimitry Andric// return: s0 (always 1/TRUE)
21595f757f3fSDimitry Andric//
21605f757f3fSDimitry Andric
// %fp-relative offsets of the 32-bit gtid/tid copies passed by reference.
__gtid = -4
__tid = -8

// -- Begin __kmp_invoke_microtask
// mark_begin;
	.text
	.globl	__kmp_invoke_microtask
	// A function requires 8 bytes align.
	.p2align	3
	.type	__kmp_invoke_microtask,@function
__kmp_invoke_microtask:
	.cfi_startproc

	// Prologue. On VE the callee stores fp and lr into the caller's
	// stack frame (at 0 and 8 from the incoming %sp); %fp then takes the
	// value of the incoming %sp.
	st	%fp, 0(, %sp)
	st	%lr, 8(, %sp)
	or	%fp, 0, %sp
	.cfi_def_cfa	%fp, 0
	.cfi_offset	%lr, 8
	.cfi_offset	%fp, 0

	// Size of the new frame:
	//
	// - 8 bytes hold 'gtid' and 'tid' so that they can be passed by
	//   reference.
	// - Every pkfn argument gets an 8-byte stack slot. pkfn receives
	//   two + 'argc' arguments (consider &gtid and &tid), i.e.
	//   (argc + 2) * 8 bytes.
	// - 176 bytes are needed for the RSA and others.
	//
	// The total number of bytes is then (argc + 2) * 8 + 8 + 176.
	//
	// |------------------------------|
	// | return address of callee     | 8(%fp)
	// |------------------------------|
	// | frame pointer of callee      | 0(%fp)
	// |------------------------------| <------------------ %fp
	// | __tid / __gtid               | -8(%fp) / -4(%fp)
	// |------------------------------|
	// | argc+2 for arguments         | 176(%sp)
	// |------------------------------|
	// | RSA                          |
	// |------------------------------|
	// | return address               |
	// |------------------------------|
	// | frame pointer                |
	// |------------------------------| <------------------ %sp

	adds.w.sx	%s34, 2, %s3		// s34 = argc + 2
	sll	%s34, %s34, 3			// slots -> bytes
	lea	%s34, 184(, %s34)		// + 8 (gtid/tid) + 176
	subs.l	%sp, %sp, %s34

	// Round %sp down to a 16-byte boundary.
	and	%sp, -16, %sp

	// Keep pkfn in %s12; %s0 is reused for the first pkfn argument.
	or	%s12, 0, %s0

	// If the new %sp dropped below the stack limit %sl, ask the host to
	// extend the stack; otherwise skip the request.
	// NOTE(review): the request is written via shm.l to the area whose
	// address is loaded from 24(%tp); 0x13b is presumably the request
	// code for growing the stack - confirm against the VE ABI.
	brge.l	%sp, %sl, .L_kmp_stack_ok
	ld	%s61, 24(, %tp)
	lea	%s63, 0x13b
	shm.l	%s63, 0(%s61)
	shm.l	%sl, 8(%s61)
	shm.l	%sp, 16(%s61)
	monc

.L_kmp_stack_ok:
	adds.w.sx	%s37, 0, %s3		// s37 = argc
	or	%s38, 0, %s4			// s38 = cursor into p_argv
	lea	%s35, 176(, %sp)		// s35 = outgoing argument area

#if OMPT_SUPPORT
	// *exit_frame_ptr = %fp; must precede the reuse of %s5 below.
	st	%fp, 0(%s5)
#endif

	// Set up the pkfn arguments. The first eight go into s0-s7, but each
	// of them is also written to its stack slot because pkfn is a
	// varargs function.

	stl	%s1, __gtid(%fp)
	adds.l	%s0, __gtid, %fp		// s0 = &gtid
	st	%s0, 0(, %s35)
	stl	%s2, __tid(%fp)
	adds.l	%s1, __tid, %fp			// s1 = &tid
	st	%s1, 8(, %s35)

	// p_argv[0..5] -> s2-s7 and slots 2..7; leave as soon as argc
	// entries have been placed.
	breq.l	0, %s37, .L_kmp_call_pkfn
	ld	%s2, 0(, %s38)
	st	%s2, 16(, %s35)

	breq.l	1, %s37, .L_kmp_call_pkfn
	ld	%s3, 8(, %s38)
	st	%s3, 24(, %s35)

	breq.l	2, %s37, .L_kmp_call_pkfn
	ld	%s4, 16(, %s38)
	st	%s4, 32(, %s35)

	breq.l	3, %s37, .L_kmp_call_pkfn
	ld	%s5, 24(, %s38)
	st	%s5, 40(, %s35)

	breq.l	4, %s37, .L_kmp_call_pkfn
	ld	%s6, 32(, %s38)
	st	%s6, 48(, %s35)

	breq.l	5, %s37, .L_kmp_call_pkfn
	ld	%s7, 40(, %s38)
	st	%s7, 56(, %s35)

	breq.l	6, %s37, .L_kmp_call_pkfn

	// argc > 6 here: copy the remaining argc - 6 entries to the stack
	// slots that follow the eight register-backed ones.
	adds.l	%s37, -6, %s37
	lea	%s38, 48(, %s38)
	lea	%s35, 64(, %s35)
.L_kmp_copy_stack_arg:
	ld	%s36, 0(, %s38)
	st	%s36, 0(, %s35)
	adds.l	%s37, -1, %s37
	adds.l	%s38, 8, %s38
	adds.l	%s35, 8, %s35
	brne.l	0, %s37, .L_kmp_copy_stack_arg

.L_kmp_call_pkfn:
	// Invoke pkfn through %s12.
	bsic	%lr, (, %s12)

	// The return value is always 1/TRUE.
	lea	%s0, 1

	// Epilogue: restore %sp from %fp, reload lr/fp from the caller's
	// frame and return.
	or	%sp, 0, %fp
	ld	%lr, 8(, %sp)
	ld	%fp, 0(, %sp)
	b.l.t	(, %lr)
.Lfunc_end0:
	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
	.cfi_endproc
23035f757f3fSDimitry Andric
23045f757f3fSDimitry Andric// -- End  __kmp_invoke_microtask
23055f757f3fSDimitry Andric
23065f757f3fSDimitry Andric#endif /* KMP_ARCH_VE */
23075f757f3fSDimitry Andric
23085f757f3fSDimitry Andric#if KMP_ARCH_S390X
23095f757f3fSDimitry Andric
23105f757f3fSDimitry Andric//------------------------------------------------------------------------
23115f757f3fSDimitry Andric//
23125f757f3fSDimitry Andric// typedef void (*microtask_t)(int *gtid, int *tid, ...);
23135f757f3fSDimitry Andric//
23145f757f3fSDimitry Andric// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
23155f757f3fSDimitry Andric//                            void *p_argv[]
23165f757f3fSDimitry Andric// #if OMPT_SUPPORT
23175f757f3fSDimitry Andric//                            ,
23185f757f3fSDimitry Andric//                            void **exit_frame_ptr
23195f757f3fSDimitry Andric// #endif
23205f757f3fSDimitry Andric//                            ) {
23215f757f3fSDimitry Andric// #if OMPT_SUPPORT
23225f757f3fSDimitry Andric//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
23235f757f3fSDimitry Andric// #endif
23245f757f3fSDimitry Andric//
23255f757f3fSDimitry Andric//   (*pkfn)(&gtid, &tid, argv[0], ...);
23265f757f3fSDimitry Andric//
23275f757f3fSDimitry Andric//   return 1;
23285f757f3fSDimitry Andric// }
23295f757f3fSDimitry Andric//
23305f757f3fSDimitry Andric// Parameters:
23315f757f3fSDimitry Andric//   r2: pkfn
23325f757f3fSDimitry Andric//   r3: gtid
23335f757f3fSDimitry Andric//   r4: tid
23345f757f3fSDimitry Andric//   r5: argc
23355f757f3fSDimitry Andric//   r6: p_argv
23365f757f3fSDimitry Andric//   SP+160: exit_frame_ptr
23375f757f3fSDimitry Andric//
23385f757f3fSDimitry Andric// Locals:
23395f757f3fSDimitry Andric//   __gtid: gtid param pushed on stack so can pass &gtid to pkfn
23405f757f3fSDimitry Andric//   __tid: tid param pushed on stack so can pass &tid to pkfn
23415f757f3fSDimitry Andric//
23425f757f3fSDimitry Andric// Temp. registers:
23435f757f3fSDimitry Andric//
23445f757f3fSDimitry Andric//  r0: used to fetch argv slots
23455f757f3fSDimitry Andric//  r7: used as temporary for number of remaining pkfn parms
23465f757f3fSDimitry Andric//  r8: argv
23475f757f3fSDimitry Andric//  r9: pkfn
23485f757f3fSDimitry Andric//  r10: stack size
23495f757f3fSDimitry Andric//  r11: previous fp
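23505f757f3fSDimitry Andric//  r12: new SP + size of the stack parameter area (gtid/tid live at 160/168 from it)
23515f757f3fSDimitry Andric//  r13: byte offset of the current argv / stack parameter slot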
23525f757f3fSDimitry Andric//
23535f757f3fSDimitry Andric// return: r2 (always 1/TRUE)
23545f757f3fSDimitry Andric//
23555f757f3fSDimitry Andric
23565f757f3fSDimitry Andric// -- Begin __kmp_invoke_microtask
23575f757f3fSDimitry Andric// mark_begin;
23585f757f3fSDimitry Andric	.text
23595f757f3fSDimitry Andric	.globl	__kmp_invoke_microtask
23605f757f3fSDimitry Andric	.p2align	1
23615f757f3fSDimitry Andric	.type	__kmp_invoke_microtask,@function
23625f757f3fSDimitry Andric__kmp_invoke_microtask:
23635f757f3fSDimitry Andric	.cfi_startproc
23645f757f3fSDimitry Andric
23655f757f3fSDimitry Andric	stmg	%r6,%r14,48(%r15)
23665f757f3fSDimitry Andric        .cfi_offset %r6, -112
23675f757f3fSDimitry Andric        .cfi_offset %r7, -104
23685f757f3fSDimitry Andric        .cfi_offset %r8, -96
23695f757f3fSDimitry Andric        .cfi_offset %r9, -88
23705f757f3fSDimitry Andric        .cfi_offset %r10, -80
23715f757f3fSDimitry Andric        .cfi_offset %r11, -72
23725f757f3fSDimitry Andric        .cfi_offset %r12, -64
23735f757f3fSDimitry Andric        .cfi_offset %r13, -56
23745f757f3fSDimitry Andric        .cfi_offset %r14, -48
23755f757f3fSDimitry Andric        .cfi_offset %r15, -40
23765f757f3fSDimitry Andric	lgr	%r11,%r15
23775f757f3fSDimitry Andric	.cfi_def_cfa %r11, 160
23785f757f3fSDimitry Andric
23795f757f3fSDimitry Andric	// Compute the dynamic stack size:
23805f757f3fSDimitry Andric	//
23815f757f3fSDimitry Andric	// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
23825f757f3fSDimitry Andric	//   reference
23835f757f3fSDimitry Andric	// - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
23845f757f3fSDimitry Andric	//   function by register. Given that we have 5 of such registers (r[2-6])
23855f757f3fSDimitry Andric	//   and two + 'argc' arguments (consider &gtid and &tid), we need to
23865f757f3fSDimitry Andric	//   reserve max(0, argc - 3)*8 extra bytes
23875f757f3fSDimitry Andric	//
23885f757f3fSDimitry Andric	// The total number of bytes is then max(0, argc - 3)*8 + 8
23895f757f3fSDimitry Andric
23905f757f3fSDimitry Andric	lgr	%r10,%r5
23915f757f3fSDimitry Andric	aghi	%r10,-2
23925f757f3fSDimitry Andric	jnm	0f
23935f757f3fSDimitry Andric	lghi	%r10,0
23945f757f3fSDimitry Andric0:
23955f757f3fSDimitry Andric	sllg	%r10,%r10,3
23965f757f3fSDimitry Andric	lgr	%r12,%r10
23975f757f3fSDimitry Andric	aghi	%r10,176
23985f757f3fSDimitry Andric	sgr 	%r15,%r10
23995f757f3fSDimitry Andric	agr	%r12,%r15
24005f757f3fSDimitry Andric	stg	%r11,0(%r15)
24015f757f3fSDimitry Andric
24025f757f3fSDimitry Andric	lgr	%r9,%r2			// pkfn
24035f757f3fSDimitry Andric
24045f757f3fSDimitry Andric#if OMPT_SUPPORT
24055f757f3fSDimitry Andric	// Save frame pointer into exit_frame
24065f757f3fSDimitry Andric	lg	%r8,160(%r11)
24075f757f3fSDimitry Andric	stg	%r11,0(%r8)
24085f757f3fSDimitry Andric#endif
24095f757f3fSDimitry Andric
24105f757f3fSDimitry Andric	// Prepare arguments for the pkfn function (first 5 using r2-r6 registers)
24115f757f3fSDimitry Andric
24125f757f3fSDimitry Andric	stg     %r3,160(%r12)
24135f757f3fSDimitry Andric	la	%r2,164(%r12)		// gid
24145f757f3fSDimitry Andric	stg	%r4,168(%r12)
24155f757f3fSDimitry Andric	la	%r3,172(%r12)		// tid
24165f757f3fSDimitry Andric	lgr	%r8,%r6			// argv
24175f757f3fSDimitry Andric
24185f757f3fSDimitry Andric	// If argc > 0
24195f757f3fSDimitry Andric	ltgr	%r7,%r5
24205f757f3fSDimitry Andric	jz	1f
24215f757f3fSDimitry Andric
24225f757f3fSDimitry Andric	lg	%r4,0(%r8)		// argv[0]
24235f757f3fSDimitry Andric	aghi	%r7,-1
24245f757f3fSDimitry Andric	jz	1f
24255f757f3fSDimitry Andric
24265f757f3fSDimitry Andric	// If argc > 1
24275f757f3fSDimitry Andric	lg	%r5,8(%r8)		// argv[1]
24285f757f3fSDimitry Andric	aghi	%r7,-1
24295f757f3fSDimitry Andric	jz	1f
24305f757f3fSDimitry Andric
24315f757f3fSDimitry Andric	// If argc > 2
24325f757f3fSDimitry Andric	lg	%r6,16(%r8)		// argv[2]
24335f757f3fSDimitry Andric	aghi	%r7,-1
24345f757f3fSDimitry Andric	jz	1f
24355f757f3fSDimitry Andric
24365f757f3fSDimitry Andric	lghi	%r13,0			// Index [n]
24375f757f3fSDimitry Andric2:
24385f757f3fSDimitry Andric	lg	%r0,24(%r13,%r8)	// argv[2+n]
24395f757f3fSDimitry Andric	stg	%r0,160(%r13,%r15)	// parm[2+n]
24405f757f3fSDimitry Andric	aghi	%r13,8			// Next
24415f757f3fSDimitry Andric	aghi	%r7,-1
24425f757f3fSDimitry Andric	jnz	2b
24435f757f3fSDimitry Andric
24445f757f3fSDimitry Andric1:
24455f757f3fSDimitry Andric	basr	%r14,%r9		// Call pkfn
24465f757f3fSDimitry Andric
24475f757f3fSDimitry Andric	// Restore stack and return
24485f757f3fSDimitry Andric
24495f757f3fSDimitry Andric	lgr	%r15,%r11
24505f757f3fSDimitry Andric	lmg	%r6,%r14,48(%r15)
24515f757f3fSDimitry Andric	lghi	%r2,1
24525f757f3fSDimitry Andric	br	%r14
24535f757f3fSDimitry Andric.Lfunc_end0:
24545f757f3fSDimitry Andric	.size	__kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
24555f757f3fSDimitry Andric	.cfi_endproc
24565f757f3fSDimitry Andric
24575f757f3fSDimitry Andric// -- End  __kmp_invoke_microtask
24585f757f3fSDimitry Andric
24595f757f3fSDimitry Andric#endif /* KMP_ARCH_S390X */
24605f757f3fSDimitry Andric
// 32-bit targets: define the global data word __kmp_unnamed_critical_addr,
// which holds the 4-byte address of the .gomp_critical_user_ object.
24610fca6ea1SDimitry Andric#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32
// Fall back to an identity mapping when no symbol-prefix macro was defined.
24620fca6ea1SDimitry Andric#ifndef KMP_PREFIX_UNDERSCORE
24630fca6ea1SDimitry Andric# define KMP_PREFIX_UNDERSCORE(x) x
24640fca6ea1SDimitry Andric#endif
24650b57cec5SDimitry Andric    .data
// COMMON is a macro defined elsewhere in this file; presumably it declares
// .gomp_critical_user_ as a 32-byte common symbol (third operand:
// alignment) - TODO confirm against the macro definition.
2466bdd1243dSDimitry Andric    COMMON .gomp_critical_user_, 32, 3
24670b57cec5SDimitry Andric    .data
// NOTE(review): the meaning of the '.align' operand (byte count vs. power
// of two) depends on the target assembler.
24680b57cec5SDimitry Andric    .align 4
24690fca6ea1SDimitry Andric    .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
24700fca6ea1SDimitry AndricKMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
24710b57cec5SDimitry Andric    .4byte .gomp_critical_user_
// The symbol size is only recorded for ELF output.
2472bdd1243dSDimitry Andric#ifdef __ELF__
24730fca6ea1SDimitry Andric    .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4
2474bdd1243dSDimitry Andric#endif
24750fca6ea1SDimitry Andric#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */
24760b57cec5SDimitry Andric
// 64-bit targets: define the global data word __kmp_unnamed_critical_addr,
// which holds the 8-byte address of the .gomp_critical_user_ object.
24775f757f3fSDimitry Andric#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||                   \
24785f757f3fSDimitry Andric    KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||                 \
24795f757f3fSDimitry Andric    KMP_ARCH_S390X
// Fall back to an identity mapping when no symbol-prefix macro was defined.
2480e8d8bef9SDimitry Andric#ifndef KMP_PREFIX_UNDERSCORE
2481e8d8bef9SDimitry Andric# define KMP_PREFIX_UNDERSCORE(x) x
2482e8d8bef9SDimitry Andric#endif
24830b57cec5SDimitry Andric    .data
// COMMON is a macro defined elsewhere in this file; presumably it declares
// .gomp_critical_user_ as a 32-byte common symbol (third operand:
// alignment) - TODO confirm against the macro definition.
2484bdd1243dSDimitry Andric    COMMON .gomp_critical_user_, 32, 3
24850b57cec5SDimitry Andric    .data
// NOTE(review): the meaning of the '.align' operand (byte count vs. power
// of two) depends on the target assembler.
24860b57cec5SDimitry Andric    .align 8
2487e8d8bef9SDimitry Andric    .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
2488e8d8bef9SDimitry AndricKMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
24890b57cec5SDimitry Andric    .8byte .gomp_critical_user_
// The symbol size is only recorded for ELF output.
2490e8d8bef9SDimitry Andric#ifdef __ELF__
2491e8d8bef9SDimitry Andric    .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
2492e8d8bef9SDimitry Andric#endif
2493489b1cf2SDimitry Andric#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
24945f757f3fSDimitry Andric          KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||
24955f757f3fSDimitry Andric          KMP_ARCH_S390X */
24960b57cec5SDimitry Andric
// On Linux, emit an empty .note.GNU-stack section with no flags; by
// convention this marks the object as not requiring an executable stack.
// The ARM/AArch64 branch spells the section type '%progbits' instead of
// '@progbits'. No such section is emitted for WASM.
24970b57cec5SDimitry Andric#if KMP_OS_LINUX
249806c3fb27SDimitry Andric# if KMP_ARCH_ARM || KMP_ARCH_AARCH64
24990b57cec5SDimitry Andric.section .note.GNU-stack,"",%progbits
25005f757f3fSDimitry Andric# elif !KMP_ARCH_WASM
25010b57cec5SDimitry Andric.section .note.GNU-stack,"",@progbits
25020b57cec5SDimitry Andric# endif
25030b57cec5SDimitry Andric#endif
25045f757f3fSDimitry Andric
// WASM: define the critical-section objects directly in .data instead of
// going through the COMMON macro. Three zero-initialised 4-byte objects are
// exported, followed by __kmp_unnamed_critical_addr, a 4-byte word holding
// the address of .gomp_critical_user_.
25055f757f3fSDimitry Andric#if KMP_ARCH_WASM
25065f757f3fSDimitry Andric.data
25075f757f3fSDimitry Andric.global .gomp_critical_user_
25085f757f3fSDimitry Andric.global .gomp_critical_user_.var
25095f757f3fSDimitry Andric.global .gomp_critical_user_.reduction.var
25105f757f3fSDimitry Andric.global __kmp_unnamed_critical_addr
25115f757f3fSDimitry Andric.gomp_critical_user_:
25125f757f3fSDimitry Andric.zero 4
25135f757f3fSDimitry Andric.size .gomp_critical_user_, 4
25145f757f3fSDimitry Andric.gomp_critical_user_.var:
25155f757f3fSDimitry Andric.zero 4
25165f757f3fSDimitry Andric.size .gomp_critical_user_.var, 4
25175f757f3fSDimitry Andric.gomp_critical_user_.reduction.var:
25185f757f3fSDimitry Andric.zero 4
25195f757f3fSDimitry Andric.size .gomp_critical_user_.reduction.var, 4
25205f757f3fSDimitry Andric__kmp_unnamed_critical_addr:
25215f757f3fSDimitry Andric    .4byte .gomp_critical_user_
25225f757f3fSDimitry Andric    .size __kmp_unnamed_critical_addr, 4
25235f757f3fSDimitry Andric#endif
2524*62987288SDimitry Andric
// Linux on AArch64 (64-bit and ILP32): expand GNU_PROPERTY_BTI_PAC, a macro
// defined elsewhere in this file.
// NOTE(review): judging by its name it emits the GNU property note that
// advertises BTI/PAC support - confirm against the macro definition.
2525*62987288SDimitry Andric#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
2526*62987288SDimitry AndricGNU_PROPERTY_BTI_PAC
2527*62987288SDimitry Andric#endif
2528