//  z_AIX_asm.S:  - microtasking routines specifically
//                  written for Power platforms running AIX OS

//
////===----------------------------------------------------------------------===//
////
//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
//// See https://llvm.org/LICENSE.txt for license information.
//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
////
////===----------------------------------------------------------------------===//
//

// -----------------------------------------------------------------------
// macros
// -----------------------------------------------------------------------

#include "kmp_config.h"

#if KMP_OS_AIX
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
//                         int gtid, int tid,
//                         int argc, void *p_argv[]
// #if OMPT_SUPPORT
//                         ,
//                         void **exit_frame_ptr
// #endif
//                       ) {
// #if OMPT_SUPPORT
//   *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
//   (*pkfn)( & gtid, & tid, p_argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
//   *exit_frame_ptr = 0;
// #endif
//
//   return 1;
// }
//
// parameters:
//   r3: pkfn
//   r4: gtid
//   r5: tid
//   r6: argc
//   r7: p_argv
//   r8: &exit_frame
//
// return:  r3 (always 1/TRUE)
//

#if KMP_ARCH_PPC64_XCOFF

        .globl  __kmp_invoke_microtask[DS]
        .globl  .__kmp_invoke_microtask
        .align  4
        .csect __kmp_invoke_microtask[DS],3
        .vbyte  8, .__kmp_invoke_microtask      # Function descriptor: entry point
        .vbyte  8, TOC[TC0]                     # Function descriptor: TOC anchor
        .vbyte  8, 0                            # Function descriptor: environment
        .csect .text[PR],2
        .machine "pwr7"
.__kmp_invoke_microtask:


// -- Begin __kmp_invoke_microtask
// mark_begin;

// We need to allocate a stack frame large enough to hold all of the parameters
// on the stack for the microtask plus what this function needs. That's 48
// bytes under the XCOFF64 ABI, plus max(64, 8*(2 + argc)) for
// the parameters to the microtask (gtid, tid, argc elements of p_argv),
// plus 8 bytes to store the values of r4 and r5, and 8 bytes to store r31.
// With OMP-T support, we need an additional 8 bytes to save r30 to hold
// a copy of r8.
// Stack offsets relative to the incoming stack pointer (kept in r31):
// r31: -8, r30: -16, gtid: -20, tid: -24

        mflr 0
        std 31, -8(1)   # Save r31 to the stack
        std 0, 16(1)    # Save LR to the linkage area

// This is unusual because normally we'd set r31 equal to r1 after the stack
// frame is established. In this case, however, we need to dynamically compute
// the stack frame size, and so we keep a direct copy of r1 to access our
// register save areas and restore the r1 value before returning.
        mr 31, 1

// Compute the size of the "argc" portion of the parameter save area.
// The parameter save area is always at least 64 bytes long (i.e. 8 regs)
// The microtask has (2 + argc) parameters, so if argc <= 6, we need to
// allocate 8*6 bytes, not 8*argc.
        li 0, 6
        cmpwi 0, 6, 6
        iselgt 0, 6, 0  # r0 = (argc > 6)? argc : 6
        sldi 0, 0, 3    # r0 = 8 * max(argc, 6)

// Compute the size necessary for the local stack frame.
// 88 = 48 + 4 (for r4) + 4 (for r5) + 8 (for r31) + 8 (for OMP-T r30) +
//      8 (parameter gtid) + 8 (parameter tid)
        li 12, 88
        add 12, 0, 12
        neg 12, 12      # r12 = -(frame size); the stack grows downwards

// We need to make sure that the stack frame stays aligned (to 16 bytes).
// Masking the negative size with -16 can only make the frame larger.
        li 0, -16
        and 12, 0, 12

// Establish the local stack frame.
        stdux 1, 1, 12  # Store the back chain and update r1 in one step

#if OMPT_SUPPORT
        std 30, -16(31) # Save r30 to the stack
        std 1, 0(8)     # *exit_frame_ptr = address of the new frame
        mr 30, 8        # Keep exit_frame_ptr in r30 across the call
#endif

// Store gtid and tid to the stack because they're passed by reference to the microtask.
        stw 4, -20(31)  # Save gtid to the stack
        stw 5, -24(31)  # Save tid to the stack

        mr 12, 6        # r12 = argc
        mr 4, 7         # r4 = p_argv

// Load up to six p_argv elements into r5..r10; r3 and r4 are filled in
// just before the call.
        cmpwi 0, 12, 1
        blt 0, .Lcall   # if (argc < 1) goto .Lcall

        ld 5, 0(4)      # r5 = p_argv[0]

        cmpwi 0, 12, 2
        blt 0, .Lcall   # if (argc < 2) goto .Lcall

        ld 6, 8(4)      # r6 = p_argv[1]

        cmpwi 0, 12, 3
        blt 0, .Lcall   # if (argc < 3) goto .Lcall

        ld 7, 16(4)     # r7 = p_argv[2]

        cmpwi 0, 12, 4
        blt 0, .Lcall   # if (argc < 4) goto .Lcall

        ld 8, 24(4)     # r8 = p_argv[3]

        cmpwi 0, 12, 5
        blt 0, .Lcall   # if (argc < 5) goto .Lcall

        ld 9, 32(4)     # r9 = p_argv[4]

        cmpwi 0, 12, 6
        blt 0, .Lcall   # if (argc < 6) goto .Lcall

        ld 10, 40(4)    # r10 = p_argv[5]

        cmpwi 0, 12, 7
        blt 0, .Lcall   # if (argc < 7) goto .Lcall

// There are more than 6 microtask parameters, so we need to store the
// remainder to the stack.
        addi 12, 12, -6 # argc -= 6
        mtctr 12        # CTR = number of elements left to copy

// These are set to 8 bytes before the first desired store address (we're using
// pre-increment loads and stores in the loop below). The parameter save area
// for the microtask begins 48 + 8*8 == 112 bytes above r1 for XCOFF64.
        addi 4, 4, 40   # r4 = &p_argv[5]; the first ldu below reads p_argv[6]
        addi 12, 1, 104 # r12 = stack offset (112 - 8)

.Lnext:
        ldu 0, 8(4)     # r0 = next p_argv element (pre-increment)
        stdu 0, 8(12)   # Store it to the next parameter slot (pre-increment)
        bdnz .Lnext

.Lcall:
        std 2, 40(1)    # Save the TOC pointer to the linkage area
// Load the actual function address from the function descriptor.
        ld 12, 0(3)     # Function address
        ld 2, 8(3)      # TOC pointer
        ld 11, 16(3)    # Environment pointer

        addi 3, 31, -20 # r3 = & gtid
        addi 4, 31, -24 # r4 = & tid

        mtctr 12        # CTR = function address
        bctrl           # Branch to CTR
        ld 2, 40(1)     # Restore TOC pointer from linkage area

#if OMPT_SUPPORT
        li 3, 0
        std 3, 0(30)    # *exit_frame_ptr = 0
#endif

        li 3, 1         # Return value (always 1)

#if OMPT_SUPPORT
        ld 30, -16(31)  # Restore r30 from the saved value on the stack
#endif

        mr 1, 31        # Pop the frame: r1 = stack pointer on entry
        ld 31, -8(1)    # Restore r31 from the saved value on the stack
        ld 0, 16(1)
        mtlr 0          # Restore LR from the linkage area
        blr             # Branch to LR

#else  // KMP_ARCH_PPC_XCOFF

        .globl  __kmp_invoke_microtask[DS]
        .globl  .__kmp_invoke_microtask
        .align  4
        .csect __kmp_invoke_microtask[DS],2
        .vbyte  4, .__kmp_invoke_microtask      # Function descriptor: entry point
        .vbyte  4, TOC[TC0]                     # Function descriptor: TOC anchor
        .vbyte  4, 0                            # Function descriptor: environment
        .csect .text[PR],2
        .machine "pwr7"
.__kmp_invoke_microtask:


// -- Begin __kmp_invoke_microtask
// mark_begin;

// We need to allocate a stack frame large enough to hold all of the parameters
// on the stack for the microtask plus what this function needs. That's 24
// bytes under the XCOFF ABI, plus max(32, 4*(2 + argc)) for
// the parameters to the microtask (gtid, tid, argc elements of p_argv),
// plus 8 bytes to store the values of r4 and r5, and 4 bytes to store r31.
// With OMP-T support, we need an additional 4 bytes to save r30 to hold
// a copy of r8.
// Stack offsets relative to the incoming stack pointer (kept in r31):
// r31: -4, r30: -8, gtid: -12, tid: -16

        mflr 0
        stw 31, -4(1)   # Save r31 to the stack
        stw 0, 8(1)     # Save LR to the linkage area

// This is unusual because normally we'd set r31 equal to r1 after the stack
// frame is established. In this case, however, we need to dynamically compute
// the stack frame size, and so we keep a direct copy of r1 to access our
// register save areas and restore the r1 value before returning.
        mr 31, 1

// Compute the size of the "argc" portion of the parameter save area.
// The parameter save area is always at least 32 bytes long (i.e. 8 regs)
// The microtask has (2 + argc) parameters, so if argc <= 6, we need to
// allocate 4*6 bytes, not 4*argc.
        li 0, 6
        cmpwi 0, 6, 6
        iselgt 0, 6, 0  # r0 = (argc > 6)? argc : 6
        slwi 0, 0, 2    # r0 = 4 * max(argc, 6)

// Compute the size necessary for the local stack frame.
// 56 = 32 + 4 (for r4) + 4 (for r5) + 4 (for r31) + 4 (for OMP-T r30) +
//      4 (parameter gtid) + 4 (parameter tid)
// NOTE(review): the comments above and below give the linkage area as 24
// bytes, so the 32 used here appears to reserve 8 bytes more than strictly
// required; confirm before tightening.
        li 12, 56
        add 12, 0, 12
        neg 12, 12      # r12 = -(frame size); the stack grows downwards

// We need to make sure that the stack frame stays aligned (to 16 bytes).
// Masking the negative size with -16 can only make the frame larger.
        li 0, -16
        and 12, 0, 12

// Establish the local stack frame.
        stwux 1, 1, 12  # Store the back chain and update r1 in one step

#if OMPT_SUPPORT
        stw 30, -8(31)  # Save r30 to the stack
        stw 1, 0(8)     # *exit_frame_ptr = address of the new frame
        mr 30, 8        # Keep exit_frame_ptr in r30 across the call
#endif

// Store gtid and tid to the stack because they're passed by reference to the microtask.
        stw 4, -12(31)  # Save gtid to the stack
        stw 5, -16(31)  # Save tid to the stack

        mr 12, 6        # r12 = argc
        mr 4, 7         # r4 = p_argv

// Load up to six p_argv elements into r5..r10; r3 and r4 are filled in
// just before the call.
        cmpwi 0, 12, 1
        blt 0, .Lcall   # if (argc < 1) goto .Lcall

        lwz 5, 0(4)     # r5 = p_argv[0]

        cmpwi 0, 12, 2
        blt 0, .Lcall   # if (argc < 2) goto .Lcall

        lwz 6, 4(4)     # r6 = p_argv[1]

        cmpwi 0, 12, 3
        blt 0, .Lcall   # if (argc < 3) goto .Lcall

        lwz 7, 8(4)     # r7 = p_argv[2]

        cmpwi 0, 12, 4
        blt 0, .Lcall   # if (argc < 4) goto .Lcall

        lwz 8, 12(4)    # r8 = p_argv[3]

        cmpwi 0, 12, 5
        blt 0, .Lcall   # if (argc < 5) goto .Lcall

        lwz 9, 16(4)    # r9 = p_argv[4]

        cmpwi 0, 12, 6
        blt 0, .Lcall   # if (argc < 6) goto .Lcall

        lwz 10, 20(4)   # r10 = p_argv[5]

        cmpwi 0, 12, 7
        blt 0, .Lcall   # if (argc < 7) goto .Lcall

// There are more than 6 microtask parameters, so we need to store the
// remainder to the stack.
        addi 12, 12, -6 # argc -= 6
        mtctr 12        # CTR = number of elements left to copy

// These are set to 4 bytes before the first desired store address (we're using
// pre-increment loads and stores in the loop below). The parameter save area
// for the microtask begins 24 + 4*8 == 56 bytes above r1 for XCOFF.
        addi 4, 4, 20   # r4 = &p_argv[5]; the first lwzu below reads p_argv[6]
        addi 12, 1, 52  # r12 = stack offset (56 - 4)

.Lnext:
        lwzu 0, 4(4)    # r0 = next p_argv element (pre-increment)
        stwu 0, 4(12)   # Store it to the next parameter slot (pre-increment)
        bdnz .Lnext

.Lcall:
        stw 2, 20(1)    # Save the TOC pointer to the linkage area
// Load the actual function address from the function descriptor.
        lwz 12, 0(3)    # Function address
        lwz 2, 4(3)     # TOC pointer
        lwz 11, 8(3)    # Environment pointer

        addi 3, 31, -12 # r3 = & gtid
        addi 4, 31, -16 # r4 = & tid

        mtctr 12        # CTR = function address
        bctrl           # Branch to CTR
        lwz 2, 20(1)    # Restore TOC pointer from linkage area

#if OMPT_SUPPORT
        li 3, 0
        stw 3, 0(30)    # *exit_frame_ptr = 0
#endif

        li 3, 1         # Return value (always 1)

#if OMPT_SUPPORT
        lwz 30, -8(31)  # Restore r30 from the saved value on the stack
#endif

        mr 1, 31        # Pop the frame: r1 = stack pointer on entry
        lwz 31, -4(1)   # Restore r31 from the saved value on the stack
        lwz 0, 8(1)
        mtlr 0          # Restore LR from the linkage area
        blr             # Branch to LR

#endif // KMP_ARCH_PPC64_XCOFF

.Lfunc_end0:
        .vbyte  4, 0x00000000           # Traceback table begin
        .byte   0x00                    # Version = 0
        .byte   0x09                    # Language = CPlusPlus
        .byte   0x20                    # -IsGlobalLinkage, -IsOutOfLineEpilogOrPrologue
                                        # +HasTraceBackTableOffset, -IsInternalProcedure
                                        # -HasControlledStorage, -IsTOCless
                                        # -IsFloatingPointPresent
                                        # -IsFloatingPointOperationLogOrAbortEnabled
        .byte   0x61                    # -IsInterruptHandler, +IsFunctionNamePresent, +IsAllocaUsed
                                        # OnConditionDirective = 0, -IsCRSaved, +IsLRSaved
        .byte   0x80                    # +IsBackChainStored, -IsFixup, NumOfFPRsSaved = 0
#if OMPT_SUPPORT
        .byte   0x02                    # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 2
        .byte   0x06                    # NumberOfFixedParms = 6
#else
        .byte   0x01                    # -HasExtensionTable, -HasVectorInfo, NumOfGPRsSaved = 1
        .byte   0x05                    # NumberOfFixedParms = 5
#endif
        .byte   0x01                    # NumberOfFPParms = 0, +HasParmsOnStack
        .vbyte  4, 0x00000000           # Parameter type = i, i, i, i, i
        .vbyte  4, .Lfunc_end0-.__kmp_invoke_microtask # Function size
        .vbyte  2, 0x0016               # Function name len = 22
        .byte   "__kmp_invoke_microtask" # Function Name
        .byte   0x1f                    # AllocaRegister = 31
                                        # -- End function

// -- End  __kmp_invoke_microtask

// Support for unnamed common blocks.

        .comm .gomp_critical_user_, 32, 3
#if KMP_ARCH_PPC64_XCOFF
        .csect __kmp_unnamed_critical_addr[RW],3
#else
        .csect __kmp_unnamed_critical_addr[RW],2
#endif
        .globl __kmp_unnamed_critical_addr[RW]
        .ptr .gomp_critical_user_       # Pointer-sized address of the common block

// -- End unnamed common block

        .toc

#endif // KMP_OS_AIX