xref: /freebsd/contrib/llvm-project/openmp/runtime/src/z_Windows_NT-586_asm.asm (revision a4e5e0106ac7145f56eb39a691e302cabb4635be)
1;  z_Windows_NT-586_asm.asm:  - microtasking routines specifically
2;    written for IA-32 architecture and Intel(R) 64 running Windows* OS
3
4;
5;//===----------------------------------------------------------------------===//
6;//
7;// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8;// See https://llvm.org/LICENSE.txt for license information.
9;// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10;//
11;//===----------------------------------------------------------------------===//
12;
13
14        TITLE   z_Windows_NT-586_asm.asm
15
16; ============================= IA-32 architecture ==========================
17ifdef _M_IA32
18
        ; Enable the Pentium (586) instruction set, privileged instructions included.
        .586P

; Assemblers newer than version 5.10 take the simplified .model directive;
; older ones get the explicit 32-bit segment declarations below.
if @Version gt 510
        .model HUGE
else
; Empty segment declarations: they only establish each segment's
; alignment, combine type and class before any code is emitted.
_TEXT   SEGMENT PARA USE32 PUBLIC 'CODE'
_TEXT   ENDS
_DATA   SEGMENT DWORD USE32 PUBLIC 'DATA'
_DATA   ENDS
CONST   SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST   ENDS
_BSS    SEGMENT DWORD USE32 PUBLIC 'BSS'
_BSS    ENDS
$$SYMBOLS       SEGMENT BYTE USE32 'DEBSYM'
$$SYMBOLS       ENDS
$$TYPES SEGMENT BYTE USE32 'DEBTYP'
$$TYPES ENDS
_TLS    SEGMENT DWORD USE32 PUBLIC 'TLS'
_TLS    ENDS
; Group the data segments as FLAT and tell the assembler that
; CS, DS and SS all address that group.
FLAT    GROUP _DATA, CONST, _BSS
        ASSUME  CS: FLAT, DS: FLAT, SS: FLAT
endif
41
42
;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_pause
;
; void
; __kmp_x86_pause( void )
;
; Executes a single PAUSE instruction (spin-wait hint) and returns.
; Takes no arguments and modifies no registers or memory, so no
; stack-offset equates are defined for it.
PUBLIC  ___kmp_x86_pause
_TEXT   SEGMENT
        ALIGN 16
___kmp_x86_pause PROC NEAR

        db      0f3H
        db      090H    ;; pause, emitted as raw bytes (F3 90)
        ret

___kmp_x86_pause ENDP
_TEXT   ENDS
61
;------------------------------------------------------------------------
; FUNCTION ___kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Runs CPUID with EAX = mode and ECX = mode2 and writes the resulting
; EAX, EBX, ECX, EDX to p[0..3] (four consecutive DWORDs).
;
; stack arguments (relative to ebp after the frame is set up):
;       mode:   8[ebp]
;       mode2:  12[ebp]
;       p:      16[ebp]
PUBLIC  ___kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16
_mode$  = 8
_mode2$ = 12
_p$     = 16
; byte offsets of the result fields inside *p
_eax$   = 0
_ebx$   = 4
_ecx$   = 8
_edx$   = 12

___kmp_x86_cpuid PROC NEAR

        push    ebp
        mov     ebp, esp

        ; CPUID overwrites eax, ebx, ecx, edx; edi carries the buffer pointer.
        push    edi
        push    ebx
        push    ecx
        push    edx

        mov     edi, DWORD PTR _p$[ebp]         ; edi = result buffer
        mov     ecx, DWORD PTR _mode2$[ebp]     ; sub-leaf selector
        mov     eax, DWORD PTR _mode$[ebp]      ; leaf selector
        cpuid

        mov     DWORD PTR _edx$[edi], edx
        mov     DWORD PTR _ecx$[edi], ecx
        mov     DWORD PTR _ebx$[edi], ebx
        mov     DWORD PTR _eax$[edi], eax

        pop     edx
        pop     ecx
        pop     ebx
        pop     edi

        leave                                   ; mov esp, ebp / pop ebp
        ret

___kmp_x86_cpuid ENDP
_TEXT     ENDS
109
;------------------------------------------------------------------------
; FUNCTION ___kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p and returns, in eax, the value *p held
; before the addition.
;
; stack arguments:
;       p:      4[esp]
;       d:      8[esp]
PUBLIC  ___kmp_test_then_add32
_p$ = 4
_d$ = 8
_TEXT   SEGMENT
        ALIGN 16
___kmp_test_then_add32 PROC NEAR

        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        mov     eax, DWORD PTR _d$[esp]         ; eax = d
        lock xadd DWORD PTR [edx], eax          ; eax = old *p; *p = old + d
        ret

___kmp_test_then_add32 ENDP
_TEXT   ENDS
129
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv then *p = sv.
; Returns 1 in eax when the store happened, 0 otherwise.
;
; stack arguments:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
PUBLIC  ___kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store8 PROC NEAR

        mov     al, BYTE PTR _cv$[esp]          ; al = expected value (implicit CMPXCHG operand)
        mov     cl, BYTE PTR _sv$[esp]          ; cl = replacement value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock cmpxchg BYTE PTR [edx], cl         ; ZF = 1 when the swap was performed
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 flag to the full register
        ret

___kmp_compare_and_store8 ENDP
_TEXT     ENDS
154
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv then *p = sv.
; Returns 1 in eax when the store happened, 0 otherwise.
;
; stack arguments:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
PUBLIC  ___kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store16 PROC NEAR

        mov     ax, WORD PTR _cv$[esp]          ; ax = expected value (implicit CMPXCHG operand)
        mov     cx, WORD PTR _sv$[esp]          ; cx = replacement value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock cmpxchg WORD PTR [edx], cx         ; ZF = 1 when the swap was performed
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 flag to the full register
        ret

___kmp_compare_and_store16 ENDP
_TEXT     ENDS
179
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv then *p = sv.
; Returns 1 in eax when the store happened, 0 otherwise.
;
; stack arguments:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
PUBLIC  ___kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store32 PROC NEAR

        mov     eax, DWORD PTR _cv$[esp]        ; eax = expected value (implicit CMPXCHG operand)
        mov     ecx, DWORD PTR _sv$[esp]        ; ecx = replacement value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock cmpxchg DWORD PTR [edx], ecx       ; ZF = 1 when the swap was performed
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 flag to the full register
        ret

___kmp_compare_and_store32 ENDP
_TEXT     ENDS
204
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap built on CMPXCHG8B:
; if *p == cv then *p = sv.  Returns 1 in eax when the store
; happened, 0 otherwise.
;
; stack arguments (relative to ebp after the frame is set up):
;       p:              8[ebp]
;       cv (low/high):  12[ebp] / 16[ebp]
;       sv (low/high):  20[ebp] / 24[ebp]
PUBLIC  ___kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx                             ; ebx and edi are saved and restored around their use
        push    edi

        ; CMPXCHG8B operands: edx:eax = expected, ecx:ebx = replacement
        mov     ecx, DWORD PTR _sv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     eax, DWORD PTR _cv_low$[ebp]
        mov     edi, DWORD PTR _p$[ebp]
        lock cmpxchg8b QWORD PTR [edi]          ; ZF = 1 when the swap was performed
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 flag to the full register

        pop     edi
        pop     ebx
        leave                                   ; mov esp, ebp / pop ebp
        ret

___kmp_compare_and_store64 ENDP
_TEXT     ENDS
241
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p and returns the previous byte in al.
;
; stack arguments:
;       p:      4[esp]
;       d:      8[esp]
PUBLIC  ___kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed8 PROC NEAR

        mov     al, BYTE PTR _d$[esp]           ; al = new value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock xchg BYTE PTR [edx], al            ; al = old *p
        ret

___kmp_xchg_fixed8 ENDP
_TEXT     ENDS
262
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p and returns the previous word in ax.
;
; stack arguments:
;       p:      4[esp]
;       d:      8[esp]
PUBLIC  ___kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed16 PROC NEAR

        mov     ax, WORD PTR _d$[esp]           ; ax = new value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock xchg WORD PTR [edx], ax            ; ax = old *p
        ret

___kmp_xchg_fixed16 ENDP
_TEXT     ENDS
283
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p and returns the previous value in eax.
;
; stack arguments:
;       p:      4[esp]
;       d:      8[esp]
PUBLIC  ___kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_d$ = 8

___kmp_xchg_fixed32 PROC NEAR

        mov     eax, DWORD PTR _d$[esp]         ; eax = new value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock xchg DWORD PTR [edx], eax          ; eax = old *p
        ret

___kmp_xchg_fixed32 ENDP
_TEXT     ENDS
304
305
;------------------------------------------------------------------------
; FUNCTION ___kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores the 32-bit pattern of d into *p and returns the
; value that XCHG actually swapped out, so the result is consistent
; with the exchange even when other threads write *p concurrently.
;
; The result is returned in st(0).  Exactly one value is pushed on the
; x87 register stack, leaving it balanced for the caller.
;
; stack arguments (relative to ebp after the frame is set up):
;       p:      8[ebp]
;       d:      12[ebp]
; local:
;       old_value:      -4[ebp]   scratch slot used to move eax into st(0)
PUBLIC  ___kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_d$ = 12
_old_value$ = -4

___kmp_xchg_real32 PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 4                          ; room for old_value

        mov     ecx, DWORD PTR _p$[ebp]         ; ecx = p
        mov     eax, DWORD PTR _d$[ebp]         ; eax = raw bits of d

        lock xchg DWORD PTR [ecx], eax          ; eax = raw bits previously in *p

        ; There is no direct GPR -> x87 move, so bounce through memory.
        mov     DWORD PTR _old_value$[ebp], eax
        fld     DWORD PTR _old_value$[ebp]
                        ;; return old_value in st(0)

        mov     esp, ebp
        pop     ebp
        ret

___kmp_xchg_real32 ENDP
_TEXT   ENDS
344
345
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv then *p = sv.
; Returns in al the value *p held before the operation (equal to cv
; exactly when the store happened).
;
; stack arguments:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
PUBLIC  ___kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret8 PROC NEAR

        mov     al, BYTE PTR _cv$[esp]          ; al = expected value
        mov     cl, BYTE PTR _sv$[esp]          ; cl = replacement value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock cmpxchg BYTE PTR [edx], cl         ; on mismatch al is loaded with *p
        ret

___kmp_compare_and_store_ret8 ENDP
_TEXT     ENDS
368
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv then *p = sv.
; Returns in ax the value *p held before the operation (equal to cv
; exactly when the store happened).
;
; stack arguments:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
PUBLIC  ___kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret16 PROC NEAR

        mov     ax, WORD PTR _cv$[esp]          ; ax = expected value
        mov     cx, WORD PTR _sv$[esp]          ; cx = replacement value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock cmpxchg WORD PTR [edx], cx         ; on mismatch ax is loaded with *p
        ret

___kmp_compare_and_store_ret16 ENDP
_TEXT     ENDS
391
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv then *p = sv.
; Returns in eax the value *p held before the operation (equal to cv
; exactly when the store happened).
;
; stack arguments:
;       p:      4[esp]
;       cv:     8[esp]
;       sv:     12[esp]
PUBLIC  ___kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4
_cv$ = 8
_sv$ = 12

___kmp_compare_and_store_ret32 PROC NEAR

        mov     eax, DWORD PTR _cv$[esp]        ; eax = expected value
        mov     ecx, DWORD PTR _sv$[esp]        ; ecx = replacement value
        mov     edx, DWORD PTR _p$[esp]         ; edx = p
        lock cmpxchg DWORD PTR [edx], ecx       ; on mismatch eax is loaded with *p
        ret

___kmp_compare_and_store_ret32 ENDP
_TEXT     ENDS
414
;------------------------------------------------------------------------
; FUNCTION ___kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap built on CMPXCHG8B:
; if *p == cv then *p = sv.  Returns in edx:eax the value *p held
; before the operation (equal to cv exactly when the store happened).
;
; stack arguments (relative to ebp after the frame is set up):
;       p:              8[ebp]
;       cv (low/high):  12[ebp] / 16[ebp]
;       sv (low/high):  20[ebp] / 24[ebp]
PUBLIC  ___kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16
_p$ = 8
_cv_low$ = 12
_cv_high$ = 16
_sv_low$ = 20
_sv_high$ = 24

___kmp_compare_and_store_ret64 PROC NEAR

        push    ebp
        mov     ebp, esp
        push    ebx                             ; ebx and edi are saved and restored around their use
        push    edi

        ; CMPXCHG8B operands: edx:eax = expected, ecx:ebx = replacement
        mov     ecx, DWORD PTR _sv_high$[ebp]
        mov     ebx, DWORD PTR _sv_low$[ebp]
        mov     edx, DWORD PTR _cv_high$[ebp]
        mov     eax, DWORD PTR _cv_low$[ebp]
        mov     edi, DWORD PTR _p$[ebp]
        lock cmpxchg8b QWORD PTR [edi]          ; on mismatch edx:eax is loaded with *p

        pop     edi
        pop     ebx
        leave                                   ; mov esp, ebp / pop ebp
        ret

___kmp_compare_and_store_ret64 ENDP
_TEXT     ENDS
449
;------------------------------------------------------------------------
; FUNCTION ___kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at *p.
;
; stack arguments:
;       p:      4[esp]
PUBLIC  ___kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_load_x87_fpu_control_word PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        fldcw   WORD PTR [ecx]                  ; control word <- *p
        ret

___kmp_load_x87_fpu_control_word ENDP
_TEXT     ENDS
471
;------------------------------------------------------------------------
; FUNCTION ___kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Stores the current x87 FPU control word into the 16-bit slot at *p.
;
; stack arguments:
;       p:      4[esp]
PUBLIC  ___kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
_p$ = 4

___kmp_store_x87_fpu_control_word PROC NEAR

        mov     ecx, DWORD PTR _p$[esp]         ; ecx = p
        fstcw   WORD PTR [ecx]                  ; *p <- control word
        ret

___kmp_store_x87_fpu_control_word ENDP
_TEXT     ENDS
493
;------------------------------------------------------------------------
; FUNCTION ___kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word();
;
; Clears the exception flags in the x87 FPU status word.
; Takes no arguments and returns nothing.
PUBLIC  ___kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16

___kmp_clear_x87_fpu_status_word PROC NEAR

        ; FNCLEX is the no-wait form: it does not check for pending
        ; unmasked floating-point exceptions before clearing.
        fnclex
        ret

___kmp_clear_x87_fpu_status_word ENDP
_TEXT     ENDS
510
511
;------------------------------------------------------------------------
; FUNCTION ___kmp_invoke_microtask
;
; typedef void  (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] )
;
; Calls (*pkfn)( &gtid, &tid, p_argv[0], ..., p_argv[argc-1] ) with all
; arguments pushed on the stack, and returns 1 in eax.
;
; Before pushing, esp is padded so that it sits on a 128-byte boundary
; at the CALL instruction.  The padding plus the (argc + 2) * 4 bytes
; of arguments are recorded in stk_adj and removed after the call.
;
; When OMPT_SUPPORT is set, a sixth argument (exit_frame) points to a
; slot that receives this routine's frame pointer.
;
; stack arguments (relative to ebp after the frame is set up):
;       pkfn:   8[ebp]
;       gtid:   12[ebp]
;       tid:    16[ebp]
;       argc:   20[ebp]
;       p_argv: 24[ebp]
; local:
;       stk_adj: -16[ebp]   bytes to add back to esp after the call
PUBLIC  ___kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16
_pkfn$ = 8
_gtid$ = 12
_tid$ = 16
_argc$ = 20
_argv$ = 24
if OMPT_SUPPORT
_exit_frame$ = 28
endif
_stk_adj$ = -16

___kmp_invoke_microtask PROC NEAR

        push    ebp
        mov     ebp, esp
        sub     esp, 16                         ; local area (only stk_adj is used)
        push    ebx
        push    esi
        push    edi
if OMPT_SUPPORT
        mov     eax, DWORD PTR _exit_frame$[ebp]
        mov     DWORD PTR [eax], ebp            ; *exit_frame = our frame pointer
endif

;; ------------------------------------------------------------
;; Pad esp so that it is 128-byte aligned once all arguments are pushed.
        mov     eax, DWORD PTR _argc$[ebp]
        lea     eax, DWORD PTR [eax*4+8]        ; eax = (argc + 2) * 4 = bytes of pushed arguments
        mov     edx, esp
        sub     edx, eax                        ; edx = esp after the pushes if no padding were added
        and     edx, 127                        ; edx = distance down to the 128-byte boundary
        sub     esp, edx                        ; insert the padding
        add     edx, eax                        ; padding + argument bytes
        mov     DWORD PTR _stk_adj$[ebp], edx
;; ------------------------------------------------------------

        ; Push p_argv[argc-1] .. p_argv[0] (right-to-left).
        mov     ecx, DWORD PTR _argc$[ebp]      ; ecx = arguments still to push
        mov     edx, DWORD PTR _argv$[ebp]      ; edx = p_argv
$kmp_ia32_push_next:
        test    ecx, ecx
        jle     SHORT $kmp_ia32_push_done       ; nothing (more) to push when ecx <= 0
        push    DWORD PTR [edx+ecx*4-4]         ; p_argv[ecx-1]
        dec     ecx
        jmp     SHORT $kmp_ia32_push_next
$kmp_ia32_push_done:

        ; The first two microtask parameters are the addresses of our
        ; own gtid and tid argument slots.
        lea     eax, DWORD PTR _tid$[ebp]
        push    eax                             ; 2nd parameter: &tid
        lea     eax, DWORD PTR _gtid$[ebp]
        push    eax                             ; 1st parameter: &gtid

        call    DWORD PTR _pkfn$[ebp]

        add     esp, DWORD PTR _stk_adj$[ebp]   ; drop arguments and alignment padding

        mov     eax, 1                          ; always report success

        pop     edi
        pop     esi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret
___kmp_invoke_microtask ENDP
_TEXT   ENDS
616
617endif
618
619; ==================================== Intel(R) 64 ===================================
620
621ifdef _M_AMD64
622
;------------------------------------------------------------------------
; FUNCTION __kmp_x86_cpuid
;
; void
; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
;
; Runs CPUID with EAX = mode and ECX = mode2 and writes the resulting
; EAX, EBX, ECX, EDX to p[0..3] (four consecutive DWORDs).
;
; parameters:
;	mode:		ecx
;	mode2:		edx
;	cpuid_buffer: 	r8
;
; clobbers: rax, rcx, rdx, r10 (rbx is saved and restored)
PUBLIC  __kmp_x86_cpuid
_TEXT   SEGMENT
        ALIGN 16

__kmp_x86_cpuid PROC FRAME ;NEAR

        push      rbp
        .pushreg  rbp
        mov       rbp, rsp
        .setframe rbp, 0
        push      rbx                           ; callee-save register, overwritten by CPUID
        .pushreg  rbx
        .ENDPROLOG

        mov       r10, r8                       ; p parameter
        mov       eax, ecx                      ; mode parameter
        mov       ecx, edx                      ; mode2 parameter
        cpuid                                   ; Query the CPUID for the current processor

        mov       DWORD PTR 0[ r10 ], eax       ; store results into buffer
        mov       DWORD PTR 4[ r10 ], ebx
        mov       DWORD PTR 8[ r10 ], ecx
        mov       DWORD PTR 12[ r10 ], edx

        ; Epilogue: nothing was allocated below the pushes, so it is
        ; just the pops and the return.  Keeping it to pops + ret also
        ; keeps it in the form the Windows unwinder recognises.
        pop       rbx                           ; callee-save register
        pop       rbp
        ret

__kmp_x86_cpuid ENDP
_TEXT     ENDS
664
665
;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add32
;
; kmp_int32
; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically adds d to *p and returns the value *p held before the
; addition.
;
; parameters:
;	p:	rcx
;	d:	edx
;
; return: 	eax
PUBLIC  __kmp_test_then_add32
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add32 PROC ;NEAR

        lock xadd DWORD PTR [rcx], edx          ; edx = old *p; *p = old + d
        mov     eax, edx                        ; return the old value
        ret

__kmp_test_then_add32 ENDP
_TEXT   ENDS
688
689
;------------------------------------------------------------------------
; FUNCTION __kmp_test_then_add64
;
; kmp_int64
; __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically adds d to *p and returns the value *p held before the
; addition.
;
; parameters:
;	p:	rcx
;	d:	rdx
;
; return: 	rax
PUBLIC  __kmp_test_then_add64
_TEXT   SEGMENT
        ALIGN 16
__kmp_test_then_add64 PROC ;NEAR

        lock xadd QWORD PTR [rcx], rdx          ; rdx = old *p; *p = old + d
        mov     rax, rdx                        ; return the old value
        ret

__kmp_test_then_add64 ENDP
_TEXT   ENDS
712
713
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store8
;
; kmp_int8
; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv then *p = sv.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	eax
PUBLIC  __kmp_compare_and_store8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store8 PROC ;NEAR

        mov     eax, edx                        ; al = "cv" (implicit CMPXCHG operand)
        lock cmpxchg BYTE PTR [rcx], r8b        ; store "sv" if *p == al; ZF = 1 on success
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 flag to the full register
        ret

__kmp_compare_and_store8 ENDP
_TEXT     ENDS
740
741
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store16
;
; kmp_int16
; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv then *p = sv.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	eax
PUBLIC  __kmp_compare_and_store16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store16 PROC ;NEAR

        mov     eax, edx                        ; ax = "cv" (implicit CMPXCHG operand)
        lock cmpxchg WORD PTR [rcx], r8w        ; store "sv" if *p == ax; ZF = 1 on success
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 flag to the full register
        ret

__kmp_compare_and_store16 ENDP
_TEXT     ENDS
768
769
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store32
;
; kmp_int32
; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv then *p = sv.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	eax
PUBLIC  __kmp_compare_and_store32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store32 PROC ;NEAR

        mov     eax, edx                        ; eax = "cv" (implicit CMPXCHG operand)
        lock cmpxchg DWORD PTR [rcx], r8d       ; store "sv" if *p == eax; ZF = 1 on success
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 flag to the full register
        ret

__kmp_compare_and_store32 ENDP
_TEXT     ENDS
796
797
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store64
;
; kmp_int32
; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap: if *p == cv then *p = sv.
; Returns 1 when the store happened, 0 otherwise.
;
; parameters:
;	p:	rcx
;	cv:	rdx
;	sv:	r8
;
; return:	eax
PUBLIC  __kmp_compare_and_store64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store64 PROC ;NEAR

        mov     rax, rdx                        ; rax = "cv" (implicit CMPXCHG operand)
        lock cmpxchg QWORD PTR [rcx], r8        ; store "sv" if *p == rax; ZF = 1 on success
        sete    al
        movzx   eax, al                         ; zero-extend the 0/1 flag to the full register
        ret

__kmp_compare_and_store64 ENDP
_TEXT     ENDS
824
825
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed8
;
; kmp_int8
; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
;
; Atomically stores d into *p and returns the previous byte.
;
; parameters:
;	p:	rcx
;	d:	dl
;
; return: 	al
PUBLIC  __kmp_xchg_fixed8
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed8 PROC ;NEAR

        lock xchg BYTE PTR [rcx], dl            ; dl = old *p; *p = d
        mov     al, dl                          ; return the old value
        ret

__kmp_xchg_fixed8 ENDP
_TEXT     ENDS
849
850
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed16
;
; kmp_int16
; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
;
; Atomically stores d into *p and returns the previous word.
;
; parameters:
;	p:	rcx
;	d:	dx
;
; return: 	ax
PUBLIC  __kmp_xchg_fixed16
_TEXT   SEGMENT
        ALIGN 16

__kmp_xchg_fixed16 PROC ;NEAR

        lock xchg WORD PTR [rcx], dx            ; dx = old *p; *p = d
        mov     ax, dx                          ; return the old value
        ret

__kmp_xchg_fixed16 ENDP
_TEXT     ENDS
874
875
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed32
;
; kmp_int32
; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
;
; Atomically stores d into *p and returns the previous value.
;
; parameters:
;	p:	rcx
;	d:	edx
;
; return: 	eax
PUBLIC  __kmp_xchg_fixed32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed32 PROC ;NEAR

        lock xchg DWORD PTR [rcx], edx          ; edx = old *p; *p = d
        mov     eax, edx                        ; return the old value
        ret

__kmp_xchg_fixed32 ENDP
_TEXT   ENDS
898
899
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_fixed64
;
; kmp_int64
; __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
;
; Atomically stores d into *p and returns the previous value.
;
; parameters:
;	p:	rcx
;	d:	rdx
;
; return: 	rax
PUBLIC  __kmp_xchg_fixed64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_fixed64 PROC ;NEAR

        lock xchg QWORD PTR [rcx], rdx          ; rdx = old *p; *p = d
        mov     rax, rdx                        ; return the old value
        ret

__kmp_xchg_fixed64 ENDP
_TEXT   ENDS
922
923
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret8
;
; kmp_int8
; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Atomic byte compare-and-swap: if *p == cv then *p = sv.
; Returns the value *p held before the operation (equal to cv exactly
; when the store happened).
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	al
PUBLIC  __kmp_compare_and_store_ret8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret8 PROC ;NEAR

        mov     al, dl                          ; al = "cv"
        lock cmpxchg BYTE PTR [rcx], r8b
                        ; If AL == [rcx]: set ZF and store "sv" into [rcx].
                        ; Otherwise: clear ZF and load [rcx] into AL.
        ret

__kmp_compare_and_store_ret8 ENDP
_TEXT     ENDS
950
951
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret16
;
; kmp_int16
; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
;
; Atomic 16-bit compare-and-swap: if *p == cv then *p = sv.
; Returns the value *p held before the operation (equal to cv exactly
; when the store happened).
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	ax
PUBLIC  __kmp_compare_and_store_ret16
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret16 PROC ;NEAR

        mov     ax, dx                          ; ax = "cv"
        lock cmpxchg WORD PTR [rcx], r8w        ; on mismatch ax is loaded with *p
        ret

__kmp_compare_and_store_ret16 ENDP
_TEXT     ENDS
976
977
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret32
;
; kmp_int32
; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
;
; Atomic 32-bit compare-and-swap: if *p == cv then *p = sv.
; Returns the value *p held before the operation (equal to cv exactly
; when the store happened).
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	eax
PUBLIC  __kmp_compare_and_store_ret32
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret32 PROC ;NEAR

        mov     eax, edx                        ; eax = "cv"
        lock cmpxchg DWORD PTR [rcx], r8d       ; on mismatch eax is loaded with *p
        ret

__kmp_compare_and_store_ret32 ENDP
_TEXT     ENDS
1002
1003
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_ret64
;
; kmp_int64
; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
;
; Atomic 64-bit compare-and-swap: if *p == cv then *p = sv.
; Returns the value *p held before the operation (equal to cv exactly
; when the store happened).
;
; parameters:
;	p:	rcx
;	cv:	rdx
;	sv:	r8
;
; return:	rax
PUBLIC  __kmp_compare_and_store_ret64
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_ret64 PROC ;NEAR

        mov     rax, rdx                        ; rax = "cv"
        lock cmpxchg QWORD PTR [rcx], r8        ; on mismatch rax is loaded with *p
        ret

__kmp_compare_and_store_ret64 ENDP
_TEXT     ENDS
1028
1029
;------------------------------------------------------------------------
; FUNCTION __kmp_compare_and_store_loop8
;
; kmp_int8
; __kmp_compare_and_store_loop8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
;
; Spins until the byte compare-and-swap "if *p == cv then *p = sv"
; succeeds, executing PAUSE between attempts.  On return al holds cv,
; the value that was replaced.
;
; "cv" stays in dl and "sv" in r8b for the whole loop, so every retry
; compares against the caller's original cv.
;
; parameters:
;	p:	rcx
;	cv:	edx
;	sv:	r8d
;
; return:	al
PUBLIC  __kmp_compare_and_store_loop8
_TEXT   SEGMENT
        ALIGN 16

__kmp_compare_and_store_loop8 PROC ;NEAR
$__kmp_loop:
        mov       al, dl        ; reload "cv": a failed CMPXCHG overwrites AL
lock    cmpxchg   BYTE PTR [rcx], r8b
                        ; Compare AL with [rcx].  If equal set
                        ; ZF and store R8B ("sv") into [rcx].  Else,
                        ; clear ZF and load [rcx] into AL.
        jz        SHORT $__kmp_success

        db      0f3H
        db      090H                    ; pause

        jmp       SHORT $__kmp_loop

$__kmp_success:
        ret

__kmp_compare_and_store_loop8 ENDP
_TEXT     ENDS
1065
1066
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real32
;
; kmp_real32
; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d );
;
; Atomically stores the 32-bit pattern of d into *p and returns the
; previous contents of *p.
;
; parameters:
;	p:	rcx
;       d:	xmm1 (lower 4 bytes)
;
; return:	xmm0 (lower 4 bytes)
PUBLIC  __kmp_xchg_real32
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real32 PROC ;NEAR

        movd    edx, xmm1                       ; edx = raw bits of d
        lock xchg DWORD PTR [rcx], edx          ; edx = raw bits previously in *p
        movd    xmm0, edx                       ; return the old value
        ret

__kmp_xchg_real32 ENDP
_TEXT   ENDS
1092
1093
;------------------------------------------------------------------------
; FUNCTION __kmp_xchg_real64
;
; kmp_real64
; __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 d );
;
; Atomically stores the 64-bit pattern of d into *p and returns the
; previous contents of *p.
;
; parameters:
;	p:	rcx
;	d:	xmm1 (lower 8 bytes)
;
; return:	xmm0 (lower 8 bytes)
PUBLIC  __kmp_xchg_real64
_TEXT   SEGMENT
        ALIGN 16
__kmp_xchg_real64 PROC ;NEAR

        movd    rdx, xmm1                       ; rdx = raw bits of "d"
        lock xchg QWORD PTR [rcx], rdx          ; rdx = raw bits previously in *p
        movd    xmm0, rdx                       ; return the old value
        ret

__kmp_xchg_real64 ENDP
_TEXT   ENDS
1119
;------------------------------------------------------------------------
; FUNCTION __kmp_load_x87_fpu_control_word
;
; void
; __kmp_load_x87_fpu_control_word( kmp_int16 *p );
;
; Loads the x87 FPU control word from the 16-bit value at *p.
;
; parameters:
;	p:	rcx
PUBLIC  __kmp_load_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_load_x87_fpu_control_word PROC ;NEAR

        ; control word <- *p
        fldcw   WORD PTR [rcx]
        ret

__kmp_load_x87_fpu_control_word ENDP
_TEXT   ENDS
1138
1139
;------------------------------------------------------------------------
; FUNCTION __kmp_store_x87_fpu_control_word
;
; void
; __kmp_store_x87_fpu_control_word( kmp_int16 *p );
;
; Stores the current x87 FPU control word into the 16-bit slot at *p.
;
; parameters:
;	p:	rcx
PUBLIC  __kmp_store_x87_fpu_control_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_store_x87_fpu_control_word PROC ;NEAR

        ; *p <- control word
        fstcw   WORD PTR [rcx]
        ret

__kmp_store_x87_fpu_control_word ENDP
_TEXT   ENDS
1158
1159
;------------------------------------------------------------------------
; FUNCTION __kmp_clear_x87_fpu_status_word
;
; void
; __kmp_clear_x87_fpu_status_word()
;
; Clears the exception flags in the x87 FPU status word.
; Takes no arguments and returns nothing.
PUBLIC  __kmp_clear_x87_fpu_status_word
_TEXT   SEGMENT
        ALIGN 16
__kmp_clear_x87_fpu_status_word PROC ;NEAR

        ; FNCLEX is the no-wait form: it does not check for pending
        ; unmasked floating-point exceptions before clearing.
        fnclex
        ret

__kmp_clear_x87_fpu_status_word ENDP
_TEXT   ENDS
1175
1176
;------------------------------------------------------------------------
; FUNCTION __kmp_invoke_microtask
;
; typedef void  (*microtask_t)( int *gtid, int *tid, ... );
;
; int
; __kmp_invoke_microtask( microtask_t pkfn,
;                         int gtid, int tid,
;                         int argc, void *p_argv[] ) {
;
;     (*pkfn) ( &gtid, &tid, argv[0], ... );
;     return 1;
; }
;
; note:
;      just before call to pkfn must have rsp 128-byte aligned for compiler
;
; gtid and tid are written to their home slots in the caller's frame so
; that their addresses can be handed to pkfn.  p_argv[0] and p_argv[1]
; travel in r8 and r9; any further arguments are pushed on the stack in
; reverse order, above the 32 bytes of home space reserved for pkfn.
;
; parameters:
;      rcx:   pkfn	16[rbp]
;      edx:   gtid	24[rbp]
;      r8d:   tid	32[rbp]
;      r9d:   argc	40[rbp]
;      [st]:  p_argv	48[rbp]
;
; reg temps:
;      rax:   used all over the place
;      rdx:   used all over the place
;      rcx:   used as argument counter for push parms loop
;      r10:   used to hold pkfn function pointer argument
;
; return:      eax    (always 1/TRUE)
$_pkfn   = 16
$_gtid   = 24
$_tid    = 32
$_argc   = 40
$_p_argv = 48
if OMPT_SUPPORT
$_exit_frame = 56
endif

PUBLIC  __kmp_invoke_microtask
_TEXT   SEGMENT
        ALIGN 16

__kmp_invoke_microtask PROC FRAME ;NEAR
	mov	QWORD PTR 16[rsp], rdx	; home gtid parameter
	mov 	QWORD PTR 24[rsp], r8	; home tid parameter
        push    rbp		; save base pointer
        .pushreg rbp
	sub	rsp, 0		; no fixed allocation necessary - end prolog

        lea     rbp, QWORD PTR [rsp]   	; establish the base pointer
        .setframe rbp, 0
        .ENDPROLOG
if OMPT_SUPPORT
        mov     rax, QWORD PTR $_exit_frame[rbp]
        mov     QWORD PTR [rax], rbp	; *exit_frame = our frame pointer
endif
	mov	r10, rcx	; save pkfn pointer for later

	; argc is a 32-bit int; the Win64 ABI leaves the upper half of r9
	; undefined, so widen it before any 64-bit use below.
	movsxd	r9, r9d		; r9 <= (kmp_int64) argc

;; ------------------------------------------------------------
        mov     rax, r9		; rax <= argc
        cmp     rax, 2
        jge     SHORT $_kmp_invoke_stack_align
        mov     rax, 2          ; set 4 homes if less than 2 parms
$_kmp_invoke_stack_align:
	lea     rdx, QWORD PTR [rax*8+16] ; rdx <= (max(argc,2) + 2) * 8
	mov     rax, rsp        ; Save current SP into rax
	sub	rax, rdx	; rsp - ((argc+2)*8) -> rax
				; without align, rsp would be this
	and     rax, -128       ; Mask off 7 bits (128-byte align)
	add     rax, rdx        ; add space for push's in a loop below
	mov     rsp, rax        ; Prepare the stack ptr
				; Now it will align to 128-byte at the call
;; ------------------------------------------------------------
        			; setup pkfn parameter stack
	mov	rax, r9		; rax <= argc
	shl	rax, 3		; rax <= argc*8
	mov	rdx, QWORD PTR $_p_argv[rbp]	; rdx <= p_argv
	add	rdx, rax	; rdx <= &p_argv[argc]
	mov	rcx, r9		; rcx <= argc
	jecxz	SHORT $_kmp_invoke_pass_parms	; nothing to push if argc=0
	cmp	ecx, 1		; if argc=1 branch ahead
	je	SHORT $_kmp_invoke_one_parm
	sub	ecx, 2		; if argc=2 branch ahead, subtract two from
	je	SHORT $_kmp_invoke_two_parms

$_kmp_invoke_push_parms:	; push last - 5th parms to pkfn on stack
	sub	rdx, 8		; decrement p_argv pointer to previous parm
	mov 	r8, QWORD PTR [rdx] ; r8 <= p_argv[rcx-1]
	push	r8		; push p_argv[rcx-1] onto stack (reverse order)
	sub	ecx, 1
	jecxz	SHORT $_kmp_invoke_two_parms
	jmp	SHORT $_kmp_invoke_push_parms

$_kmp_invoke_two_parms:
	sub	rdx, 8		; put 4th parm to pkfn in r9
	mov	r9, QWORD PTR [rdx] ; r9 <= p_argv[1]

$_kmp_invoke_one_parm:
        sub	rdx, 8		; put 3rd parm to pkfn in r8
	mov	r8, QWORD PTR [rdx] ; r8 <= p_argv[0]

$_kmp_invoke_pass_parms:	; put 1st & 2nd parms to pkfn in registers
	lea	rdx, QWORD PTR $_tid[rbp]  ; rdx <= &tid (2nd parm to pkfn)
	lea	rcx, QWORD PTR $_gtid[rbp] ; rcx <= &gtid (1st parm to pkfn)
        sub     rsp, 32         ; add stack space for first four parms
	mov	rax, r10	; rax <= pkfn
	call	rax		; call (*pkfn)()
	mov	rax, 1		; move 1 into return register;

        lea     rsp, QWORD PTR [rbp]	; restore stack pointer

;	add	rsp, 0		; no fixed allocation necessary - start epilog
        pop     rbp		; restore frame pointer
        ret
__kmp_invoke_microtask ENDP
_TEXT   ENDS
1295
1296endif
1297
1298END
1299