/* Do not modify. This file is auto-generated from armv4-gf2m.pl. */
#include "arm_arch.h"

#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.text

@ mul_1x1_ialu: carry-less (GF(2)[x]) 32x32->64-bit multiply using an
@ 8-entry lookup table tab[] built on the stack from 2-bit slices of a.
@ In:    r0 = b, r1 = a, r12 = 7<<2 (tab index mask), sp -> tab[8] (32 bytes)
@ Out:   r5 = low 32 bits, r4 = high 32 bits of the product
@ Clobbers: r4-r9; preserves r0-r3, r12. Internal helper — not a public ABI
@ entry point (called only by bn_GF2m_mul_2x2 below with the setup above).
.type	mul_1x1_ialu,%function
.align	5
mul_1x1_ialu:
	mov	r4,#0
	bic	r5,r1,#3<<30	@ a1=a&0x3fffffff
	str	r4,[sp,#0]	@ tab[0]=0
	add	r6,r5,r5	@ a2=a1<<1
	str	r5,[sp,#4]	@ tab[1]=a1
	eor	r7,r5,r6	@ a1^a2
	str	r6,[sp,#8]	@ tab[2]=a2
	mov	r8,r5,lsl#2	@ a4=a1<<2
	str	r7,[sp,#12]	@ tab[3]=a1^a2
	eor	r9,r5,r8	@ a1^a4
	str	r8,[sp,#16]	@ tab[4]=a4
	eor	r4,r6,r8	@ a2^a4
	str	r9,[sp,#20]	@ tab[5]=a1^a4
	eor	r7,r7,r8	@ a1^a2^a4
	str	r4,[sp,#24]	@ tab[6]=a2^a4
	and	r8,r12,r0,lsl#2
	str	r7,[sp,#28]	@ tab[7]=a1^a2^a4

	@ Accumulate the product 3 bits of b at a time: r5 = lo, r4 = hi.
	and	r9,r12,r0,lsr#1
	ldr	r5,[sp,r8]	@ tab[b       & 0x7]
	and	r8,r12,r0,lsr#4
	ldr	r7,[sp,r9]	@ tab[b >>  3 & 0x7]
	and	r9,r12,r0,lsr#7
	ldr	r6,[sp,r8]	@ tab[b >>  6 & 0x7]
	eor	r5,r5,r7,lsl#3	@ stall
	mov	r4,r7,lsr#29
	ldr	r7,[sp,r9]	@ tab[b >>  9 & 0x7]

	and	r8,r12,r0,lsr#10
	eor	r5,r5,r6,lsl#6
	eor	r4,r4,r6,lsr#26
	ldr	r6,[sp,r8]	@ tab[b >> 12 & 0x7]

	and	r9,r12,r0,lsr#13
	eor	r5,r5,r7,lsl#9
	eor	r4,r4,r7,lsr#23
	ldr	r7,[sp,r9]	@ tab[b >> 15 & 0x7]

	and	r8,r12,r0,lsr#16
	eor	r5,r5,r6,lsl#12
	eor	r4,r4,r6,lsr#20
	ldr	r6,[sp,r8]	@ tab[b >> 18 & 0x7]

	and	r9,r12,r0,lsr#19
	eor	r5,r5,r7,lsl#15
	eor	r4,r4,r7,lsr#17
	ldr	r7,[sp,r9]	@ tab[b >> 21 & 0x7]

	and	r8,r12,r0,lsr#22
	eor	r5,r5,r6,lsl#18
	eor	r4,r4,r6,lsr#14
	ldr	r6,[sp,r8]	@ tab[b >> 24 & 0x7]

	and	r9,r12,r0,lsr#25
	eor	r5,r5,r7,lsl#21
	eor	r4,r4,r7,lsr#11
	ldr	r7,[sp,r9]	@ tab[b >> 27 & 0x7]

	@ Bits 30/31 of a were masked out of tab[]; fold them back in
	@ conditionally so the full 32-bit a participates.
	tst	r1,#1<<30
	and	r8,r12,r0,lsr#28
	eor	r5,r5,r6,lsl#24
	eor	r4,r4,r6,lsr#8
	ldr	r6,[sp,r8]	@ tab[b >> 30      ]

#ifdef	__thumb2__
	itt	ne
#endif
	eorne	r5,r5,r0,lsl#30
	eorne	r4,r4,r0,lsr#2
	tst	r1,#1<<31
	eor	r5,r5,r7,lsl#27
	eor	r4,r4,r7,lsr#5
#ifdef	__thumb2__
	itt	ne
#endif
	eorne	r5,r5,r0,lsl#31
	eorne	r4,r4,r0,lsr#1
	eor	r5,r5,r6,lsl#30
	eor	r4,r4,r6,lsr#2

	mov	pc,lr
.size	mul_1x1_ialu,.-mul_1x1_ialu

@ void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
@                      BN_ULONG b1, BN_ULONG b0);
@ Karatsuba 2x2-limb multiplication in GF(2)[x]:
@   r[3]:r[2] = a1*b1, r[1]:r[0] = a0*b0, middle terms folded in below.
@ On ARMv7+ builds the NEON path (.LNEON) is taken at runtime when
@ OPENSSL_armcap_P advertises ARMV7_NEON.
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,%function
.align	5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
	stmdb	sp!,{r10,lr}
	ldr	r12,.LOPENSSL_armcap
# if !defined(_WIN32)
	adr	r10,.LOPENSSL_armcap
	ldr	r12,[r12,r10]		@ PC-relative load of capability word
# endif
# if defined(__APPLE__) || defined(_WIN32)
	ldr	r12,[r12]		@ extra indirection: word holds a pointer
# endif
	tst	r12,#ARMV7_NEON
	itt	ne
	ldrne	r10,[sp],#8
	bne	.LNEON
	stmdb	sp!,{r4,r5,r6,r7,r8,r9}
#else
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
#endif
	mov	r10,r0			@ reassign 1st argument
	mov	r0,r3			@ r0=b1
	sub	r7,sp,#36
	mov	r8,sp
	and	r7,r7,#-32		@ 32-byte-align tab[8]
	ldr	r3,[sp,#32]		@ load b0
	mov	r12,#7<<2		@ index mask for mul_1x1_ialu
	mov	sp,r7			@ allocate tab[8]
	str	r8,[r7,#32]		@ stash original sp above tab[]

	bl	mul_1x1_ialu		@ a1·b1
	str	r5,[r10,#8]
	str	r4,[r10,#12]

	eor	r0,r0,r3		@ flip b0 and b1
	eor	r1,r1,r2		@ flip a0 and a1
	eor	r3,r3,r0
	eor	r2,r2,r1
	eor	r0,r0,r3
	eor	r1,r1,r2
	bl	mul_1x1_ialu		@ a0·b0
	str	r5,[r10]
	str	r4,[r10,#4]

	eor	r1,r1,r2		@ r1=a0^a1, r0=b0^b1
	eor	r0,r0,r3
	bl	mul_1x1_ialu		@ (a1+a0)·(b1+b0)
	ldmia	r10,{r6,r7,r8,r9}
	eor	r5,r5,r4
	ldr	sp,[sp,#32]		@ destroy tab[8]
	eor	r4,r4,r7
	eor	r5,r5,r6
	eor	r4,r4,r8
	eor	r5,r5,r9
	eor	r4,r4,r9
	str	r4,[r10,#8]
	eor	r5,r5,r4
	str	r5,[r10,#4]

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

@ NEON implementation: one 64x64->128 polynomial multiply built from
@ VMULL.P8 (8x8->16 polynomial multiplies) with shifted/masked partials.
.align	5
.LNEON:
	ldr	r12, [sp]		@ 5th argument
	vmov	d26, r2, r1
	vmov	d27, r12, r3
	vmov.i64	d28, #0x0000ffffffffffff
	vmov.i64	d29, #0x00000000ffffffff
	vmov.i64	d30, #0x000000000000ffff

	vext.8	d2, d26, d26, #1	@ A1
	vmull.p8	q1, d2, d27		@ F = A1*B
	vext.8	d0, d27, d27, #1	@ B1
	vmull.p8	q0, d26, d0		@ E = A*B1
	vext.8	d4, d26, d26, #2	@ A2
	vmull.p8	q2, d4, d27		@ H = A2*B
	vext.8	d16, d27, d27, #2	@ B2
	vmull.p8	q8, d26, d16		@ G = A*B2
	vext.8	d6, d26, d26, #3	@ A3
	veor	q1, q1, q0		@ L = E + F
	vmull.p8	q3, d6, d27		@ J = A3*B
	vext.8	d0, d27, d27, #3	@ B3
	veor	q2, q2, q8		@ M = G + H
	vmull.p8	q0, d26, d0		@ I = A*B3
	veor	d2, d2, d3	@ t0 = (L) (P0 + P1) << 8
	vand	d3, d3, d28
	vext.8	d16, d27, d27, #4	@ B4
	veor	d4, d4, d5	@ t1 = (M) (P2 + P3) << 16
	vand	d5, d5, d29
	vmull.p8	q8, d26, d16		@ K = A*B4
	veor	q3, q3, q0		@ N = I + J
	veor	d2, d2, d3
	veor	d4, d4, d5
	veor	d6, d6, d7	@ t2 = (N) (P4 + P5) << 24
	vand	d7, d7, d30
	vext.8	q1, q1, q1, #15
	veor	d16, d16, d17	@ t3 = (K) (P6 + P7) << 32
	vmov.i64	d17, #0
	vext.8	q2, q2, q2, #14
	veor	d6, d6, d7
	vmull.p8	q0, d26, d27		@ D = A*B
	vext.8	q8, q8, q8, #12
	vext.8	q3, q3, q3, #13
	veor	q1, q1, q2
	veor	q3, q3, q8
	veor	q0, q0, q1
	veor	q0, q0, q3

	vst1.32	{q0}, [r0]
	bx	lr		@ bx lr
#endif
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_MAX_ARCH__>=7
.align	5
.LOPENSSL_armcap:
# ifdef	_WIN32
.word	OPENSSL_armcap_P
# else
.word	OPENSSL_armcap_P-.	@ PC-relative offset, resolved at .LOPENSSL_armcap
# endif
#endif
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	5

#if __ARM_MAX_ARCH__>=7
.comm	OPENSSL_armcap_P,4,4
#endif