/* Do not modify. This file is auto-generated from armv4-gf2m.pl. */
#include "arm_arch.h"

.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
.type mul_1x1_ialu,%function
.align 5
mul_1x1_ialu:
    mov r4,#0
    bic r5,r1,#3<<30 @ a1=a&0x3fffffff
    str r4,[sp,#0] @ tab[0]=0
    add r6,r5,r5 @ a2=a1<<1
    str r5,[sp,#4] @ tab[1]=a1
    eor r7,r5,r6 @ a1^a2
    str r6,[sp,#8] @ tab[2]=a2
    mov r8,r5,lsl#2 @ a4=a1<<2
    str r7,[sp,#12] @ tab[3]=a1^a2
    eor r9,r5,r8 @ a1^a4
    str r8,[sp,#16] @ tab[4]=a4
    eor r4,r6,r8 @ a2^a4
    str r9,[sp,#20] @ tab[5]=a1^a4
    eor r7,r7,r8 @ a1^a2^a4
    str r4,[sp,#24] @ tab[6]=a2^a4
    and r8,r12,r0,lsl#2
    str r7,[sp,#28] @ tab[7]=a1^a2^a4

    and r9,r12,r0,lsr#1
    ldr r5,[sp,r8] @ tab[b & 0x7]
    and r8,r12,r0,lsr#4
    ldr r7,[sp,r9] @ tab[b >> 3 & 0x7]
    and r9,r12,r0,lsr#7
    ldr r6,[sp,r8] @ tab[b >> 6 & 0x7]
    eor r5,r5,r7,lsl#3 @ stall
    mov r4,r7,lsr#29
    ldr r7,[sp,r9] @ tab[b >> 9 & 0x7]

    and r8,r12,r0,lsr#10
    eor r5,r5,r6,lsl#6
    eor r4,r4,r6,lsr#26
    ldr r6,[sp,r8] @ tab[b >> 12 & 0x7]

    and r9,r12,r0,lsr#13
    eor r5,r5,r7,lsl#9
    eor r4,r4,r7,lsr#23
    ldr r7,[sp,r9] @ tab[b >> 15 & 0x7]

    and r8,r12,r0,lsr#16
    eor r5,r5,r6,lsl#12
    eor r4,r4,r6,lsr#20
    ldr r6,[sp,r8] @ tab[b >> 18 & 0x7]

    and r9,r12,r0,lsr#19
    eor r5,r5,r7,lsl#15
    eor r4,r4,r7,lsr#17
    ldr r7,[sp,r9] @ tab[b >> 21 & 0x7]

    and r8,r12,r0,lsr#22
    eor r5,r5,r6,lsl#18
    eor r4,r4,r6,lsr#14
    ldr r6,[sp,r8] @ tab[b >> 24 & 0x7]

    and r9,r12,r0,lsr#25
    eor r5,r5,r7,lsl#21
    eor r4,r4,r7,lsr#11
    ldr r7,[sp,r9] @ tab[b >> 27 & 0x7]

    tst r1,#1<<30
    and r8,r12,r0,lsr#28
    eor r5,r5,r6,lsl#24
    eor r4,r4,r6,lsr#8
    ldr r6,[sp,r8] @ tab[b >> 30 ]

#ifdef __thumb2__
    itt ne
#endif
    eorne r5,r5,r0,lsl#30
    eorne r4,r4,r0,lsr#2
    tst r1,#1<<31
    eor r5,r5,r7,lsl#27
    eor r4,r4,r7,lsr#5
#ifdef __thumb2__
    itt ne
#endif
    eorne r5,r5,r0,lsl#31
    eorne r4,r4,r0,lsr#1
    eor r5,r5,r6,lsl#30
    eor r4,r4,r6,lsr#2

    mov pc,lr
.size mul_1x1_ialu,.-mul_1x1_ialu
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function
.align 5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
    stmdb sp!,{r10,lr}
    ldr r12,.LOPENSSL_armcap
    adr r10,.LOPENSSL_armcap
    ldr r12,[r12,r10]
#ifdef __APPLE__
    ldr r12,[r12]
#endif
    tst r12,#ARMV7_NEON
    itt ne
    ldrne r10,[sp],#8
    bne .LNEON
    stmdb sp!,{r4,r5,r6,r7,r8,r9}
#else
    stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
#endif
    mov r10,r0 @ reassign 1st argument
    mov r0,r3 @ r0=b1
    sub r7,sp,#36
    mov r8,sp
    and r7,r7,#-32
    ldr r3,[sp,#32] @ load b0
    mov r12,#7<<2
    mov sp,r7 @ allocate tab[8]
    str r8,[r7,#32]

    bl mul_1x1_ialu @ a1·b1
    str r5,[r10,#8]
    str r4,[r10,#12]

    eor r0,r0,r3 @ flip b0 and b1
    eor r1,r1,r2 @ flip a0 and a1
    eor r3,r3,r0
    eor r2,r2,r1
    eor r0,r0,r3
    eor r1,r1,r2
    bl mul_1x1_ialu @ a0·b0
    str r5,[r10]
    str r4,[r10,#4]

    eor r1,r1,r2
    eor r0,r0,r3
    bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
    ldmia r10,{r6,r7,r8,r9}
    eor r5,r5,r4
    ldr sp,[sp,#32] @ destroy tab[8]
    eor r4,r4,r7
    eor r5,r5,r6
    eor r4,r4,r8
    eor r5,r5,r9
    eor r4,r4,r9
    str r4,[r10,#8]
    eor r5,r5,r4
    str r5,[r10,#4]

#if __ARM_ARCH__>=5
    ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#else
    ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
    tst lr,#1
    moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon

.align 5
.LNEON:
    ldr r12, [sp] @ 5th argument
    vmov d26, r2, r1
    vmov d27, r12, r3
    vmov.i64 d28, #0x0000ffffffffffff
    vmov.i64 d29, #0x00000000ffffffff
    vmov.i64 d30, #0x000000000000ffff

    vext.8 d2, d26, d26, #1 @ A1
    vmull.p8 q1, d2, d27 @ F = A1*B
    vext.8 d0, d27, d27, #1 @ B1
    vmull.p8 q0, d26, d0 @ E = A*B1
    vext.8 d4, d26, d26, #2 @ A2
    vmull.p8 q2, d4, d27 @ H = A2*B
    vext.8 d16, d27, d27, #2 @ B2
    vmull.p8 q8, d26, d16 @ G = A*B2
    vext.8 d6, d26, d26, #3 @ A3
    veor q1, q1, q0 @ L = E + F
    vmull.p8 q3, d6, d27 @ J = A3*B
    vext.8 d0, d27, d27, #3 @ B3
    veor q2, q2, q8 @ M = G + H
    vmull.p8 q0, d26, d0 @ I = A*B3
    veor d2, d2, d3 @ t0 = (L) (P0 + P1) << 8
    vand d3, d3, d28
    vext.8 d16, d27, d27, #4 @ B4
    veor d4, d4, d5 @ t1 = (M) (P2 + P3) << 16
    vand d5, d5, d29
    vmull.p8 q8, d26, d16 @ K = A*B4
    veor q3, q3, q0 @ N = I + J
    veor d2, d2, d3
    veor d4, d4, d5
    veor d6, d6, d7 @ t2 = (N) (P4 + P5) << 24
    vand d7, d7, d30
    vext.8 q1, q1, q1, #15
    veor d16, d16, d17 @ t3 = (K) (P6 + P7) << 32
    vmov.i64 d17, #0
    vext.8 q2, q2, q2, #14
    veor d6, d6, d7
    vmull.p8 q0, d26, d27 @ D = A*B
    vext.8 q8, q8, q8, #12
    vext.8 q3, q3, q3, #13
    veor q1, q1, q2
    veor q3, q3, q8
    veor q0, q0, q1
    veor q0, q0, q3

    vst1.32 {q0}, [r0]
    bx lr @ bx lr
#endif
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.
#endif
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 5

#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
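/*
 * For reference, a minimal C sketch of the operation implemented above,
 * assuming 32-bit BN_ULONG (uint32_t below). mul_1x1_ialu forms a
 * carry-less 32x32->64-bit product three bits of b at a time from an
 * 8-entry table of multiples of a's low 30 bits, fixing up bits 30-31
 * of a afterwards; the bit-by-bit mul_1x1 helper here is only a
 * hypothetical stand-in for that, and gf2m_mul_2x2_ref shows the
 * Karatsuba combine used by the IALU path of bn_GF2m_mul_2x2:
 *
 *     #include <stdint.h>
 *
 *     // carry-less (GF(2)[x]) 32x32 -> 64-bit multiply, one bit at a time
 *     static void mul_1x1(uint32_t *hi, uint32_t *lo, uint32_t a, uint32_t b)
 *     {
 *         uint64_t acc = 0;
 *         for (int i = 0; i < 32; i++)
 *             if ((b >> i) & 1)
 *                 acc ^= (uint64_t)a << i;
 *         *lo = (uint32_t)acc;
 *         *hi = (uint32_t)(acc >> 32);
 *     }
 *
 *     // r[3..0] = (a1*x^32 + a0) * (b1*x^32 + b0) over GF(2)[x]
 *     void gf2m_mul_2x2_ref(uint32_t r[4], uint32_t a1, uint32_t a0,
 *                           uint32_t b1, uint32_t b0)
 *     {
 *         uint32_t hh, hl, lh, ll, mh, ml;
 *         mul_1x1(&hh, &hl, a1, b1);             // a1·b1 -> r[3]:r[2]
 *         mul_1x1(&lh, &ll, a0, b0);             // a0·b0 -> r[1]:r[0]
 *         mul_1x1(&mh, &ml, a1 ^ a0, b1 ^ b0);   // (a1+a0)·(b1+b0)
 *         mh ^= hh ^ lh;                         // Karatsuba middle term
 *         ml ^= hl ^ ll;
 *         r[0] = ll;
 *         r[1] = lh ^ ml;
 *         r[2] = hl ^ mh;
 *         r[3] = hh;
 *     }
 */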