1*68546e56SEric Biggers/* SPDX-License-Identifier: GPL-2.0 OR MIT */ 2*68546e56SEric Biggers/* 3*68546e56SEric Biggers * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 4*68546e56SEric Biggers * 5*68546e56SEric Biggers * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This 6*68546e56SEric Biggers * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been 7*68546e56SEric Biggers * manually reworked for use in kernel space. 8*68546e56SEric Biggers */ 9*68546e56SEric Biggers 10*68546e56SEric Biggers#include <linux/linkage.h> 11*68546e56SEric Biggers 12*68546e56SEric Biggers.text 13*68546e56SEric Biggers.arch armv7-a 14*68546e56SEric Biggers.fpu neon 15*68546e56SEric Biggers.align 4 16*68546e56SEric Biggers 17*68546e56SEric BiggersENTRY(curve25519_neon) 18*68546e56SEric Biggers push {r4-r11, lr} 19*68546e56SEric Biggers mov ip, sp 20*68546e56SEric Biggers sub r3, sp, #704 21*68546e56SEric Biggers and r3, r3, #0xfffffff0 22*68546e56SEric Biggers mov sp, r3 23*68546e56SEric Biggers movw r4, #0 24*68546e56SEric Biggers movw r5, #254 25*68546e56SEric Biggers vmov.i32 q0, #1 26*68546e56SEric Biggers vshr.u64 q1, q0, #7 27*68546e56SEric Biggers vshr.u64 q0, q0, #8 28*68546e56SEric Biggers vmov.i32 d4, #19 29*68546e56SEric Biggers vmov.i32 d5, #38 30*68546e56SEric Biggers add r6, sp, #480 31*68546e56SEric Biggers vst1.8 {d2-d3}, [r6, : 128]! 32*68546e56SEric Biggers vst1.8 {d0-d1}, [r6, : 128]! 33*68546e56SEric Biggers vst1.8 {d4-d5}, [r6, : 128] 34*68546e56SEric Biggers add r6, r3, #0 35*68546e56SEric Biggers vmov.i32 q2, #0 36*68546e56SEric Biggers vst1.8 {d4-d5}, [r6, : 128]! 37*68546e56SEric Biggers vst1.8 {d4-d5}, [r6, : 128]! 38*68546e56SEric Biggers vst1.8 d4, [r6, : 64] 39*68546e56SEric Biggers add r6, r3, #0 40*68546e56SEric Biggers movw r7, #960 41*68546e56SEric Biggers sub r7, r7, #2 42*68546e56SEric Biggers neg r7, r7 43*68546e56SEric Biggers sub r7, r7, r7, LSL #7 44*68546e56SEric Biggers str r7, [r6] 45*68546e56SEric Biggers add r6, sp, #672 46*68546e56SEric Biggers vld1.8 {d4-d5}, [r1]! 47*68546e56SEric Biggers vld1.8 {d6-d7}, [r1] 48*68546e56SEric Biggers vst1.8 {d4-d5}, [r6, : 128]! 49*68546e56SEric Biggers vst1.8 {d6-d7}, [r6, : 128] 50*68546e56SEric Biggers sub r1, r6, #16 51*68546e56SEric Biggers ldrb r6, [r1] 52*68546e56SEric Biggers and r6, r6, #248 53*68546e56SEric Biggers strb r6, [r1] 54*68546e56SEric Biggers ldrb r6, [r1, #31] 55*68546e56SEric Biggers and r6, r6, #127 56*68546e56SEric Biggers orr r6, r6, #64 57*68546e56SEric Biggers strb r6, [r1, #31] 58*68546e56SEric Biggers vmov.i64 q2, #0xffffffff 59*68546e56SEric Biggers vshr.u64 q3, q2, #7 60*68546e56SEric Biggers vshr.u64 q2, q2, #6 61*68546e56SEric Biggers vld1.8 {d8}, [r2] 62*68546e56SEric Biggers vld1.8 {d10}, [r2] 63*68546e56SEric Biggers add r2, r2, #6 64*68546e56SEric Biggers vld1.8 {d12}, [r2] 65*68546e56SEric Biggers vld1.8 {d14}, [r2] 66*68546e56SEric Biggers add r2, r2, #6 67*68546e56SEric Biggers vld1.8 {d16}, [r2] 68*68546e56SEric Biggers add r2, r2, #4 69*68546e56SEric Biggers vld1.8 {d18}, [r2] 70*68546e56SEric Biggers vld1.8 {d20}, [r2] 71*68546e56SEric Biggers add r2, r2, #6 72*68546e56SEric Biggers vld1.8 {d22}, [r2] 73*68546e56SEric Biggers add r2, r2, #2 74*68546e56SEric Biggers vld1.8 {d24}, [r2] 75*68546e56SEric Biggers vld1.8 {d26}, [r2] 76*68546e56SEric Biggers vshr.u64 q5, q5, #26 77*68546e56SEric Biggers vshr.u64 q6, q6, #3 78*68546e56SEric Biggers vshr.u64 q7, q7, #29 79*68546e56SEric Biggers vshr.u64 q8, q8, #6 80*68546e56SEric Biggers vshr.u64 q10, q10, #25 81*68546e56SEric Biggers vshr.u64 q11, q11, #3 82*68546e56SEric Biggers vshr.u64 q12, q12, #12 83*68546e56SEric Biggers vshr.u64 q13, q13, #38 84*68546e56SEric Biggers vand q4, q4, q2 85*68546e56SEric Biggers vand q6, q6, q2 86*68546e56SEric Biggers vand q8, q8, q2 87*68546e56SEric Biggers vand q10, q10, q2 88*68546e56SEric Biggers vand q2, q12, q2 89*68546e56SEric Biggers vand q5, q5, q3 90*68546e56SEric Biggers vand q7, q7, q3 91*68546e56SEric Biggers vand q9, q9, q3 92*68546e56SEric Biggers vand q11, q11, q3 93*68546e56SEric Biggers vand q3, q13, q3 94*68546e56SEric Biggers add r2, r3, #48 95*68546e56SEric Biggers vadd.i64 q12, q4, q1 96*68546e56SEric Biggers vadd.i64 q13, q10, q1 97*68546e56SEric Biggers vshr.s64 q12, q12, #26 98*68546e56SEric Biggers vshr.s64 q13, q13, #26 99*68546e56SEric Biggers vadd.i64 q5, q5, q12 100*68546e56SEric Biggers vshl.i64 q12, q12, #26 101*68546e56SEric Biggers vadd.i64 q14, q5, q0 102*68546e56SEric Biggers vadd.i64 q11, q11, q13 103*68546e56SEric Biggers vshl.i64 q13, q13, #26 104*68546e56SEric Biggers vadd.i64 q15, q11, q0 105*68546e56SEric Biggers vsub.i64 q4, q4, q12 106*68546e56SEric Biggers vshr.s64 q12, q14, #25 107*68546e56SEric Biggers vsub.i64 q10, q10, q13 108*68546e56SEric Biggers vshr.s64 q13, q15, #25 109*68546e56SEric Biggers vadd.i64 q6, q6, q12 110*68546e56SEric Biggers vshl.i64 q12, q12, #25 111*68546e56SEric Biggers vadd.i64 q14, q6, q1 112*68546e56SEric Biggers vadd.i64 q2, q2, q13 113*68546e56SEric Biggers vsub.i64 q5, q5, q12 114*68546e56SEric Biggers vshr.s64 q12, q14, #26 115*68546e56SEric Biggers vshl.i64 q13, q13, #25 116*68546e56SEric Biggers vadd.i64 q14, q2, q1 117*68546e56SEric Biggers vadd.i64 q7, q7, q12 118*68546e56SEric Biggers vshl.i64 q12, q12, #26 119*68546e56SEric Biggers vadd.i64 q15, q7, q0 120*68546e56SEric Biggers vsub.i64 q11, q11, q13 121*68546e56SEric Biggers vshr.s64 q13, q14, #26 122*68546e56SEric Biggers vsub.i64 q6, q6, q12 123*68546e56SEric Biggers vshr.s64 q12, q15, #25 124*68546e56SEric Biggers vadd.i64 q3, q3, q13 125*68546e56SEric Biggers vshl.i64 q13, q13, #26 126*68546e56SEric Biggers vadd.i64 q14, q3, q0 127*68546e56SEric Biggers vadd.i64 q8, q8, q12 128*68546e56SEric Biggers vshl.i64 q12, q12, #25 129*68546e56SEric Biggers vadd.i64 q15, q8, q1 130*68546e56SEric Biggers add r2, r2, #8 131*68546e56SEric Biggers vsub.i64 q2, q2, q13 132*68546e56SEric Biggers vshr.s64 q13, q14, #25 133*68546e56SEric Biggers vsub.i64 q7, q7, q12 134*68546e56SEric Biggers vshr.s64 q12, q15, #26 135*68546e56SEric Biggers vadd.i64 q14, q13, q13 136*68546e56SEric Biggers vadd.i64 q9, q9, q12 137*68546e56SEric Biggers vtrn.32 d12, d14 138*68546e56SEric Biggers vshl.i64 q12, q12, #26 139*68546e56SEric Biggers vtrn.32 d13, d15 140*68546e56SEric Biggers vadd.i64 q0, q9, q0 141*68546e56SEric Biggers vadd.i64 q4, q4, q14 142*68546e56SEric Biggers vst1.8 d12, [r2, : 64]! 143*68546e56SEric Biggers vshl.i64 q6, q13, #4 144*68546e56SEric Biggers vsub.i64 q7, q8, q12 145*68546e56SEric Biggers vshr.s64 q0, q0, #25 146*68546e56SEric Biggers vadd.i64 q4, q4, q6 147*68546e56SEric Biggers vadd.i64 q6, q10, q0 148*68546e56SEric Biggers vshl.i64 q0, q0, #25 149*68546e56SEric Biggers vadd.i64 q8, q6, q1 150*68546e56SEric Biggers vadd.i64 q4, q4, q13 151*68546e56SEric Biggers vshl.i64 q10, q13, #25 152*68546e56SEric Biggers vadd.i64 q1, q4, q1 153*68546e56SEric Biggers vsub.i64 q0, q9, q0 154*68546e56SEric Biggers vshr.s64 q8, q8, #26 155*68546e56SEric Biggers vsub.i64 q3, q3, q10 156*68546e56SEric Biggers vtrn.32 d14, d0 157*68546e56SEric Biggers vshr.s64 q1, q1, #26 158*68546e56SEric Biggers vtrn.32 d15, d1 159*68546e56SEric Biggers vadd.i64 q0, q11, q8 160*68546e56SEric Biggers vst1.8 d14, [r2, : 64] 161*68546e56SEric Biggers vshl.i64 q7, q8, #26 162*68546e56SEric Biggers vadd.i64 q5, q5, q1 163*68546e56SEric Biggers vtrn.32 d4, d6 164*68546e56SEric Biggers vshl.i64 q1, q1, #26 165*68546e56SEric Biggers vtrn.32 d5, d7 166*68546e56SEric Biggers vsub.i64 q3, q6, q7 167*68546e56SEric Biggers add r2, r2, #16 168*68546e56SEric Biggers vsub.i64 q1, q4, q1 169*68546e56SEric Biggers vst1.8 d4, [r2, : 64] 170*68546e56SEric Biggers vtrn.32 d6, d0 171*68546e56SEric Biggers vtrn.32 d7, d1 172*68546e56SEric Biggers sub r2, r2, #8 173*68546e56SEric Biggers vtrn.32 d2, d10 174*68546e56SEric Biggers vtrn.32 d3, d11 175*68546e56SEric Biggers vst1.8 d6, [r2, : 64] 176*68546e56SEric Biggers sub r2, r2, #24 177*68546e56SEric Biggers vst1.8 d2, [r2, : 64] 178*68546e56SEric Biggers add r2, r3, #96 179*68546e56SEric Biggers vmov.i32 q0, #0 180*68546e56SEric Biggers vmov.i64 d2, #0xff 181*68546e56SEric Biggers vmov.i64 d3, #0 182*68546e56SEric Biggers vshr.u32 q1, q1, #7 183*68546e56SEric Biggers vst1.8 {d2-d3}, [r2, : 128]! 184*68546e56SEric Biggers vst1.8 {d0-d1}, [r2, : 128]! 185*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 186*68546e56SEric Biggers add r2, r3, #144 187*68546e56SEric Biggers vmov.i32 q0, #0 188*68546e56SEric Biggers vst1.8 {d0-d1}, [r2, : 128]! 189*68546e56SEric Biggers vst1.8 {d0-d1}, [r2, : 128]! 190*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 191*68546e56SEric Biggers add r2, r3, #240 192*68546e56SEric Biggers vmov.i32 q0, #0 193*68546e56SEric Biggers vmov.i64 d2, #0xff 194*68546e56SEric Biggers vmov.i64 d3, #0 195*68546e56SEric Biggers vshr.u32 q1, q1, #7 196*68546e56SEric Biggers vst1.8 {d2-d3}, [r2, : 128]! 197*68546e56SEric Biggers vst1.8 {d0-d1}, [r2, : 128]! 198*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 199*68546e56SEric Biggers add r2, r3, #48 200*68546e56SEric Biggers add r6, r3, #192 201*68546e56SEric Biggers vld1.8 {d0-d1}, [r2, : 128]! 202*68546e56SEric Biggers vld1.8 {d2-d3}, [r2, : 128]! 203*68546e56SEric Biggers vld1.8 {d4}, [r2, : 64] 204*68546e56SEric Biggers vst1.8 {d0-d1}, [r6, : 128]! 205*68546e56SEric Biggers vst1.8 {d2-d3}, [r6, : 128]! 206*68546e56SEric Biggers vst1.8 d4, [r6, : 64] 207*68546e56SEric Biggers.Lmainloop: 208*68546e56SEric Biggers mov r2, r5, LSR #3 209*68546e56SEric Biggers and r6, r5, #7 210*68546e56SEric Biggers ldrb r2, [r1, r2] 211*68546e56SEric Biggers mov r2, r2, LSR r6 212*68546e56SEric Biggers and r2, r2, #1 213*68546e56SEric Biggers str r5, [sp, #456] 214*68546e56SEric Biggers eor r4, r4, r2 215*68546e56SEric Biggers str r2, [sp, #460] 216*68546e56SEric Biggers neg r2, r4 217*68546e56SEric Biggers add r4, r3, #96 218*68546e56SEric Biggers add r5, r3, #192 219*68546e56SEric Biggers add r6, r3, #144 220*68546e56SEric Biggers vld1.8 {d8-d9}, [r4, : 128]! 221*68546e56SEric Biggers add r7, r3, #240 222*68546e56SEric Biggers vld1.8 {d10-d11}, [r5, : 128]! 223*68546e56SEric Biggers veor q6, q4, q5 224*68546e56SEric Biggers vld1.8 {d14-d15}, [r6, : 128]! 225*68546e56SEric Biggers vdup.i32 q8, r2 226*68546e56SEric Biggers vld1.8 {d18-d19}, [r7, : 128]! 227*68546e56SEric Biggers veor q10, q7, q9 228*68546e56SEric Biggers vld1.8 {d22-d23}, [r4, : 128]! 229*68546e56SEric Biggers vand q6, q6, q8 230*68546e56SEric Biggers vld1.8 {d24-d25}, [r5, : 128]! 231*68546e56SEric Biggers vand q10, q10, q8 232*68546e56SEric Biggers vld1.8 {d26-d27}, [r6, : 128]! 233*68546e56SEric Biggers veor q4, q4, q6 234*68546e56SEric Biggers vld1.8 {d28-d29}, [r7, : 128]! 235*68546e56SEric Biggers veor q5, q5, q6 236*68546e56SEric Biggers vld1.8 {d0}, [r4, : 64] 237*68546e56SEric Biggers veor q6, q7, q10 238*68546e56SEric Biggers vld1.8 {d2}, [r5, : 64] 239*68546e56SEric Biggers veor q7, q9, q10 240*68546e56SEric Biggers vld1.8 {d4}, [r6, : 64] 241*68546e56SEric Biggers veor q9, q11, q12 242*68546e56SEric Biggers vld1.8 {d6}, [r7, : 64] 243*68546e56SEric Biggers veor q10, q0, q1 244*68546e56SEric Biggers sub r2, r4, #32 245*68546e56SEric Biggers vand q9, q9, q8 246*68546e56SEric Biggers sub r4, r5, #32 247*68546e56SEric Biggers vand q10, q10, q8 248*68546e56SEric Biggers sub r5, r6, #32 249*68546e56SEric Biggers veor q11, q11, q9 250*68546e56SEric Biggers sub r6, r7, #32 251*68546e56SEric Biggers veor q0, q0, q10 252*68546e56SEric Biggers veor q9, q12, q9 253*68546e56SEric Biggers veor q1, q1, q10 254*68546e56SEric Biggers veor q10, q13, q14 255*68546e56SEric Biggers veor q12, q2, q3 256*68546e56SEric Biggers vand q10, q10, q8 257*68546e56SEric Biggers vand q8, q12, q8 258*68546e56SEric Biggers veor q12, q13, q10 259*68546e56SEric Biggers veor q2, q2, q8 260*68546e56SEric Biggers veor q10, q14, q10 261*68546e56SEric Biggers veor q3, q3, q8 262*68546e56SEric Biggers vadd.i32 q8, q4, q6 263*68546e56SEric Biggers vsub.i32 q4, q4, q6 264*68546e56SEric Biggers vst1.8 {d16-d17}, [r2, : 128]! 265*68546e56SEric Biggers vadd.i32 q6, q11, q12 266*68546e56SEric Biggers vst1.8 {d8-d9}, [r5, : 128]! 267*68546e56SEric Biggers vsub.i32 q4, q11, q12 268*68546e56SEric Biggers vst1.8 {d12-d13}, [r2, : 128]! 269*68546e56SEric Biggers vadd.i32 q6, q0, q2 270*68546e56SEric Biggers vst1.8 {d8-d9}, [r5, : 128]! 271*68546e56SEric Biggers vsub.i32 q0, q0, q2 272*68546e56SEric Biggers vst1.8 d12, [r2, : 64] 273*68546e56SEric Biggers vadd.i32 q2, q5, q7 274*68546e56SEric Biggers vst1.8 d0, [r5, : 64] 275*68546e56SEric Biggers vsub.i32 q0, q5, q7 276*68546e56SEric Biggers vst1.8 {d4-d5}, [r4, : 128]! 277*68546e56SEric Biggers vadd.i32 q2, q9, q10 278*68546e56SEric Biggers vst1.8 {d0-d1}, [r6, : 128]! 279*68546e56SEric Biggers vsub.i32 q0, q9, q10 280*68546e56SEric Biggers vst1.8 {d4-d5}, [r4, : 128]! 281*68546e56SEric Biggers vadd.i32 q2, q1, q3 282*68546e56SEric Biggers vst1.8 {d0-d1}, [r6, : 128]! 283*68546e56SEric Biggers vsub.i32 q0, q1, q3 284*68546e56SEric Biggers vst1.8 d4, [r4, : 64] 285*68546e56SEric Biggers vst1.8 d0, [r6, : 64] 286*68546e56SEric Biggers add r2, sp, #512 287*68546e56SEric Biggers add r4, r3, #96 288*68546e56SEric Biggers add r5, r3, #144 289*68546e56SEric Biggers vld1.8 {d0-d1}, [r2, : 128] 290*68546e56SEric Biggers vld1.8 {d2-d3}, [r4, : 128]! 291*68546e56SEric Biggers vld1.8 {d4-d5}, [r5, : 128]! 292*68546e56SEric Biggers vzip.i32 q1, q2 293*68546e56SEric Biggers vld1.8 {d6-d7}, [r4, : 128]! 294*68546e56SEric Biggers vld1.8 {d8-d9}, [r5, : 128]! 295*68546e56SEric Biggers vshl.i32 q5, q1, #1 296*68546e56SEric Biggers vzip.i32 q3, q4 297*68546e56SEric Biggers vshl.i32 q6, q2, #1 298*68546e56SEric Biggers vld1.8 {d14}, [r4, : 64] 299*68546e56SEric Biggers vshl.i32 q8, q3, #1 300*68546e56SEric Biggers vld1.8 {d15}, [r5, : 64] 301*68546e56SEric Biggers vshl.i32 q9, q4, #1 302*68546e56SEric Biggers vmul.i32 d21, d7, d1 303*68546e56SEric Biggers vtrn.32 d14, d15 304*68546e56SEric Biggers vmul.i32 q11, q4, q0 305*68546e56SEric Biggers vmul.i32 q0, q7, q0 306*68546e56SEric Biggers vmull.s32 q12, d2, d2 307*68546e56SEric Biggers vmlal.s32 q12, d11, d1 308*68546e56SEric Biggers vmlal.s32 q12, d12, d0 309*68546e56SEric Biggers vmlal.s32 q12, d13, d23 310*68546e56SEric Biggers vmlal.s32 q12, d16, d22 311*68546e56SEric Biggers vmlal.s32 q12, d7, d21 312*68546e56SEric Biggers vmull.s32 q10, d2, d11 313*68546e56SEric Biggers vmlal.s32 q10, d4, d1 314*68546e56SEric Biggers vmlal.s32 q10, d13, d0 315*68546e56SEric Biggers vmlal.s32 q10, d6, d23 316*68546e56SEric Biggers vmlal.s32 q10, d17, d22 317*68546e56SEric Biggers vmull.s32 q13, d10, d4 318*68546e56SEric Biggers vmlal.s32 q13, d11, d3 319*68546e56SEric Biggers vmlal.s32 q13, d13, d1 320*68546e56SEric Biggers vmlal.s32 q13, d16, d0 321*68546e56SEric Biggers vmlal.s32 q13, d17, d23 322*68546e56SEric Biggers vmlal.s32 q13, d8, d22 323*68546e56SEric Biggers vmull.s32 q1, d10, d5 324*68546e56SEric Biggers vmlal.s32 q1, d11, d4 325*68546e56SEric Biggers vmlal.s32 q1, d6, d1 326*68546e56SEric Biggers vmlal.s32 q1, d17, d0 327*68546e56SEric Biggers vmlal.s32 q1, d8, d23 328*68546e56SEric Biggers vmull.s32 q14, d10, d6 329*68546e56SEric Biggers vmlal.s32 q14, d11, d13 330*68546e56SEric Biggers vmlal.s32 q14, d4, d4 331*68546e56SEric Biggers vmlal.s32 q14, d17, d1 332*68546e56SEric Biggers vmlal.s32 q14, d18, d0 333*68546e56SEric Biggers vmlal.s32 q14, d9, d23 334*68546e56SEric Biggers vmull.s32 q11, d10, d7 335*68546e56SEric Biggers vmlal.s32 q11, d11, d6 336*68546e56SEric Biggers vmlal.s32 q11, d12, d5 337*68546e56SEric Biggers vmlal.s32 q11, d8, d1 338*68546e56SEric Biggers vmlal.s32 q11, d19, d0 339*68546e56SEric Biggers vmull.s32 q15, d10, d8 340*68546e56SEric Biggers vmlal.s32 q15, d11, d17 341*68546e56SEric Biggers vmlal.s32 q15, d12, d6 342*68546e56SEric Biggers vmlal.s32 q15, d13, d5 343*68546e56SEric Biggers vmlal.s32 q15, d19, d1 344*68546e56SEric Biggers vmlal.s32 q15, d14, d0 345*68546e56SEric Biggers vmull.s32 q2, d10, d9 346*68546e56SEric Biggers vmlal.s32 q2, d11, d8 347*68546e56SEric Biggers vmlal.s32 q2, d12, d7 348*68546e56SEric Biggers vmlal.s32 q2, d13, d6 349*68546e56SEric Biggers vmlal.s32 q2, d14, d1 350*68546e56SEric Biggers vmull.s32 q0, d15, d1 351*68546e56SEric Biggers vmlal.s32 q0, d10, d14 352*68546e56SEric Biggers vmlal.s32 q0, d11, d19 353*68546e56SEric Biggers vmlal.s32 q0, d12, d8 354*68546e56SEric Biggers vmlal.s32 q0, d13, d17 355*68546e56SEric Biggers vmlal.s32 q0, d6, d6 356*68546e56SEric Biggers add r2, sp, #480 357*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128]! 358*68546e56SEric Biggers vmull.s32 q3, d16, d7 359*68546e56SEric Biggers vmlal.s32 q3, d10, d15 360*68546e56SEric Biggers vmlal.s32 q3, d11, d14 361*68546e56SEric Biggers vmlal.s32 q3, d12, d9 362*68546e56SEric Biggers vmlal.s32 q3, d13, d8 363*68546e56SEric Biggers vld1.8 {d8-d9}, [r2, : 128] 364*68546e56SEric Biggers vadd.i64 q5, q12, q9 365*68546e56SEric Biggers vadd.i64 q6, q15, q9 366*68546e56SEric Biggers vshr.s64 q5, q5, #26 367*68546e56SEric Biggers vshr.s64 q6, q6, #26 368*68546e56SEric Biggers vadd.i64 q7, q10, q5 369*68546e56SEric Biggers vshl.i64 q5, q5, #26 370*68546e56SEric Biggers vadd.i64 q8, q7, q4 371*68546e56SEric Biggers vadd.i64 q2, q2, q6 372*68546e56SEric Biggers vshl.i64 q6, q6, #26 373*68546e56SEric Biggers vadd.i64 q10, q2, q4 374*68546e56SEric Biggers vsub.i64 q5, q12, q5 375*68546e56SEric Biggers vshr.s64 q8, q8, #25 376*68546e56SEric Biggers vsub.i64 q6, q15, q6 377*68546e56SEric Biggers vshr.s64 q10, q10, #25 378*68546e56SEric Biggers vadd.i64 q12, q13, q8 379*68546e56SEric Biggers vshl.i64 q8, q8, #25 380*68546e56SEric Biggers vadd.i64 q13, q12, q9 381*68546e56SEric Biggers vadd.i64 q0, q0, q10 382*68546e56SEric Biggers vsub.i64 q7, q7, q8 383*68546e56SEric Biggers vshr.s64 q8, q13, #26 384*68546e56SEric Biggers vshl.i64 q10, q10, #25 385*68546e56SEric Biggers vadd.i64 q13, q0, q9 386*68546e56SEric Biggers vadd.i64 q1, q1, q8 387*68546e56SEric Biggers vshl.i64 q8, q8, #26 388*68546e56SEric Biggers vadd.i64 q15, q1, q4 389*68546e56SEric Biggers vsub.i64 q2, q2, q10 390*68546e56SEric Biggers vshr.s64 q10, q13, #26 391*68546e56SEric Biggers vsub.i64 q8, q12, q8 392*68546e56SEric Biggers vshr.s64 q12, q15, #25 393*68546e56SEric Biggers vadd.i64 q3, q3, q10 394*68546e56SEric Biggers vshl.i64 q10, q10, #26 395*68546e56SEric Biggers vadd.i64 q13, q3, q4 396*68546e56SEric Biggers vadd.i64 q14, q14, q12 397*68546e56SEric Biggers add r2, r3, #288 398*68546e56SEric Biggers vshl.i64 q12, q12, #25 399*68546e56SEric Biggers add r4, r3, #336 400*68546e56SEric Biggers vadd.i64 q15, q14, q9 401*68546e56SEric Biggers add r2, r2, #8 402*68546e56SEric Biggers vsub.i64 q0, q0, q10 403*68546e56SEric Biggers add r4, r4, #8 404*68546e56SEric Biggers vshr.s64 q10, q13, #25 405*68546e56SEric Biggers vsub.i64 q1, q1, q12 406*68546e56SEric Biggers vshr.s64 q12, q15, #26 407*68546e56SEric Biggers vadd.i64 q13, q10, q10 408*68546e56SEric Biggers vadd.i64 q11, q11, q12 409*68546e56SEric Biggers vtrn.32 d16, d2 410*68546e56SEric Biggers vshl.i64 q12, q12, #26 411*68546e56SEric Biggers vtrn.32 d17, d3 412*68546e56SEric Biggers vadd.i64 q1, q11, q4 413*68546e56SEric Biggers vadd.i64 q4, q5, q13 414*68546e56SEric Biggers vst1.8 d16, [r2, : 64]! 415*68546e56SEric Biggers vshl.i64 q5, q10, #4 416*68546e56SEric Biggers vst1.8 d17, [r4, : 64]! 417*68546e56SEric Biggers vsub.i64 q8, q14, q12 418*68546e56SEric Biggers vshr.s64 q1, q1, #25 419*68546e56SEric Biggers vadd.i64 q4, q4, q5 420*68546e56SEric Biggers vadd.i64 q5, q6, q1 421*68546e56SEric Biggers vshl.i64 q1, q1, #25 422*68546e56SEric Biggers vadd.i64 q6, q5, q9 423*68546e56SEric Biggers vadd.i64 q4, q4, q10 424*68546e56SEric Biggers vshl.i64 q10, q10, #25 425*68546e56SEric Biggers vadd.i64 q9, q4, q9 426*68546e56SEric Biggers vsub.i64 q1, q11, q1 427*68546e56SEric Biggers vshr.s64 q6, q6, #26 428*68546e56SEric Biggers vsub.i64 q3, q3, q10 429*68546e56SEric Biggers vtrn.32 d16, d2 430*68546e56SEric Biggers vshr.s64 q9, q9, #26 431*68546e56SEric Biggers vtrn.32 d17, d3 432*68546e56SEric Biggers vadd.i64 q1, q2, q6 433*68546e56SEric Biggers vst1.8 d16, [r2, : 64] 434*68546e56SEric Biggers vshl.i64 q2, q6, #26 435*68546e56SEric Biggers vst1.8 d17, [r4, : 64] 436*68546e56SEric Biggers vadd.i64 q6, q7, q9 437*68546e56SEric Biggers vtrn.32 d0, d6 438*68546e56SEric Biggers vshl.i64 q7, q9, #26 439*68546e56SEric Biggers vtrn.32 d1, d7 440*68546e56SEric Biggers vsub.i64 q2, q5, q2 441*68546e56SEric Biggers add r2, r2, #16 442*68546e56SEric Biggers vsub.i64 q3, q4, q7 443*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 444*68546e56SEric Biggers add r4, r4, #16 445*68546e56SEric Biggers vst1.8 d1, [r4, : 64] 446*68546e56SEric Biggers vtrn.32 d4, d2 447*68546e56SEric Biggers vtrn.32 d5, d3 448*68546e56SEric Biggers sub r2, r2, #8 449*68546e56SEric Biggers sub r4, r4, #8 450*68546e56SEric Biggers vtrn.32 d6, d12 451*68546e56SEric Biggers vtrn.32 d7, d13 452*68546e56SEric Biggers vst1.8 d4, [r2, : 64] 453*68546e56SEric Biggers vst1.8 d5, [r4, : 64] 454*68546e56SEric Biggers sub r2, r2, #24 455*68546e56SEric Biggers sub r4, r4, #24 456*68546e56SEric Biggers vst1.8 d6, [r2, : 64] 457*68546e56SEric Biggers vst1.8 d7, [r4, : 64] 458*68546e56SEric Biggers add r2, r3, #240 459*68546e56SEric Biggers add r4, r3, #96 460*68546e56SEric Biggers vld1.8 {d0-d1}, [r4, : 128]! 461*68546e56SEric Biggers vld1.8 {d2-d3}, [r4, : 128]! 462*68546e56SEric Biggers vld1.8 {d4}, [r4, : 64] 463*68546e56SEric Biggers add r4, r3, #144 464*68546e56SEric Biggers vld1.8 {d6-d7}, [r4, : 128]! 465*68546e56SEric Biggers vtrn.32 q0, q3 466*68546e56SEric Biggers vld1.8 {d8-d9}, [r4, : 128]! 467*68546e56SEric Biggers vshl.i32 q5, q0, #4 468*68546e56SEric Biggers vtrn.32 q1, q4 469*68546e56SEric Biggers vshl.i32 q6, q3, #4 470*68546e56SEric Biggers vadd.i32 q5, q5, q0 471*68546e56SEric Biggers vadd.i32 q6, q6, q3 472*68546e56SEric Biggers vshl.i32 q7, q1, #4 473*68546e56SEric Biggers vld1.8 {d5}, [r4, : 64] 474*68546e56SEric Biggers vshl.i32 q8, q4, #4 475*68546e56SEric Biggers vtrn.32 d4, d5 476*68546e56SEric Biggers vadd.i32 q7, q7, q1 477*68546e56SEric Biggers vadd.i32 q8, q8, q4 478*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128]! 479*68546e56SEric Biggers vshl.i32 q10, q2, #4 480*68546e56SEric Biggers vld1.8 {d22-d23}, [r2, : 128]! 481*68546e56SEric Biggers vadd.i32 q10, q10, q2 482*68546e56SEric Biggers vld1.8 {d24}, [r2, : 64] 483*68546e56SEric Biggers vadd.i32 q5, q5, q0 484*68546e56SEric Biggers add r2, r3, #192 485*68546e56SEric Biggers vld1.8 {d26-d27}, [r2, : 128]! 486*68546e56SEric Biggers vadd.i32 q6, q6, q3 487*68546e56SEric Biggers vld1.8 {d28-d29}, [r2, : 128]! 488*68546e56SEric Biggers vadd.i32 q8, q8, q4 489*68546e56SEric Biggers vld1.8 {d25}, [r2, : 64] 490*68546e56SEric Biggers vadd.i32 q10, q10, q2 491*68546e56SEric Biggers vtrn.32 q9, q13 492*68546e56SEric Biggers vadd.i32 q7, q7, q1 493*68546e56SEric Biggers vadd.i32 q5, q5, q0 494*68546e56SEric Biggers vtrn.32 q11, q14 495*68546e56SEric Biggers vadd.i32 q6, q6, q3 496*68546e56SEric Biggers add r2, sp, #528 497*68546e56SEric Biggers vadd.i32 q10, q10, q2 498*68546e56SEric Biggers vtrn.32 d24, d25 499*68546e56SEric Biggers vst1.8 {d12-d13}, [r2, : 128]! 500*68546e56SEric Biggers vshl.i32 q6, q13, #1 501*68546e56SEric Biggers vst1.8 {d20-d21}, [r2, : 128]! 502*68546e56SEric Biggers vshl.i32 q10, q14, #1 503*68546e56SEric Biggers vst1.8 {d12-d13}, [r2, : 128]! 504*68546e56SEric Biggers vshl.i32 q15, q12, #1 505*68546e56SEric Biggers vadd.i32 q8, q8, q4 506*68546e56SEric Biggers vext.32 d10, d31, d30, #0 507*68546e56SEric Biggers vadd.i32 q7, q7, q1 508*68546e56SEric Biggers vst1.8 {d16-d17}, [r2, : 128]! 509*68546e56SEric Biggers vmull.s32 q8, d18, d5 510*68546e56SEric Biggers vmlal.s32 q8, d26, d4 511*68546e56SEric Biggers vmlal.s32 q8, d19, d9 512*68546e56SEric Biggers vmlal.s32 q8, d27, d3 513*68546e56SEric Biggers vmlal.s32 q8, d22, d8 514*68546e56SEric Biggers vmlal.s32 q8, d28, d2 515*68546e56SEric Biggers vmlal.s32 q8, d23, d7 516*68546e56SEric Biggers vmlal.s32 q8, d29, d1 517*68546e56SEric Biggers vmlal.s32 q8, d24, d6 518*68546e56SEric Biggers vmlal.s32 q8, d25, d0 519*68546e56SEric Biggers vst1.8 {d14-d15}, [r2, : 128]! 520*68546e56SEric Biggers vmull.s32 q2, d18, d4 521*68546e56SEric Biggers vmlal.s32 q2, d12, d9 522*68546e56SEric Biggers vmlal.s32 q2, d13, d8 523*68546e56SEric Biggers vmlal.s32 q2, d19, d3 524*68546e56SEric Biggers vmlal.s32 q2, d22, d2 525*68546e56SEric Biggers vmlal.s32 q2, d23, d1 526*68546e56SEric Biggers vmlal.s32 q2, d24, d0 527*68546e56SEric Biggers vst1.8 {d20-d21}, [r2, : 128]! 528*68546e56SEric Biggers vmull.s32 q7, d18, d9 529*68546e56SEric Biggers vmlal.s32 q7, d26, d3 530*68546e56SEric Biggers vmlal.s32 q7, d19, d8 531*68546e56SEric Biggers vmlal.s32 q7, d27, d2 532*68546e56SEric Biggers vmlal.s32 q7, d22, d7 533*68546e56SEric Biggers vmlal.s32 q7, d28, d1 534*68546e56SEric Biggers vmlal.s32 q7, d23, d6 535*68546e56SEric Biggers vmlal.s32 q7, d29, d0 536*68546e56SEric Biggers vst1.8 {d10-d11}, [r2, : 128]! 537*68546e56SEric Biggers vmull.s32 q5, d18, d3 538*68546e56SEric Biggers vmlal.s32 q5, d19, d2 539*68546e56SEric Biggers vmlal.s32 q5, d22, d1 540*68546e56SEric Biggers vmlal.s32 q5, d23, d0 541*68546e56SEric Biggers vmlal.s32 q5, d12, d8 542*68546e56SEric Biggers vst1.8 {d16-d17}, [r2, : 128] 543*68546e56SEric Biggers vmull.s32 q4, d18, d8 544*68546e56SEric Biggers vmlal.s32 q4, d26, d2 545*68546e56SEric Biggers vmlal.s32 q4, d19, d7 546*68546e56SEric Biggers vmlal.s32 q4, d27, d1 547*68546e56SEric Biggers vmlal.s32 q4, d22, d6 548*68546e56SEric Biggers vmlal.s32 q4, d28, d0 549*68546e56SEric Biggers vmull.s32 q8, d18, d7 550*68546e56SEric Biggers vmlal.s32 q8, d26, d1 551*68546e56SEric Biggers vmlal.s32 q8, d19, d6 552*68546e56SEric Biggers vmlal.s32 q8, d27, d0 553*68546e56SEric Biggers add r2, sp, #544 554*68546e56SEric Biggers vld1.8 {d20-d21}, [r2, : 128] 555*68546e56SEric Biggers vmlal.s32 q7, d24, d21 556*68546e56SEric Biggers vmlal.s32 q7, d25, d20 557*68546e56SEric Biggers vmlal.s32 q4, d23, d21 558*68546e56SEric Biggers vmlal.s32 q4, d29, d20 559*68546e56SEric Biggers vmlal.s32 q8, d22, d21 560*68546e56SEric Biggers vmlal.s32 q8, d28, d20 561*68546e56SEric Biggers vmlal.s32 q5, d24, d20 562*68546e56SEric Biggers vst1.8 {d14-d15}, [r2, : 128] 563*68546e56SEric Biggers vmull.s32 q7, d18, d6 564*68546e56SEric Biggers vmlal.s32 q7, d26, d0 565*68546e56SEric Biggers add r2, sp, #624 566*68546e56SEric Biggers vld1.8 {d30-d31}, [r2, : 128] 567*68546e56SEric Biggers vmlal.s32 q2, d30, d21 568*68546e56SEric Biggers vmlal.s32 q7, d19, d21 569*68546e56SEric Biggers vmlal.s32 q7, d27, d20 570*68546e56SEric Biggers add r2, sp, #592 571*68546e56SEric Biggers vld1.8 {d26-d27}, [r2, : 128] 572*68546e56SEric Biggers vmlal.s32 q4, d25, d27 573*68546e56SEric Biggers vmlal.s32 q8, d29, d27 574*68546e56SEric Biggers vmlal.s32 q8, d25, d26 575*68546e56SEric Biggers vmlal.s32 q7, d28, d27 576*68546e56SEric Biggers vmlal.s32 q7, d29, d26 577*68546e56SEric Biggers add r2, sp, #576 578*68546e56SEric Biggers vld1.8 {d28-d29}, [r2, : 128] 579*68546e56SEric Biggers vmlal.s32 q4, d24, d29 580*68546e56SEric Biggers vmlal.s32 q8, d23, d29 581*68546e56SEric Biggers vmlal.s32 q8, d24, d28 582*68546e56SEric Biggers vmlal.s32 q7, d22, d29 583*68546e56SEric Biggers vmlal.s32 q7, d23, d28 584*68546e56SEric Biggers vst1.8 {d8-d9}, [r2, : 128] 585*68546e56SEric Biggers add r2, sp, #528 586*68546e56SEric Biggers vld1.8 {d8-d9}, [r2, : 128] 587*68546e56SEric Biggers vmlal.s32 q7, d24, d9 588*68546e56SEric Biggers vmlal.s32 q7, d25, d31 589*68546e56SEric Biggers vmull.s32 q1, d18, d2 590*68546e56SEric Biggers vmlal.s32 q1, d19, d1 591*68546e56SEric Biggers vmlal.s32 q1, d22, d0 592*68546e56SEric Biggers vmlal.s32 q1, d24, d27 593*68546e56SEric Biggers vmlal.s32 q1, d23, d20 594*68546e56SEric Biggers vmlal.s32 q1, d12, d7 595*68546e56SEric Biggers vmlal.s32 q1, d13, d6 596*68546e56SEric Biggers vmull.s32 q6, d18, d1 597*68546e56SEric Biggers vmlal.s32 q6, d19, d0 598*68546e56SEric Biggers vmlal.s32 q6, d23, d27 599*68546e56SEric Biggers vmlal.s32 q6, d22, d20 600*68546e56SEric Biggers vmlal.s32 q6, d24, d26 601*68546e56SEric Biggers vmull.s32 q0, d18, d0 602*68546e56SEric Biggers vmlal.s32 q0, d22, d27 603*68546e56SEric Biggers vmlal.s32 q0, d23, d26 604*68546e56SEric Biggers vmlal.s32 q0, d24, d31 605*68546e56SEric Biggers vmlal.s32 q0, d19, d20 606*68546e56SEric Biggers add r2, sp, #608 607*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 608*68546e56SEric Biggers vmlal.s32 q2, d18, d7 609*68546e56SEric Biggers vmlal.s32 q5, d18, d6 610*68546e56SEric Biggers vmlal.s32 q1, d18, d21 611*68546e56SEric Biggers vmlal.s32 q0, d18, d28 612*68546e56SEric Biggers vmlal.s32 q6, d18, d29 613*68546e56SEric Biggers vmlal.s32 q2, d19, d6 614*68546e56SEric Biggers vmlal.s32 q5, d19, d21 615*68546e56SEric Biggers vmlal.s32 q1, d19, d29 616*68546e56SEric Biggers vmlal.s32 q0, d19, d9 617*68546e56SEric Biggers vmlal.s32 q6, d19, d28 618*68546e56SEric Biggers add r2, sp, #560 619*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 620*68546e56SEric Biggers add r2, sp, #480 621*68546e56SEric Biggers vld1.8 {d22-d23}, [r2, : 128] 622*68546e56SEric Biggers vmlal.s32 q5, d19, d7 623*68546e56SEric Biggers vmlal.s32 q0, d18, d21 624*68546e56SEric Biggers vmlal.s32 q0, d19, d29 625*68546e56SEric Biggers vmlal.s32 q6, d18, d6 626*68546e56SEric Biggers add r2, sp, #496 627*68546e56SEric Biggers vld1.8 {d6-d7}, [r2, : 128] 628*68546e56SEric Biggers vmlal.s32 q6, d19, d21 629*68546e56SEric Biggers add r2, sp, #544 630*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 631*68546e56SEric Biggers vmlal.s32 q0, d30, d8 632*68546e56SEric Biggers add r2, sp, #640 633*68546e56SEric Biggers vld1.8 {d20-d21}, [r2, : 128] 634*68546e56SEric Biggers vmlal.s32 q5, d30, d29 635*68546e56SEric Biggers add r2, sp, #576 636*68546e56SEric Biggers vld1.8 {d24-d25}, [r2, : 128] 637*68546e56SEric Biggers vmlal.s32 q1, d30, d28 638*68546e56SEric Biggers vadd.i64 q13, q0, q11 639*68546e56SEric Biggers vadd.i64 q14, q5, q11 640*68546e56SEric Biggers vmlal.s32 q6, d30, d9 641*68546e56SEric Biggers vshr.s64 q4, q13, #26 642*68546e56SEric Biggers vshr.s64 q13, q14, #26 643*68546e56SEric Biggers vadd.i64 q7, q7, q4 644*68546e56SEric Biggers vshl.i64 q4, q4, #26 645*68546e56SEric Biggers vadd.i64 q14, q7, q3 646*68546e56SEric Biggers vadd.i64 q9, q9, q13 647*68546e56SEric Biggers vshl.i64 q13, q13, #26 648*68546e56SEric Biggers vadd.i64 q15, q9, q3 649*68546e56SEric Biggers vsub.i64 q0, q0, q4 650*68546e56SEric Biggers vshr.s64 q4, q14, #25 651*68546e56SEric Biggers vsub.i64 q5, q5, q13 652*68546e56SEric Biggers vshr.s64 q13, q15, #25 653*68546e56SEric Biggers vadd.i64 q6, q6, q4 654*68546e56SEric Biggers vshl.i64 q4, q4, #25 655*68546e56SEric Biggers vadd.i64 q14, q6, q11 656*68546e56SEric Biggers vadd.i64 q2, q2, q13 657*68546e56SEric Biggers vsub.i64 q4, q7, q4 658*68546e56SEric Biggers vshr.s64 q7, q14, #26 659*68546e56SEric Biggers vshl.i64 q13, q13, #25 660*68546e56SEric Biggers vadd.i64 q14, q2, q11 661*68546e56SEric Biggers vadd.i64 q8, q8, q7 662*68546e56SEric Biggers vshl.i64 q7, q7, #26 663*68546e56SEric Biggers vadd.i64 q15, q8, q3 664*68546e56SEric Biggers vsub.i64 q9, q9, q13 665*68546e56SEric Biggers vshr.s64 q13, q14, #26 666*68546e56SEric Biggers vsub.i64 q6, q6, q7 667*68546e56SEric Biggers vshr.s64 q7, q15, #25 668*68546e56SEric Biggers vadd.i64 q10, q10, q13 669*68546e56SEric Biggers vshl.i64 q13, q13, #26 670*68546e56SEric Biggers vadd.i64 q14, q10, q3 671*68546e56SEric Biggers vadd.i64 q1, q1, q7 672*68546e56SEric Biggers add r2, r3, #144 673*68546e56SEric Biggers vshl.i64 q7, q7, #25 674*68546e56SEric Biggers add r4, r3, #96 675*68546e56SEric Biggers vadd.i64 q15, q1, q11 676*68546e56SEric Biggers add r2, r2, #8 677*68546e56SEric Biggers vsub.i64 q2, q2, q13 678*68546e56SEric Biggers add r4, r4, #8 679*68546e56SEric Biggers vshr.s64 q13, q14, #25 680*68546e56SEric Biggers vsub.i64 q7, q8, q7 681*68546e56SEric Biggers vshr.s64 q8, q15, #26 682*68546e56SEric Biggers vadd.i64 q14, q13, q13 683*68546e56SEric Biggers vadd.i64 q12, q12, q8 684*68546e56SEric Biggers vtrn.32 d12, d14 685*68546e56SEric Biggers vshl.i64 q8, q8, #26 686*68546e56SEric Biggers vtrn.32 d13, d15 687*68546e56SEric Biggers vadd.i64 q3, q12, q3 688*68546e56SEric Biggers vadd.i64 q0, q0, q14 689*68546e56SEric Biggers vst1.8 d12, [r2, : 64]! 690*68546e56SEric Biggers vshl.i64 q7, q13, #4 691*68546e56SEric Biggers vst1.8 d13, [r4, : 64]! 692*68546e56SEric Biggers vsub.i64 q1, q1, q8 693*68546e56SEric Biggers vshr.s64 q3, q3, #25 694*68546e56SEric Biggers vadd.i64 q0, q0, q7 695*68546e56SEric Biggers vadd.i64 q5, q5, q3 696*68546e56SEric Biggers vshl.i64 q3, q3, #25 697*68546e56SEric Biggers vadd.i64 q6, q5, q11 698*68546e56SEric Biggers vadd.i64 q0, q0, q13 699*68546e56SEric Biggers vshl.i64 q7, q13, #25 700*68546e56SEric Biggers vadd.i64 q8, q0, q11 701*68546e56SEric Biggers vsub.i64 q3, q12, q3 702*68546e56SEric Biggers vshr.s64 q6, q6, #26 703*68546e56SEric Biggers vsub.i64 q7, q10, q7 704*68546e56SEric Biggers vtrn.32 d2, d6 705*68546e56SEric Biggers vshr.s64 q8, q8, #26 706*68546e56SEric Biggers vtrn.32 d3, d7 707*68546e56SEric Biggers vadd.i64 q3, q9, q6 708*68546e56SEric Biggers vst1.8 d2, [r2, : 64] 709*68546e56SEric Biggers vshl.i64 q6, q6, #26 710*68546e56SEric Biggers vst1.8 d3, [r4, : 64] 711*68546e56SEric Biggers vadd.i64 q1, q4, q8 712*68546e56SEric Biggers vtrn.32 d4, d14 713*68546e56SEric Biggers vshl.i64 q4, q8, #26 714*68546e56SEric Biggers vtrn.32 d5, d15 715*68546e56SEric Biggers vsub.i64 q5, q5, q6 716*68546e56SEric Biggers add r2, r2, #16 717*68546e56SEric Biggers vsub.i64 q0, q0, q4 718*68546e56SEric Biggers vst1.8 d4, [r2, : 64] 719*68546e56SEric Biggers add r4, r4, #16 720*68546e56SEric Biggers vst1.8 d5, [r4, : 64] 721*68546e56SEric Biggers vtrn.32 d10, d6 722*68546e56SEric Biggers vtrn.32 d11, d7 723*68546e56SEric Biggers sub r2, r2, #8 724*68546e56SEric Biggers sub r4, r4, #8 725*68546e56SEric Biggers vtrn.32 d0, d2 726*68546e56SEric Biggers vtrn.32 d1, d3 727*68546e56SEric Biggers vst1.8 d10, [r2, : 64] 728*68546e56SEric Biggers vst1.8 d11, [r4, : 64] 729*68546e56SEric Biggers sub r2, r2, #24 730*68546e56SEric Biggers sub r4, r4, #24 731*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 732*68546e56SEric Biggers vst1.8 d1, [r4, : 64] 733*68546e56SEric Biggers add r2, r3, #288 734*68546e56SEric Biggers add r4, r3, #336 735*68546e56SEric Biggers vld1.8 {d0-d1}, [r2, : 128]! 736*68546e56SEric Biggers vld1.8 {d2-d3}, [r4, : 128]! 737*68546e56SEric Biggers vsub.i32 q0, q0, q1 738*68546e56SEric Biggers vld1.8 {d2-d3}, [r2, : 128]! 739*68546e56SEric Biggers vld1.8 {d4-d5}, [r4, : 128]! 740*68546e56SEric Biggers vsub.i32 q1, q1, q2 741*68546e56SEric Biggers add r5, r3, #240 742*68546e56SEric Biggers vld1.8 {d4}, [r2, : 64] 743*68546e56SEric Biggers vld1.8 {d6}, [r4, : 64] 744*68546e56SEric Biggers vsub.i32 q2, q2, q3 745*68546e56SEric Biggers vst1.8 {d0-d1}, [r5, : 128]! 746*68546e56SEric Biggers vst1.8 {d2-d3}, [r5, : 128]! 747*68546e56SEric Biggers vst1.8 d4, [r5, : 64] 748*68546e56SEric Biggers add r2, r3, #144 749*68546e56SEric Biggers add r4, r3, #96 750*68546e56SEric Biggers add r5, r3, #144 751*68546e56SEric Biggers add r6, r3, #192 752*68546e56SEric Biggers vld1.8 {d0-d1}, [r2, : 128]! 753*68546e56SEric Biggers vld1.8 {d2-d3}, [r4, : 128]! 754*68546e56SEric Biggers vsub.i32 q2, q0, q1 755*68546e56SEric Biggers vadd.i32 q0, q0, q1 756*68546e56SEric Biggers vld1.8 {d2-d3}, [r2, : 128]! 757*68546e56SEric Biggers vld1.8 {d6-d7}, [r4, : 128]! 758*68546e56SEric Biggers vsub.i32 q4, q1, q3 759*68546e56SEric Biggers vadd.i32 q1, q1, q3 760*68546e56SEric Biggers vld1.8 {d6}, [r2, : 64] 761*68546e56SEric Biggers vld1.8 {d10}, [r4, : 64] 762*68546e56SEric Biggers vsub.i32 q6, q3, q5 763*68546e56SEric Biggers vadd.i32 q3, q3, q5 764*68546e56SEric Biggers vst1.8 {d4-d5}, [r5, : 128]! 765*68546e56SEric Biggers vst1.8 {d0-d1}, [r6, : 128]! 766*68546e56SEric Biggers vst1.8 {d8-d9}, [r5, : 128]! 767*68546e56SEric Biggers vst1.8 {d2-d3}, [r6, : 128]! 768*68546e56SEric Biggers vst1.8 d12, [r5, : 64] 769*68546e56SEric Biggers vst1.8 d6, [r6, : 64] 770*68546e56SEric Biggers add r2, r3, #0 771*68546e56SEric Biggers add r4, r3, #240 772*68546e56SEric Biggers vld1.8 {d0-d1}, [r4, : 128]! 773*68546e56SEric Biggers vld1.8 {d2-d3}, [r4, : 128]! 774*68546e56SEric Biggers vld1.8 {d4}, [r4, : 64] 775*68546e56SEric Biggers add r4, r3, #336 776*68546e56SEric Biggers vld1.8 {d6-d7}, [r4, : 128]! 777*68546e56SEric Biggers vtrn.32 q0, q3 778*68546e56SEric Biggers vld1.8 {d8-d9}, [r4, : 128]! 779*68546e56SEric Biggers vshl.i32 q5, q0, #4 780*68546e56SEric Biggers vtrn.32 q1, q4 781*68546e56SEric Biggers vshl.i32 q6, q3, #4 782*68546e56SEric Biggers vadd.i32 q5, q5, q0 783*68546e56SEric Biggers vadd.i32 q6, q6, q3 784*68546e56SEric Biggers vshl.i32 q7, q1, #4 785*68546e56SEric Biggers vld1.8 {d5}, [r4, : 64] 786*68546e56SEric Biggers vshl.i32 q8, q4, #4 787*68546e56SEric Biggers vtrn.32 d4, d5 788*68546e56SEric Biggers vadd.i32 q7, q7, q1 789*68546e56SEric Biggers vadd.i32 q8, q8, q4 790*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128]! 791*68546e56SEric Biggers vshl.i32 q10, q2, #4 792*68546e56SEric Biggers vld1.8 {d22-d23}, [r2, : 128]! 793*68546e56SEric Biggers vadd.i32 q10, q10, q2 794*68546e56SEric Biggers vld1.8 {d24}, [r2, : 64] 795*68546e56SEric Biggers vadd.i32 q5, q5, q0 796*68546e56SEric Biggers add r2, r3, #288 797*68546e56SEric Biggers vld1.8 {d26-d27}, [r2, : 128]! 798*68546e56SEric Biggers vadd.i32 q6, q6, q3 799*68546e56SEric Biggers vld1.8 {d28-d29}, [r2, : 128]! 800*68546e56SEric Biggers vadd.i32 q8, q8, q4 801*68546e56SEric Biggers vld1.8 {d25}, [r2, : 64] 802*68546e56SEric Biggers vadd.i32 q10, q10, q2 803*68546e56SEric Biggers vtrn.32 q9, q13 804*68546e56SEric Biggers vadd.i32 q7, q7, q1 805*68546e56SEric Biggers vadd.i32 q5, q5, q0 806*68546e56SEric Biggers vtrn.32 q11, q14 807*68546e56SEric Biggers vadd.i32 q6, q6, q3 808*68546e56SEric Biggers add r2, sp, #528 809*68546e56SEric Biggers vadd.i32 q10, q10, q2 810*68546e56SEric Biggers vtrn.32 d24, d25 811*68546e56SEric Biggers vst1.8 {d12-d13}, [r2, : 128]! 812*68546e56SEric Biggers vshl.i32 q6, q13, #1 813*68546e56SEric Biggers vst1.8 {d20-d21}, [r2, : 128]! 814*68546e56SEric Biggers vshl.i32 q10, q14, #1 815*68546e56SEric Biggers vst1.8 {d12-d13}, [r2, : 128]! 816*68546e56SEric Biggers vshl.i32 q15, q12, #1 817*68546e56SEric Biggers vadd.i32 q8, q8, q4 818*68546e56SEric Biggers vext.32 d10, d31, d30, #0 819*68546e56SEric Biggers vadd.i32 q7, q7, q1 820*68546e56SEric Biggers vst1.8 {d16-d17}, [r2, : 128]! 821*68546e56SEric Biggers vmull.s32 q8, d18, d5 822*68546e56SEric Biggers vmlal.s32 q8, d26, d4 823*68546e56SEric Biggers vmlal.s32 q8, d19, d9 824*68546e56SEric Biggers vmlal.s32 q8, d27, d3 825*68546e56SEric Biggers vmlal.s32 q8, d22, d8 826*68546e56SEric Biggers vmlal.s32 q8, d28, d2 827*68546e56SEric Biggers vmlal.s32 q8, d23, d7 828*68546e56SEric Biggers vmlal.s32 q8, d29, d1 829*68546e56SEric Biggers vmlal.s32 q8, d24, d6 830*68546e56SEric Biggers vmlal.s32 q8, d25, d0 831*68546e56SEric Biggers vst1.8 {d14-d15}, [r2, : 128]! 832*68546e56SEric Biggers vmull.s32 q2, d18, d4 833*68546e56SEric Biggers vmlal.s32 q2, d12, d9 834*68546e56SEric Biggers vmlal.s32 q2, d13, d8 835*68546e56SEric Biggers vmlal.s32 q2, d19, d3 836*68546e56SEric Biggers vmlal.s32 q2, d22, d2 837*68546e56SEric Biggers vmlal.s32 q2, d23, d1 838*68546e56SEric Biggers vmlal.s32 q2, d24, d0 839*68546e56SEric Biggers vst1.8 {d20-d21}, [r2, : 128]! 840*68546e56SEric Biggers vmull.s32 q7, d18, d9 841*68546e56SEric Biggers vmlal.s32 q7, d26, d3 842*68546e56SEric Biggers vmlal.s32 q7, d19, d8 843*68546e56SEric Biggers vmlal.s32 q7, d27, d2 844*68546e56SEric Biggers vmlal.s32 q7, d22, d7 845*68546e56SEric Biggers vmlal.s32 q7, d28, d1 846*68546e56SEric Biggers vmlal.s32 q7, d23, d6 847*68546e56SEric Biggers vmlal.s32 q7, d29, d0 848*68546e56SEric Biggers vst1.8 {d10-d11}, [r2, : 128]! 849*68546e56SEric Biggers vmull.s32 q5, d18, d3 850*68546e56SEric Biggers vmlal.s32 q5, d19, d2 851*68546e56SEric Biggers vmlal.s32 q5, d22, d1 852*68546e56SEric Biggers vmlal.s32 q5, d23, d0 853*68546e56SEric Biggers vmlal.s32 q5, d12, d8 854*68546e56SEric Biggers vst1.8 {d16-d17}, [r2, : 128]! 855*68546e56SEric Biggers vmull.s32 q4, d18, d8 856*68546e56SEric Biggers vmlal.s32 q4, d26, d2 857*68546e56SEric Biggers vmlal.s32 q4, d19, d7 858*68546e56SEric Biggers vmlal.s32 q4, d27, d1 859*68546e56SEric Biggers vmlal.s32 q4, d22, d6 860*68546e56SEric Biggers vmlal.s32 q4, d28, d0 861*68546e56SEric Biggers vmull.s32 q8, d18, d7 862*68546e56SEric Biggers vmlal.s32 q8, d26, d1 863*68546e56SEric Biggers vmlal.s32 q8, d19, d6 864*68546e56SEric Biggers vmlal.s32 q8, d27, d0 865*68546e56SEric Biggers add r2, sp, #544 866*68546e56SEric Biggers vld1.8 {d20-d21}, [r2, : 128] 867*68546e56SEric Biggers vmlal.s32 q7, d24, d21 868*68546e56SEric Biggers vmlal.s32 q7, d25, d20 869*68546e56SEric Biggers vmlal.s32 q4, d23, d21 870*68546e56SEric Biggers vmlal.s32 q4, d29, d20 871*68546e56SEric Biggers vmlal.s32 q8, d22, d21 872*68546e56SEric Biggers vmlal.s32 q8, d28, d20 873*68546e56SEric Biggers vmlal.s32 q5, d24, d20 874*68546e56SEric Biggers vst1.8 {d14-d15}, [r2, : 128] 875*68546e56SEric Biggers vmull.s32 q7, d18, d6 876*68546e56SEric Biggers vmlal.s32 q7, d26, d0 877*68546e56SEric Biggers add r2, sp, #624 878*68546e56SEric Biggers vld1.8 {d30-d31}, [r2, : 128] 879*68546e56SEric Biggers vmlal.s32 q2, d30, d21 880*68546e56SEric Biggers vmlal.s32 q7, d19, d21 881*68546e56SEric Biggers vmlal.s32 q7, d27, d20 882*68546e56SEric Biggers add r2, sp, #592 883*68546e56SEric Biggers vld1.8 {d26-d27}, [r2, : 128] 884*68546e56SEric Biggers vmlal.s32 q4, d25, d27 885*68546e56SEric Biggers vmlal.s32 q8, d29, d27 886*68546e56SEric Biggers vmlal.s32 q8, d25, d26 887*68546e56SEric Biggers vmlal.s32 q7, d28, d27 888*68546e56SEric Biggers vmlal.s32 q7, d29, d26 889*68546e56SEric Biggers add r2, sp, #576 890*68546e56SEric Biggers vld1.8 {d28-d29}, [r2, : 128] 891*68546e56SEric Biggers vmlal.s32 q4, d24, d29 892*68546e56SEric Biggers vmlal.s32 q8, d23, d29 893*68546e56SEric Biggers vmlal.s32 q8, d24, d28 894*68546e56SEric Biggers vmlal.s32 q7, d22, d29 895*68546e56SEric Biggers vmlal.s32 q7, d23, d28 896*68546e56SEric Biggers vst1.8 {d8-d9}, [r2, : 128] 897*68546e56SEric Biggers add r2, sp, #528 898*68546e56SEric Biggers vld1.8 {d8-d9}, [r2, : 128] 899*68546e56SEric Biggers vmlal.s32 q7, d24, d9 900*68546e56SEric Biggers vmlal.s32 q7, d25, d31 901*68546e56SEric Biggers vmull.s32 q1, d18, d2 902*68546e56SEric Biggers vmlal.s32 q1, d19, d1 903*68546e56SEric Biggers vmlal.s32 q1, d22, d0 904*68546e56SEric Biggers vmlal.s32 q1, d24, d27 905*68546e56SEric Biggers vmlal.s32 q1, d23, d20 906*68546e56SEric Biggers vmlal.s32 q1, d12, d7 907*68546e56SEric Biggers vmlal.s32 q1, d13, d6 908*68546e56SEric Biggers vmull.s32 q6, d18, d1 909*68546e56SEric Biggers vmlal.s32 q6, d19, d0 910*68546e56SEric Biggers vmlal.s32 q6, d23, d27 911*68546e56SEric Biggers vmlal.s32 q6, d22, d20 912*68546e56SEric Biggers vmlal.s32 q6, d24, d26 913*68546e56SEric Biggers vmull.s32 q0, d18, d0 914*68546e56SEric Biggers vmlal.s32 q0, d22, d27 915*68546e56SEric Biggers vmlal.s32 q0, d23, d26 916*68546e56SEric Biggers vmlal.s32 q0, d24, d31 917*68546e56SEric Biggers vmlal.s32 q0, d19, d20 918*68546e56SEric Biggers add r2, sp, #608 919*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 920*68546e56SEric Biggers vmlal.s32 q2, d18, d7 921*68546e56SEric Biggers vmlal.s32 q5, d18, d6 922*68546e56SEric Biggers vmlal.s32 q1, d18, d21 923*68546e56SEric Biggers vmlal.s32 q0, d18, d28 924*68546e56SEric Biggers vmlal.s32 q6, d18, d29 925*68546e56SEric Biggers vmlal.s32 q2, d19, d6 926*68546e56SEric Biggers vmlal.s32 q5, d19, d21 927*68546e56SEric Biggers vmlal.s32 q1, d19, d29 928*68546e56SEric Biggers vmlal.s32 q0, d19, d9 929*68546e56SEric Biggers vmlal.s32 q6, d19, d28 930*68546e56SEric Biggers add r2, sp, #560 931*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 932*68546e56SEric Biggers add r2, sp, #480 933*68546e56SEric Biggers vld1.8 {d22-d23}, [r2, : 128] 934*68546e56SEric Biggers vmlal.s32 q5, d19, d7 935*68546e56SEric Biggers vmlal.s32 q0, d18, d21 936*68546e56SEric Biggers vmlal.s32 q0, d19, d29 937*68546e56SEric Biggers vmlal.s32 q6, d18, d6 938*68546e56SEric Biggers add r2, sp, #496 939*68546e56SEric Biggers vld1.8 {d6-d7}, [r2, : 128] 940*68546e56SEric Biggers vmlal.s32 q6, d19, d21 941*68546e56SEric Biggers add r2, sp, #544 942*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 943*68546e56SEric Biggers vmlal.s32 q0, d30, d8 944*68546e56SEric Biggers add r2, sp, #640 945*68546e56SEric Biggers vld1.8 {d20-d21}, [r2, : 128] 946*68546e56SEric Biggers vmlal.s32 q5, d30, d29 947*68546e56SEric Biggers add r2, sp, #576 948*68546e56SEric Biggers vld1.8 {d24-d25}, [r2, : 128] 949*68546e56SEric Biggers vmlal.s32 q1, d30, d28 950*68546e56SEric Biggers vadd.i64 q13, q0, q11 951*68546e56SEric Biggers vadd.i64 q14, q5, q11 952*68546e56SEric Biggers vmlal.s32 q6, d30, d9 953*68546e56SEric Biggers vshr.s64 q4, q13, #26 954*68546e56SEric Biggers vshr.s64 q13, q14, #26 955*68546e56SEric Biggers vadd.i64 q7, q7, q4 956*68546e56SEric Biggers vshl.i64 q4, q4, #26 957*68546e56SEric Biggers vadd.i64 q14, q7, q3 958*68546e56SEric Biggers vadd.i64 q9, q9, q13 959*68546e56SEric Biggers vshl.i64 q13, q13, #26 960*68546e56SEric Biggers vadd.i64 q15, q9, q3 961*68546e56SEric Biggers vsub.i64 q0, q0, q4 962*68546e56SEric Biggers vshr.s64 q4, q14, #25 963*68546e56SEric Biggers vsub.i64 q5, q5, q13 964*68546e56SEric Biggers vshr.s64 q13, q15, #25 965*68546e56SEric Biggers vadd.i64 q6, q6, q4 966*68546e56SEric Biggers vshl.i64 q4, q4, #25 967*68546e56SEric Biggers vadd.i64 q14, q6, q11 968*68546e56SEric Biggers vadd.i64 q2, q2, q13 969*68546e56SEric Biggers vsub.i64 q4, q7, q4 970*68546e56SEric Biggers vshr.s64 q7, q14, #26 971*68546e56SEric Biggers vshl.i64 q13, q13, #25 972*68546e56SEric Biggers vadd.i64 q14, q2, q11 973*68546e56SEric Biggers vadd.i64 q8, q8, q7 974*68546e56SEric Biggers vshl.i64 q7, q7, #26 975*68546e56SEric Biggers vadd.i64 q15, q8, q3 976*68546e56SEric Biggers vsub.i64 q9, q9, q13 977*68546e56SEric Biggers vshr.s64 q13, q14, #26 978*68546e56SEric Biggers vsub.i64 q6, q6, q7 979*68546e56SEric Biggers vshr.s64 q7, q15, #25 980*68546e56SEric Biggers vadd.i64 q10, q10, q13 981*68546e56SEric Biggers vshl.i64 q13, q13, #26 982*68546e56SEric Biggers vadd.i64 q14, q10, q3 983*68546e56SEric Biggers vadd.i64 q1, q1, q7 984*68546e56SEric Biggers add r2, r3, #288 985*68546e56SEric Biggers vshl.i64 q7, q7, #25 986*68546e56SEric Biggers add r4, r3, #96 987*68546e56SEric Biggers vadd.i64 q15, q1, q11 988*68546e56SEric Biggers add r2, r2, #8 989*68546e56SEric Biggers vsub.i64 q2, q2, q13 990*68546e56SEric Biggers add r4, r4, #8 991*68546e56SEric Biggers vshr.s64 q13, q14, #25 992*68546e56SEric Biggers vsub.i64 q7, q8, q7 993*68546e56SEric Biggers vshr.s64 q8, q15, #26 994*68546e56SEric Biggers vadd.i64 q14, q13, q13 995*68546e56SEric Biggers vadd.i64 q12, q12, q8 996*68546e56SEric Biggers vtrn.32 d12, d14 997*68546e56SEric Biggers vshl.i64 q8, q8, #26 998*68546e56SEric Biggers vtrn.32 d13, d15 999*68546e56SEric Biggers vadd.i64 q3, q12, q3 1000*68546e56SEric Biggers vadd.i64 q0, q0, q14 1001*68546e56SEric Biggers vst1.8 d12, [r2, : 64]! 1002*68546e56SEric Biggers vshl.i64 q7, q13, #4 1003*68546e56SEric Biggers vst1.8 d13, [r4, : 64]! 1004*68546e56SEric Biggers vsub.i64 q1, q1, q8 1005*68546e56SEric Biggers vshr.s64 q3, q3, #25 1006*68546e56SEric Biggers vadd.i64 q0, q0, q7 1007*68546e56SEric Biggers vadd.i64 q5, q5, q3 1008*68546e56SEric Biggers vshl.i64 q3, q3, #25 1009*68546e56SEric Biggers vadd.i64 q6, q5, q11 1010*68546e56SEric Biggers vadd.i64 q0, q0, q13 1011*68546e56SEric Biggers vshl.i64 q7, q13, #25 1012*68546e56SEric Biggers vadd.i64 q8, q0, q11 1013*68546e56SEric Biggers vsub.i64 q3, q12, q3 1014*68546e56SEric Biggers vshr.s64 q6, q6, #26 1015*68546e56SEric Biggers vsub.i64 q7, q10, q7 1016*68546e56SEric Biggers vtrn.32 d2, d6 1017*68546e56SEric Biggers vshr.s64 q8, q8, #26 1018*68546e56SEric Biggers vtrn.32 d3, d7 1019*68546e56SEric Biggers vadd.i64 q3, q9, q6 1020*68546e56SEric Biggers vst1.8 d2, [r2, : 64] 1021*68546e56SEric Biggers vshl.i64 q6, q6, #26 1022*68546e56SEric Biggers vst1.8 d3, [r4, : 64] 1023*68546e56SEric Biggers vadd.i64 q1, q4, q8 1024*68546e56SEric Biggers vtrn.32 d4, d14 1025*68546e56SEric Biggers vshl.i64 q4, q8, #26 1026*68546e56SEric Biggers vtrn.32 d5, d15 1027*68546e56SEric Biggers vsub.i64 q5, q5, q6 1028*68546e56SEric Biggers add r2, r2, #16 1029*68546e56SEric Biggers vsub.i64 q0, q0, q4 1030*68546e56SEric Biggers vst1.8 d4, [r2, : 64] 1031*68546e56SEric Biggers add r4, r4, #16 1032*68546e56SEric Biggers vst1.8 d5, [r4, : 64] 1033*68546e56SEric Biggers vtrn.32 d10, d6 1034*68546e56SEric Biggers vtrn.32 d11, d7 1035*68546e56SEric Biggers sub r2, r2, #8 1036*68546e56SEric Biggers sub r4, r4, #8 1037*68546e56SEric Biggers vtrn.32 d0, d2 1038*68546e56SEric Biggers vtrn.32 d1, d3 1039*68546e56SEric Biggers vst1.8 d10, [r2, : 64] 1040*68546e56SEric Biggers vst1.8 d11, [r4, : 64] 1041*68546e56SEric Biggers sub r2, r2, #24 1042*68546e56SEric Biggers sub r4, r4, #24 1043*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 1044*68546e56SEric Biggers vst1.8 d1, [r4, : 64] 1045*68546e56SEric Biggers add r2, sp, #512 1046*68546e56SEric Biggers add r4, r3, #144 1047*68546e56SEric Biggers add r5, r3, #192 1048*68546e56SEric Biggers vld1.8 {d0-d1}, [r2, : 128] 1049*68546e56SEric Biggers vld1.8 {d2-d3}, [r4, : 128]! 1050*68546e56SEric Biggers vld1.8 {d4-d5}, [r5, : 128]! 1051*68546e56SEric Biggers vzip.i32 q1, q2 1052*68546e56SEric Biggers vld1.8 {d6-d7}, [r4, : 128]! 1053*68546e56SEric Biggers vld1.8 {d8-d9}, [r5, : 128]! 1054*68546e56SEric Biggers vshl.i32 q5, q1, #1 1055*68546e56SEric Biggers vzip.i32 q3, q4 1056*68546e56SEric Biggers vshl.i32 q6, q2, #1 1057*68546e56SEric Biggers vld1.8 {d14}, [r4, : 64] 1058*68546e56SEric Biggers vshl.i32 q8, q3, #1 1059*68546e56SEric Biggers vld1.8 {d15}, [r5, : 64] 1060*68546e56SEric Biggers vshl.i32 q9, q4, #1 1061*68546e56SEric Biggers vmul.i32 d21, d7, d1 1062*68546e56SEric Biggers vtrn.32 d14, d15 1063*68546e56SEric Biggers vmul.i32 q11, q4, q0 1064*68546e56SEric Biggers vmul.i32 q0, q7, q0 1065*68546e56SEric Biggers vmull.s32 q12, d2, d2 1066*68546e56SEric Biggers vmlal.s32 q12, d11, d1 1067*68546e56SEric Biggers vmlal.s32 q12, d12, d0 1068*68546e56SEric Biggers vmlal.s32 q12, d13, d23 1069*68546e56SEric Biggers vmlal.s32 q12, d16, d22 1070*68546e56SEric Biggers vmlal.s32 q12, d7, d21 1071*68546e56SEric Biggers vmull.s32 q10, d2, d11 1072*68546e56SEric Biggers vmlal.s32 q10, d4, d1 1073*68546e56SEric Biggers vmlal.s32 q10, d13, d0 1074*68546e56SEric Biggers vmlal.s32 q10, d6, d23 1075*68546e56SEric Biggers vmlal.s32 q10, d17, d22 1076*68546e56SEric Biggers vmull.s32 q13, d10, d4 1077*68546e56SEric Biggers vmlal.s32 q13, d11, d3 1078*68546e56SEric Biggers vmlal.s32 q13, d13, d1 1079*68546e56SEric Biggers vmlal.s32 q13, d16, d0 1080*68546e56SEric Biggers vmlal.s32 q13, d17, d23 1081*68546e56SEric Biggers vmlal.s32 q13, d8, d22 1082*68546e56SEric Biggers vmull.s32 q1, d10, d5 1083*68546e56SEric Biggers vmlal.s32 q1, d11, d4 1084*68546e56SEric Biggers vmlal.s32 q1, d6, d1 1085*68546e56SEric Biggers vmlal.s32 q1, d17, d0 1086*68546e56SEric Biggers vmlal.s32 q1, d8, d23 1087*68546e56SEric Biggers vmull.s32 q14, d10, d6 1088*68546e56SEric Biggers vmlal.s32 q14, d11, d13 1089*68546e56SEric Biggers vmlal.s32 q14, d4, d4 1090*68546e56SEric Biggers vmlal.s32 q14, d17, d1 1091*68546e56SEric Biggers vmlal.s32 q14, d18, d0 1092*68546e56SEric Biggers vmlal.s32 q14, d9, d23 1093*68546e56SEric Biggers vmull.s32 q11, d10, d7 1094*68546e56SEric Biggers vmlal.s32 q11, d11, d6 1095*68546e56SEric Biggers vmlal.s32 q11, d12, d5 1096*68546e56SEric Biggers vmlal.s32 q11, d8, d1 1097*68546e56SEric Biggers vmlal.s32 q11, d19, d0 1098*68546e56SEric Biggers vmull.s32 q15, d10, d8 1099*68546e56SEric Biggers vmlal.s32 q15, d11, d17 1100*68546e56SEric Biggers vmlal.s32 q15, d12, d6 1101*68546e56SEric Biggers vmlal.s32 q15, d13, d5 1102*68546e56SEric Biggers vmlal.s32 q15, d19, d1 1103*68546e56SEric Biggers vmlal.s32 q15, d14, d0 1104*68546e56SEric Biggers vmull.s32 q2, d10, d9 1105*68546e56SEric Biggers vmlal.s32 q2, d11, d8 1106*68546e56SEric Biggers vmlal.s32 q2, d12, d7 1107*68546e56SEric Biggers vmlal.s32 q2, d13, d6 1108*68546e56SEric Biggers vmlal.s32 q2, d14, d1 1109*68546e56SEric Biggers vmull.s32 q0, d15, d1 1110*68546e56SEric Biggers vmlal.s32 q0, d10, d14 1111*68546e56SEric Biggers vmlal.s32 q0, d11, d19 1112*68546e56SEric Biggers vmlal.s32 q0, d12, d8 1113*68546e56SEric Biggers vmlal.s32 q0, d13, d17 1114*68546e56SEric Biggers vmlal.s32 q0, d6, d6 1115*68546e56SEric Biggers add r2, sp, #480 1116*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128]! 1117*68546e56SEric Biggers vmull.s32 q3, d16, d7 1118*68546e56SEric Biggers vmlal.s32 q3, d10, d15 1119*68546e56SEric Biggers vmlal.s32 q3, d11, d14 1120*68546e56SEric Biggers vmlal.s32 q3, d12, d9 1121*68546e56SEric Biggers vmlal.s32 q3, d13, d8 1122*68546e56SEric Biggers vld1.8 {d8-d9}, [r2, : 128] 1123*68546e56SEric Biggers vadd.i64 q5, q12, q9 1124*68546e56SEric Biggers vadd.i64 q6, q15, q9 1125*68546e56SEric Biggers vshr.s64 q5, q5, #26 1126*68546e56SEric Biggers vshr.s64 q6, q6, #26 1127*68546e56SEric Biggers vadd.i64 q7, q10, q5 1128*68546e56SEric Biggers vshl.i64 q5, q5, #26 1129*68546e56SEric Biggers vadd.i64 q8, q7, q4 1130*68546e56SEric Biggers vadd.i64 q2, q2, q6 1131*68546e56SEric Biggers vshl.i64 q6, q6, #26 1132*68546e56SEric Biggers vadd.i64 q10, q2, q4 1133*68546e56SEric Biggers vsub.i64 q5, q12, q5 1134*68546e56SEric Biggers vshr.s64 q8, q8, #25 1135*68546e56SEric Biggers vsub.i64 q6, q15, q6 1136*68546e56SEric Biggers vshr.s64 q10, q10, #25 1137*68546e56SEric Biggers vadd.i64 q12, q13, q8 1138*68546e56SEric Biggers vshl.i64 q8, q8, #25 1139*68546e56SEric Biggers vadd.i64 q13, q12, q9 1140*68546e56SEric Biggers vadd.i64 q0, q0, q10 1141*68546e56SEric Biggers vsub.i64 q7, q7, q8 1142*68546e56SEric Biggers vshr.s64 q8, q13, #26 1143*68546e56SEric Biggers vshl.i64 q10, q10, #25 1144*68546e56SEric Biggers vadd.i64 q13, q0, q9 1145*68546e56SEric Biggers vadd.i64 q1, q1, q8 1146*68546e56SEric Biggers vshl.i64 q8, q8, #26 1147*68546e56SEric Biggers vadd.i64 q15, q1, q4 1148*68546e56SEric Biggers vsub.i64 q2, q2, q10 1149*68546e56SEric Biggers vshr.s64 q10, q13, #26 1150*68546e56SEric Biggers vsub.i64 q8, q12, q8 1151*68546e56SEric Biggers vshr.s64 q12, q15, #25 1152*68546e56SEric Biggers vadd.i64 q3, q3, q10 1153*68546e56SEric Biggers vshl.i64 q10, q10, #26 1154*68546e56SEric Biggers vadd.i64 q13, q3, q4 1155*68546e56SEric Biggers vadd.i64 q14, q14, q12 1156*68546e56SEric Biggers add r2, r3, #144 1157*68546e56SEric Biggers vshl.i64 q12, q12, #25 1158*68546e56SEric Biggers add r4, r3, #192 1159*68546e56SEric Biggers vadd.i64 q15, q14, q9 1160*68546e56SEric Biggers add r2, r2, #8 1161*68546e56SEric Biggers vsub.i64 q0, q0, q10 1162*68546e56SEric Biggers add r4, r4, #8 1163*68546e56SEric Biggers vshr.s64 q10, q13, #25 1164*68546e56SEric Biggers vsub.i64 q1, q1, q12 1165*68546e56SEric Biggers vshr.s64 q12, q15, #26 1166*68546e56SEric Biggers vadd.i64 q13, q10, q10 1167*68546e56SEric Biggers vadd.i64 q11, q11, q12 1168*68546e56SEric Biggers vtrn.32 d16, d2 1169*68546e56SEric Biggers vshl.i64 q12, q12, #26 1170*68546e56SEric Biggers vtrn.32 d17, d3 1171*68546e56SEric Biggers vadd.i64 q1, q11, q4 1172*68546e56SEric Biggers vadd.i64 q4, q5, q13 1173*68546e56SEric Biggers vst1.8 d16, [r2, : 64]! 1174*68546e56SEric Biggers vshl.i64 q5, q10, #4 1175*68546e56SEric Biggers vst1.8 d17, [r4, : 64]! 1176*68546e56SEric Biggers vsub.i64 q8, q14, q12 1177*68546e56SEric Biggers vshr.s64 q1, q1, #25 1178*68546e56SEric Biggers vadd.i64 q4, q4, q5 1179*68546e56SEric Biggers vadd.i64 q5, q6, q1 1180*68546e56SEric Biggers vshl.i64 q1, q1, #25 1181*68546e56SEric Biggers vadd.i64 q6, q5, q9 1182*68546e56SEric Biggers vadd.i64 q4, q4, q10 1183*68546e56SEric Biggers vshl.i64 q10, q10, #25 1184*68546e56SEric Biggers vadd.i64 q9, q4, q9 1185*68546e56SEric Biggers vsub.i64 q1, q11, q1 1186*68546e56SEric Biggers vshr.s64 q6, q6, #26 1187*68546e56SEric Biggers vsub.i64 q3, q3, q10 1188*68546e56SEric Biggers vtrn.32 d16, d2 1189*68546e56SEric Biggers vshr.s64 q9, q9, #26 1190*68546e56SEric Biggers vtrn.32 d17, d3 1191*68546e56SEric Biggers vadd.i64 q1, q2, q6 1192*68546e56SEric Biggers vst1.8 d16, [r2, : 64] 1193*68546e56SEric Biggers vshl.i64 q2, q6, #26 1194*68546e56SEric Biggers vst1.8 d17, [r4, : 64] 1195*68546e56SEric Biggers vadd.i64 q6, q7, q9 1196*68546e56SEric Biggers vtrn.32 d0, d6 1197*68546e56SEric Biggers vshl.i64 q7, q9, #26 1198*68546e56SEric Biggers vtrn.32 d1, d7 1199*68546e56SEric Biggers vsub.i64 q2, q5, q2 1200*68546e56SEric Biggers add r2, r2, #16 1201*68546e56SEric Biggers vsub.i64 q3, q4, q7 1202*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 1203*68546e56SEric Biggers add r4, r4, #16 1204*68546e56SEric Biggers vst1.8 d1, [r4, : 64] 1205*68546e56SEric Biggers vtrn.32 d4, d2 1206*68546e56SEric Biggers vtrn.32 d5, d3 1207*68546e56SEric Biggers sub r2, r2, #8 1208*68546e56SEric Biggers sub r4, r4, #8 1209*68546e56SEric Biggers vtrn.32 d6, d12 1210*68546e56SEric Biggers vtrn.32 d7, d13 1211*68546e56SEric Biggers vst1.8 d4, [r2, : 64] 1212*68546e56SEric Biggers vst1.8 d5, [r4, : 64] 1213*68546e56SEric Biggers sub r2, r2, #24 1214*68546e56SEric Biggers sub r4, r4, #24 1215*68546e56SEric Biggers vst1.8 d6, [r2, : 64] 1216*68546e56SEric Biggers vst1.8 d7, [r4, : 64] 1217*68546e56SEric Biggers add r2, r3, #336 1218*68546e56SEric Biggers add r4, r3, #288 1219*68546e56SEric Biggers vld1.8 {d0-d1}, [r2, : 128]! 1220*68546e56SEric Biggers vld1.8 {d2-d3}, [r4, : 128]! 1221*68546e56SEric Biggers vadd.i32 q0, q0, q1 1222*68546e56SEric Biggers vld1.8 {d2-d3}, [r2, : 128]! 1223*68546e56SEric Biggers vld1.8 {d4-d5}, [r4, : 128]! 1224*68546e56SEric Biggers vadd.i32 q1, q1, q2 1225*68546e56SEric Biggers add r5, r3, #288 1226*68546e56SEric Biggers vld1.8 {d4}, [r2, : 64] 1227*68546e56SEric Biggers vld1.8 {d6}, [r4, : 64] 1228*68546e56SEric Biggers vadd.i32 q2, q2, q3 1229*68546e56SEric Biggers vst1.8 {d0-d1}, [r5, : 128]! 1230*68546e56SEric Biggers vst1.8 {d2-d3}, [r5, : 128]! 1231*68546e56SEric Biggers vst1.8 d4, [r5, : 64] 1232*68546e56SEric Biggers add r2, r3, #48 1233*68546e56SEric Biggers add r4, r3, #144 1234*68546e56SEric Biggers vld1.8 {d0-d1}, [r4, : 128]! 1235*68546e56SEric Biggers vld1.8 {d2-d3}, [r4, : 128]! 1236*68546e56SEric Biggers vld1.8 {d4}, [r4, : 64] 1237*68546e56SEric Biggers add r4, r3, #288 1238*68546e56SEric Biggers vld1.8 {d6-d7}, [r4, : 128]! 1239*68546e56SEric Biggers vtrn.32 q0, q3 1240*68546e56SEric Biggers vld1.8 {d8-d9}, [r4, : 128]! 1241*68546e56SEric Biggers vshl.i32 q5, q0, #4 1242*68546e56SEric Biggers vtrn.32 q1, q4 1243*68546e56SEric Biggers vshl.i32 q6, q3, #4 1244*68546e56SEric Biggers vadd.i32 q5, q5, q0 1245*68546e56SEric Biggers vadd.i32 q6, q6, q3 1246*68546e56SEric Biggers vshl.i32 q7, q1, #4 1247*68546e56SEric Biggers vld1.8 {d5}, [r4, : 64] 1248*68546e56SEric Biggers vshl.i32 q8, q4, #4 1249*68546e56SEric Biggers vtrn.32 d4, d5 1250*68546e56SEric Biggers vadd.i32 q7, q7, q1 1251*68546e56SEric Biggers vadd.i32 q8, q8, q4 1252*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128]! 1253*68546e56SEric Biggers vshl.i32 q10, q2, #4 1254*68546e56SEric Biggers vld1.8 {d22-d23}, [r2, : 128]! 1255*68546e56SEric Biggers vadd.i32 q10, q10, q2 1256*68546e56SEric Biggers vld1.8 {d24}, [r2, : 64] 1257*68546e56SEric Biggers vadd.i32 q5, q5, q0 1258*68546e56SEric Biggers add r2, r3, #240 1259*68546e56SEric Biggers vld1.8 {d26-d27}, [r2, : 128]! 1260*68546e56SEric Biggers vadd.i32 q6, q6, q3 1261*68546e56SEric Biggers vld1.8 {d28-d29}, [r2, : 128]! 1262*68546e56SEric Biggers vadd.i32 q8, q8, q4 1263*68546e56SEric Biggers vld1.8 {d25}, [r2, : 64] 1264*68546e56SEric Biggers vadd.i32 q10, q10, q2 1265*68546e56SEric Biggers vtrn.32 q9, q13 1266*68546e56SEric Biggers vadd.i32 q7, q7, q1 1267*68546e56SEric Biggers vadd.i32 q5, q5, q0 1268*68546e56SEric Biggers vtrn.32 q11, q14 1269*68546e56SEric Biggers vadd.i32 q6, q6, q3 1270*68546e56SEric Biggers add r2, sp, #528 1271*68546e56SEric Biggers vadd.i32 q10, q10, q2 1272*68546e56SEric Biggers vtrn.32 d24, d25 1273*68546e56SEric Biggers vst1.8 {d12-d13}, [r2, : 128]! 1274*68546e56SEric Biggers vshl.i32 q6, q13, #1 1275*68546e56SEric Biggers vst1.8 {d20-d21}, [r2, : 128]! 1276*68546e56SEric Biggers vshl.i32 q10, q14, #1 1277*68546e56SEric Biggers vst1.8 {d12-d13}, [r2, : 128]! 1278*68546e56SEric Biggers vshl.i32 q15, q12, #1 1279*68546e56SEric Biggers vadd.i32 q8, q8, q4 1280*68546e56SEric Biggers vext.32 d10, d31, d30, #0 1281*68546e56SEric Biggers vadd.i32 q7, q7, q1 1282*68546e56SEric Biggers vst1.8 {d16-d17}, [r2, : 128]! 1283*68546e56SEric Biggers vmull.s32 q8, d18, d5 1284*68546e56SEric Biggers vmlal.s32 q8, d26, d4 1285*68546e56SEric Biggers vmlal.s32 q8, d19, d9 1286*68546e56SEric Biggers vmlal.s32 q8, d27, d3 1287*68546e56SEric Biggers vmlal.s32 q8, d22, d8 1288*68546e56SEric Biggers vmlal.s32 q8, d28, d2 1289*68546e56SEric Biggers vmlal.s32 q8, d23, d7 1290*68546e56SEric Biggers vmlal.s32 q8, d29, d1 1291*68546e56SEric Biggers vmlal.s32 q8, d24, d6 1292*68546e56SEric Biggers vmlal.s32 q8, d25, d0 1293*68546e56SEric Biggers vst1.8 {d14-d15}, [r2, : 128]! 1294*68546e56SEric Biggers vmull.s32 q2, d18, d4 1295*68546e56SEric Biggers vmlal.s32 q2, d12, d9 1296*68546e56SEric Biggers vmlal.s32 q2, d13, d8 1297*68546e56SEric Biggers vmlal.s32 q2, d19, d3 1298*68546e56SEric Biggers vmlal.s32 q2, d22, d2 1299*68546e56SEric Biggers vmlal.s32 q2, d23, d1 1300*68546e56SEric Biggers vmlal.s32 q2, d24, d0 1301*68546e56SEric Biggers vst1.8 {d20-d21}, [r2, : 128]! 1302*68546e56SEric Biggers vmull.s32 q7, d18, d9 1303*68546e56SEric Biggers vmlal.s32 q7, d26, d3 1304*68546e56SEric Biggers vmlal.s32 q7, d19, d8 1305*68546e56SEric Biggers vmlal.s32 q7, d27, d2 1306*68546e56SEric Biggers vmlal.s32 q7, d22, d7 1307*68546e56SEric Biggers vmlal.s32 q7, d28, d1 1308*68546e56SEric Biggers vmlal.s32 q7, d23, d6 1309*68546e56SEric Biggers vmlal.s32 q7, d29, d0 1310*68546e56SEric Biggers vst1.8 {d10-d11}, [r2, : 128]! 1311*68546e56SEric Biggers vmull.s32 q5, d18, d3 1312*68546e56SEric Biggers vmlal.s32 q5, d19, d2 1313*68546e56SEric Biggers vmlal.s32 q5, d22, d1 1314*68546e56SEric Biggers vmlal.s32 q5, d23, d0 1315*68546e56SEric Biggers vmlal.s32 q5, d12, d8 1316*68546e56SEric Biggers vst1.8 {d16-d17}, [r2, : 128]! 1317*68546e56SEric Biggers vmull.s32 q4, d18, d8 1318*68546e56SEric Biggers vmlal.s32 q4, d26, d2 1319*68546e56SEric Biggers vmlal.s32 q4, d19, d7 1320*68546e56SEric Biggers vmlal.s32 q4, d27, d1 1321*68546e56SEric Biggers vmlal.s32 q4, d22, d6 1322*68546e56SEric Biggers vmlal.s32 q4, d28, d0 1323*68546e56SEric Biggers vmull.s32 q8, d18, d7 1324*68546e56SEric Biggers vmlal.s32 q8, d26, d1 1325*68546e56SEric Biggers vmlal.s32 q8, d19, d6 1326*68546e56SEric Biggers vmlal.s32 q8, d27, d0 1327*68546e56SEric Biggers add r2, sp, #544 1328*68546e56SEric Biggers vld1.8 {d20-d21}, [r2, : 128] 1329*68546e56SEric Biggers vmlal.s32 q7, d24, d21 1330*68546e56SEric Biggers vmlal.s32 q7, d25, d20 1331*68546e56SEric Biggers vmlal.s32 q4, d23, d21 1332*68546e56SEric Biggers vmlal.s32 q4, d29, d20 1333*68546e56SEric Biggers vmlal.s32 q8, d22, d21 1334*68546e56SEric Biggers vmlal.s32 q8, d28, d20 1335*68546e56SEric Biggers vmlal.s32 q5, d24, d20 1336*68546e56SEric Biggers vst1.8 {d14-d15}, [r2, : 128] 1337*68546e56SEric Biggers vmull.s32 q7, d18, d6 1338*68546e56SEric Biggers vmlal.s32 q7, d26, d0 1339*68546e56SEric Biggers add r2, sp, #624 1340*68546e56SEric Biggers vld1.8 {d30-d31}, [r2, : 128] 1341*68546e56SEric Biggers vmlal.s32 q2, d30, d21 1342*68546e56SEric Biggers vmlal.s32 q7, d19, d21 1343*68546e56SEric Biggers vmlal.s32 q7, d27, d20 1344*68546e56SEric Biggers add r2, sp, #592 1345*68546e56SEric Biggers vld1.8 {d26-d27}, [r2, : 128] 1346*68546e56SEric Biggers vmlal.s32 q4, d25, d27 1347*68546e56SEric Biggers vmlal.s32 q8, d29, d27 1348*68546e56SEric Biggers vmlal.s32 q8, d25, d26 1349*68546e56SEric Biggers vmlal.s32 q7, d28, d27 1350*68546e56SEric Biggers vmlal.s32 q7, d29, d26 1351*68546e56SEric Biggers add r2, sp, #576 1352*68546e56SEric Biggers vld1.8 {d28-d29}, [r2, : 128] 1353*68546e56SEric Biggers vmlal.s32 q4, d24, d29 1354*68546e56SEric Biggers vmlal.s32 q8, d23, d29 1355*68546e56SEric Biggers vmlal.s32 q8, d24, d28 1356*68546e56SEric Biggers vmlal.s32 q7, d22, d29 1357*68546e56SEric Biggers vmlal.s32 q7, d23, d28 1358*68546e56SEric Biggers vst1.8 {d8-d9}, [r2, : 128] 1359*68546e56SEric Biggers add r2, sp, #528 1360*68546e56SEric Biggers vld1.8 {d8-d9}, [r2, : 128] 1361*68546e56SEric Biggers vmlal.s32 q7, d24, d9 1362*68546e56SEric Biggers vmlal.s32 q7, d25, d31 1363*68546e56SEric Biggers vmull.s32 q1, d18, d2 1364*68546e56SEric Biggers vmlal.s32 q1, d19, d1 1365*68546e56SEric Biggers vmlal.s32 q1, d22, d0 1366*68546e56SEric Biggers vmlal.s32 q1, d24, d27 1367*68546e56SEric Biggers vmlal.s32 q1, d23, d20 1368*68546e56SEric Biggers vmlal.s32 q1, d12, d7 1369*68546e56SEric Biggers vmlal.s32 q1, d13, d6 1370*68546e56SEric Biggers vmull.s32 q6, d18, d1 1371*68546e56SEric Biggers vmlal.s32 q6, d19, d0 1372*68546e56SEric Biggers vmlal.s32 q6, d23, d27 1373*68546e56SEric Biggers vmlal.s32 q6, d22, d20 1374*68546e56SEric Biggers vmlal.s32 q6, d24, d26 1375*68546e56SEric Biggers vmull.s32 q0, d18, d0 1376*68546e56SEric Biggers vmlal.s32 q0, d22, d27 1377*68546e56SEric Biggers vmlal.s32 q0, d23, d26 1378*68546e56SEric Biggers vmlal.s32 q0, d24, d31 1379*68546e56SEric Biggers vmlal.s32 q0, d19, d20 1380*68546e56SEric Biggers add r2, sp, #608 1381*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 1382*68546e56SEric Biggers vmlal.s32 q2, d18, d7 1383*68546e56SEric Biggers vmlal.s32 q5, d18, d6 1384*68546e56SEric Biggers vmlal.s32 q1, d18, d21 1385*68546e56SEric Biggers vmlal.s32 q0, d18, d28 1386*68546e56SEric Biggers vmlal.s32 q6, d18, d29 1387*68546e56SEric Biggers vmlal.s32 q2, d19, d6 1388*68546e56SEric Biggers vmlal.s32 q5, d19, d21 1389*68546e56SEric Biggers vmlal.s32 q1, d19, d29 1390*68546e56SEric Biggers vmlal.s32 q0, d19, d9 1391*68546e56SEric Biggers vmlal.s32 q6, d19, d28 1392*68546e56SEric Biggers add r2, sp, #560 1393*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 1394*68546e56SEric Biggers add r2, sp, #480 1395*68546e56SEric Biggers vld1.8 {d22-d23}, [r2, : 128] 1396*68546e56SEric Biggers vmlal.s32 q5, d19, d7 1397*68546e56SEric Biggers vmlal.s32 q0, d18, d21 1398*68546e56SEric Biggers vmlal.s32 q0, d19, d29 1399*68546e56SEric Biggers vmlal.s32 q6, d18, d6 1400*68546e56SEric Biggers add r2, sp, #496 1401*68546e56SEric Biggers vld1.8 {d6-d7}, [r2, : 128] 1402*68546e56SEric Biggers vmlal.s32 q6, d19, d21 1403*68546e56SEric Biggers add r2, sp, #544 1404*68546e56SEric Biggers vld1.8 {d18-d19}, [r2, : 128] 1405*68546e56SEric Biggers vmlal.s32 q0, d30, d8 1406*68546e56SEric Biggers add r2, sp, #640 1407*68546e56SEric Biggers vld1.8 {d20-d21}, [r2, : 128] 1408*68546e56SEric Biggers vmlal.s32 q5, d30, d29 1409*68546e56SEric Biggers add r2, sp, #576 1410*68546e56SEric Biggers vld1.8 {d24-d25}, [r2, : 128] 1411*68546e56SEric Biggers vmlal.s32 q1, d30, d28 1412*68546e56SEric Biggers vadd.i64 q13, q0, q11 1413*68546e56SEric Biggers vadd.i64 q14, q5, q11 1414*68546e56SEric Biggers vmlal.s32 q6, d30, d9 1415*68546e56SEric Biggers vshr.s64 q4, q13, #26 1416*68546e56SEric Biggers vshr.s64 q13, q14, #26 1417*68546e56SEric Biggers vadd.i64 q7, q7, q4 1418*68546e56SEric Biggers vshl.i64 q4, q4, #26 1419*68546e56SEric Biggers vadd.i64 q14, q7, q3 1420*68546e56SEric Biggers vadd.i64 q9, q9, q13 1421*68546e56SEric Biggers vshl.i64 q13, q13, #26 1422*68546e56SEric Biggers vadd.i64 q15, q9, q3 1423*68546e56SEric Biggers vsub.i64 q0, q0, q4 1424*68546e56SEric Biggers vshr.s64 q4, q14, #25 1425*68546e56SEric Biggers vsub.i64 q5, q5, q13 1426*68546e56SEric Biggers vshr.s64 q13, q15, #25 1427*68546e56SEric Biggers vadd.i64 q6, q6, q4 1428*68546e56SEric Biggers vshl.i64 q4, q4, #25 1429*68546e56SEric Biggers vadd.i64 q14, q6, q11 1430*68546e56SEric Biggers vadd.i64 q2, q2, q13 1431*68546e56SEric Biggers vsub.i64 q4, q7, q4 1432*68546e56SEric Biggers vshr.s64 q7, q14, #26 1433*68546e56SEric Biggers vshl.i64 q13, q13, #25 1434*68546e56SEric Biggers vadd.i64 q14, q2, q11 1435*68546e56SEric Biggers vadd.i64 q8, q8, q7 1436*68546e56SEric Biggers vshl.i64 q7, q7, #26 1437*68546e56SEric Biggers vadd.i64 q15, q8, q3 1438*68546e56SEric Biggers vsub.i64 q9, q9, q13 1439*68546e56SEric Biggers vshr.s64 q13, q14, #26 1440*68546e56SEric Biggers vsub.i64 q6, q6, q7 1441*68546e56SEric Biggers vshr.s64 q7, q15, #25 1442*68546e56SEric Biggers vadd.i64 q10, q10, q13 1443*68546e56SEric Biggers vshl.i64 q13, q13, #26 1444*68546e56SEric Biggers vadd.i64 q14, q10, q3 1445*68546e56SEric Biggers vadd.i64 q1, q1, q7 1446*68546e56SEric Biggers add r2, r3, #240 1447*68546e56SEric Biggers vshl.i64 q7, q7, #25 1448*68546e56SEric Biggers add r4, r3, #144 1449*68546e56SEric Biggers vadd.i64 q15, q1, q11 1450*68546e56SEric Biggers add r2, r2, #8 1451*68546e56SEric Biggers vsub.i64 q2, q2, q13 1452*68546e56SEric Biggers add r4, r4, #8 1453*68546e56SEric Biggers vshr.s64 q13, q14, #25 1454*68546e56SEric Biggers vsub.i64 q7, q8, q7 1455*68546e56SEric Biggers vshr.s64 q8, q15, #26 1456*68546e56SEric Biggers vadd.i64 q14, q13, q13 1457*68546e56SEric Biggers vadd.i64 q12, q12, q8 1458*68546e56SEric Biggers vtrn.32 d12, d14 1459*68546e56SEric Biggers vshl.i64 q8, q8, #26 1460*68546e56SEric Biggers vtrn.32 d13, d15 1461*68546e56SEric Biggers vadd.i64 q3, q12, q3 1462*68546e56SEric Biggers vadd.i64 q0, q0, q14 1463*68546e56SEric Biggers vst1.8 d12, [r2, : 64]! 1464*68546e56SEric Biggers vshl.i64 q7, q13, #4 1465*68546e56SEric Biggers vst1.8 d13, [r4, : 64]! 1466*68546e56SEric Biggers vsub.i64 q1, q1, q8 1467*68546e56SEric Biggers vshr.s64 q3, q3, #25 1468*68546e56SEric Biggers vadd.i64 q0, q0, q7 1469*68546e56SEric Biggers vadd.i64 q5, q5, q3 1470*68546e56SEric Biggers vshl.i64 q3, q3, #25 1471*68546e56SEric Biggers vadd.i64 q6, q5, q11 1472*68546e56SEric Biggers vadd.i64 q0, q0, q13 1473*68546e56SEric Biggers vshl.i64 q7, q13, #25 1474*68546e56SEric Biggers vadd.i64 q8, q0, q11 1475*68546e56SEric Biggers vsub.i64 q3, q12, q3 1476*68546e56SEric Biggers vshr.s64 q6, q6, #26 1477*68546e56SEric Biggers vsub.i64 q7, q10, q7 1478*68546e56SEric Biggers vtrn.32 d2, d6 1479*68546e56SEric Biggers vshr.s64 q8, q8, #26 1480*68546e56SEric Biggers vtrn.32 d3, d7 1481*68546e56SEric Biggers vadd.i64 q3, q9, q6 1482*68546e56SEric Biggers vst1.8 d2, [r2, : 64] 1483*68546e56SEric Biggers vshl.i64 q6, q6, #26 1484*68546e56SEric Biggers vst1.8 d3, [r4, : 64] 1485*68546e56SEric Biggers vadd.i64 q1, q4, q8 1486*68546e56SEric Biggers vtrn.32 d4, d14 1487*68546e56SEric Biggers vshl.i64 q4, q8, #26 1488*68546e56SEric Biggers vtrn.32 d5, d15 1489*68546e56SEric Biggers vsub.i64 q5, q5, q6 1490*68546e56SEric Biggers add r2, r2, #16 1491*68546e56SEric Biggers vsub.i64 q0, q0, q4 1492*68546e56SEric Biggers vst1.8 d4, [r2, : 64] 1493*68546e56SEric Biggers add r4, r4, #16 1494*68546e56SEric Biggers vst1.8 d5, [r4, : 64] 1495*68546e56SEric Biggers vtrn.32 d10, d6 1496*68546e56SEric Biggers vtrn.32 d11, d7 1497*68546e56SEric Biggers sub r2, r2, #8 1498*68546e56SEric Biggers sub r4, r4, #8 1499*68546e56SEric Biggers vtrn.32 d0, d2 1500*68546e56SEric Biggers vtrn.32 d1, d3 1501*68546e56SEric Biggers vst1.8 d10, [r2, : 64] 1502*68546e56SEric Biggers vst1.8 d11, [r4, : 64] 1503*68546e56SEric Biggers sub r2, r2, #24 1504*68546e56SEric Biggers sub r4, r4, #24 1505*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 1506*68546e56SEric Biggers vst1.8 d1, [r4, : 64] 1507*68546e56SEric Biggers ldr r2, [sp, #456] 1508*68546e56SEric Biggers ldr r4, [sp, #460] 1509*68546e56SEric Biggers subs r5, r2, #1 1510*68546e56SEric Biggers bge .Lmainloop 1511*68546e56SEric Biggers add r1, r3, #144 1512*68546e56SEric Biggers add r2, r3, #336 1513*68546e56SEric Biggers vld1.8 {d0-d1}, [r1, : 128]! 1514*68546e56SEric Biggers vld1.8 {d2-d3}, [r1, : 128]! 1515*68546e56SEric Biggers vld1.8 {d4}, [r1, : 64] 1516*68546e56SEric Biggers vst1.8 {d0-d1}, [r2, : 128]! 1517*68546e56SEric Biggers vst1.8 {d2-d3}, [r2, : 128]! 1518*68546e56SEric Biggers vst1.8 d4, [r2, : 64] 1519*68546e56SEric Biggers movw r1, #0 1520*68546e56SEric Biggers.Linvertloop: 1521*68546e56SEric Biggers add r2, r3, #144 1522*68546e56SEric Biggers movw r4, #0 1523*68546e56SEric Biggers movw r5, #2 1524*68546e56SEric Biggers cmp r1, #1 1525*68546e56SEric Biggers moveq r5, #1 1526*68546e56SEric Biggers addeq r2, r3, #336 1527*68546e56SEric Biggers addeq r4, r3, #48 1528*68546e56SEric Biggers cmp r1, #2 1529*68546e56SEric Biggers moveq r5, #1 1530*68546e56SEric Biggers addeq r2, r3, #48 1531*68546e56SEric Biggers cmp r1, #3 1532*68546e56SEric Biggers moveq r5, #5 1533*68546e56SEric Biggers addeq r4, r3, #336 1534*68546e56SEric Biggers cmp r1, #4 1535*68546e56SEric Biggers moveq r5, #10 1536*68546e56SEric Biggers cmp r1, #5 1537*68546e56SEric Biggers moveq r5, #20 1538*68546e56SEric Biggers cmp r1, #6 1539*68546e56SEric Biggers moveq r5, #10 1540*68546e56SEric Biggers addeq r2, r3, #336 1541*68546e56SEric Biggers addeq r4, r3, #336 1542*68546e56SEric Biggers cmp r1, #7 1543*68546e56SEric Biggers moveq r5, #50 1544*68546e56SEric Biggers cmp r1, #8 1545*68546e56SEric Biggers moveq r5, #100 1546*68546e56SEric Biggers cmp r1, #9 1547*68546e56SEric Biggers moveq r5, #50 1548*68546e56SEric Biggers addeq r2, r3, #336 1549*68546e56SEric Biggers cmp r1, #10 1550*68546e56SEric Biggers moveq r5, #5 1551*68546e56SEric Biggers addeq r2, r3, #48 1552*68546e56SEric Biggers cmp r1, #11 1553*68546e56SEric Biggers moveq r5, #0 1554*68546e56SEric Biggers addeq r2, r3, #96 1555*68546e56SEric Biggers add r6, r3, #144 1556*68546e56SEric Biggers add r7, r3, #288 1557*68546e56SEric Biggers vld1.8 {d0-d1}, [r6, : 128]! 1558*68546e56SEric Biggers vld1.8 {d2-d3}, [r6, : 128]! 1559*68546e56SEric Biggers vld1.8 {d4}, [r6, : 64] 1560*68546e56SEric Biggers vst1.8 {d0-d1}, [r7, : 128]! 1561*68546e56SEric Biggers vst1.8 {d2-d3}, [r7, : 128]! 1562*68546e56SEric Biggers vst1.8 d4, [r7, : 64] 1563*68546e56SEric Biggers cmp r5, #0 1564*68546e56SEric Biggers beq .Lskipsquaringloop 1565*68546e56SEric Biggers.Lsquaringloop: 1566*68546e56SEric Biggers add r6, r3, #288 1567*68546e56SEric Biggers add r7, r3, #288 1568*68546e56SEric Biggers add r8, r3, #288 1569*68546e56SEric Biggers vmov.i32 q0, #19 1570*68546e56SEric Biggers vmov.i32 q1, #0 1571*68546e56SEric Biggers vmov.i32 q2, #1 1572*68546e56SEric Biggers vzip.i32 q1, q2 1573*68546e56SEric Biggers vld1.8 {d4-d5}, [r7, : 128]! 1574*68546e56SEric Biggers vld1.8 {d6-d7}, [r7, : 128]! 1575*68546e56SEric Biggers vld1.8 {d9}, [r7, : 64] 1576*68546e56SEric Biggers vld1.8 {d10-d11}, [r6, : 128]! 1577*68546e56SEric Biggers add r7, sp, #384 1578*68546e56SEric Biggers vld1.8 {d12-d13}, [r6, : 128]! 1579*68546e56SEric Biggers vmul.i32 q7, q2, q0 1580*68546e56SEric Biggers vld1.8 {d8}, [r6, : 64] 1581*68546e56SEric Biggers vext.32 d17, d11, d10, #1 1582*68546e56SEric Biggers vmul.i32 q9, q3, q0 1583*68546e56SEric Biggers vext.32 d16, d10, d8, #1 1584*68546e56SEric Biggers vshl.u32 q10, q5, q1 1585*68546e56SEric Biggers vext.32 d22, d14, d4, #1 1586*68546e56SEric Biggers vext.32 d24, d18, d6, #1 1587*68546e56SEric Biggers vshl.u32 q13, q6, q1 1588*68546e56SEric Biggers vshl.u32 d28, d8, d2 1589*68546e56SEric Biggers vrev64.i32 d22, d22 1590*68546e56SEric Biggers vmul.i32 d1, d9, d1 1591*68546e56SEric Biggers vrev64.i32 d24, d24 1592*68546e56SEric Biggers vext.32 d29, d8, d13, #1 1593*68546e56SEric Biggers vext.32 d0, d1, d9, #1 1594*68546e56SEric Biggers vrev64.i32 d0, d0 1595*68546e56SEric Biggers vext.32 d2, d9, d1, #1 1596*68546e56SEric Biggers vext.32 d23, d15, d5, #1 1597*68546e56SEric Biggers vmull.s32 q4, d20, d4 1598*68546e56SEric Biggers vrev64.i32 d23, d23 1599*68546e56SEric Biggers vmlal.s32 q4, d21, d1 1600*68546e56SEric Biggers vrev64.i32 d2, d2 1601*68546e56SEric Biggers vmlal.s32 q4, d26, d19 1602*68546e56SEric Biggers vext.32 d3, d5, d15, #1 1603*68546e56SEric Biggers vmlal.s32 q4, d27, d18 1604*68546e56SEric Biggers vrev64.i32 d3, d3 1605*68546e56SEric Biggers vmlal.s32 q4, d28, d15 1606*68546e56SEric Biggers vext.32 d14, d12, d11, #1 1607*68546e56SEric Biggers vmull.s32 q5, d16, d23 1608*68546e56SEric Biggers vext.32 d15, d13, d12, #1 1609*68546e56SEric Biggers vmlal.s32 q5, d17, d4 1610*68546e56SEric Biggers vst1.8 d8, [r7, : 64]! 1611*68546e56SEric Biggers vmlal.s32 q5, d14, d1 1612*68546e56SEric Biggers vext.32 d12, d9, d8, #0 1613*68546e56SEric Biggers vmlal.s32 q5, d15, d19 1614*68546e56SEric Biggers vmov.i64 d13, #0 1615*68546e56SEric Biggers vmlal.s32 q5, d29, d18 1616*68546e56SEric Biggers vext.32 d25, d19, d7, #1 1617*68546e56SEric Biggers vmlal.s32 q6, d20, d5 1618*68546e56SEric Biggers vrev64.i32 d25, d25 1619*68546e56SEric Biggers vmlal.s32 q6, d21, d4 1620*68546e56SEric Biggers vst1.8 d11, [r7, : 64]! 1621*68546e56SEric Biggers vmlal.s32 q6, d26, d1 1622*68546e56SEric Biggers vext.32 d9, d10, d10, #0 1623*68546e56SEric Biggers vmlal.s32 q6, d27, d19 1624*68546e56SEric Biggers vmov.i64 d8, #0 1625*68546e56SEric Biggers vmlal.s32 q6, d28, d18 1626*68546e56SEric Biggers vmlal.s32 q4, d16, d24 1627*68546e56SEric Biggers vmlal.s32 q4, d17, d5 1628*68546e56SEric Biggers vmlal.s32 q4, d14, d4 1629*68546e56SEric Biggers vst1.8 d12, [r7, : 64]! 1630*68546e56SEric Biggers vmlal.s32 q4, d15, d1 1631*68546e56SEric Biggers vext.32 d10, d13, d12, #0 1632*68546e56SEric Biggers vmlal.s32 q4, d29, d19 1633*68546e56SEric Biggers vmov.i64 d11, #0 1634*68546e56SEric Biggers vmlal.s32 q5, d20, d6 1635*68546e56SEric Biggers vmlal.s32 q5, d21, d5 1636*68546e56SEric Biggers vmlal.s32 q5, d26, d4 1637*68546e56SEric Biggers vext.32 d13, d8, d8, #0 1638*68546e56SEric Biggers vmlal.s32 q5, d27, d1 1639*68546e56SEric Biggers vmov.i64 d12, #0 1640*68546e56SEric Biggers vmlal.s32 q5, d28, d19 1641*68546e56SEric Biggers vst1.8 d9, [r7, : 64]! 1642*68546e56SEric Biggers vmlal.s32 q6, d16, d25 1643*68546e56SEric Biggers vmlal.s32 q6, d17, d6 1644*68546e56SEric Biggers vst1.8 d10, [r7, : 64] 1645*68546e56SEric Biggers vmlal.s32 q6, d14, d5 1646*68546e56SEric Biggers vext.32 d8, d11, d10, #0 1647*68546e56SEric Biggers vmlal.s32 q6, d15, d4 1648*68546e56SEric Biggers vmov.i64 d9, #0 1649*68546e56SEric Biggers vmlal.s32 q6, d29, d1 1650*68546e56SEric Biggers vmlal.s32 q4, d20, d7 1651*68546e56SEric Biggers vmlal.s32 q4, d21, d6 1652*68546e56SEric Biggers vmlal.s32 q4, d26, d5 1653*68546e56SEric Biggers vext.32 d11, d12, d12, #0 1654*68546e56SEric Biggers vmlal.s32 q4, d27, d4 1655*68546e56SEric Biggers vmov.i64 d10, #0 1656*68546e56SEric Biggers vmlal.s32 q4, d28, d1 1657*68546e56SEric Biggers vmlal.s32 q5, d16, d0 1658*68546e56SEric Biggers sub r6, r7, #32 1659*68546e56SEric Biggers vmlal.s32 q5, d17, d7 1660*68546e56SEric Biggers vmlal.s32 q5, d14, d6 1661*68546e56SEric Biggers vext.32 d30, d9, d8, #0 1662*68546e56SEric Biggers vmlal.s32 q5, d15, d5 1663*68546e56SEric Biggers vld1.8 {d31}, [r6, : 64]! 1664*68546e56SEric Biggers vmlal.s32 q5, d29, d4 1665*68546e56SEric Biggers vmlal.s32 q15, d20, d0 1666*68546e56SEric Biggers vext.32 d0, d6, d18, #1 1667*68546e56SEric Biggers vmlal.s32 q15, d21, d25 1668*68546e56SEric Biggers vrev64.i32 d0, d0 1669*68546e56SEric Biggers vmlal.s32 q15, d26, d24 1670*68546e56SEric Biggers vext.32 d1, d7, d19, #1 1671*68546e56SEric Biggers vext.32 d7, d10, d10, #0 1672*68546e56SEric Biggers vmlal.s32 q15, d27, d23 1673*68546e56SEric Biggers vrev64.i32 d1, d1 1674*68546e56SEric Biggers vld1.8 {d6}, [r6, : 64] 1675*68546e56SEric Biggers vmlal.s32 q15, d28, d22 1676*68546e56SEric Biggers vmlal.s32 q3, d16, d4 1677*68546e56SEric Biggers add r6, r6, #24 1678*68546e56SEric Biggers vmlal.s32 q3, d17, d2 1679*68546e56SEric Biggers vext.32 d4, d31, d30, #0 1680*68546e56SEric Biggers vmov d17, d11 1681*68546e56SEric Biggers vmlal.s32 q3, d14, d1 1682*68546e56SEric Biggers vext.32 d11, d13, d13, #0 1683*68546e56SEric Biggers vext.32 d13, d30, d30, #0 1684*68546e56SEric Biggers vmlal.s32 q3, d15, d0 1685*68546e56SEric Biggers vext.32 d1, d8, d8, #0 1686*68546e56SEric Biggers vmlal.s32 q3, d29, d3 1687*68546e56SEric Biggers vld1.8 {d5}, [r6, : 64] 1688*68546e56SEric Biggers sub r6, r6, #16 1689*68546e56SEric Biggers vext.32 d10, d6, d6, #0 1690*68546e56SEric Biggers vmov.i32 q1, #0xffffffff 1691*68546e56SEric Biggers vshl.i64 q4, q1, #25 1692*68546e56SEric Biggers add r7, sp, #480 1693*68546e56SEric Biggers vld1.8 {d14-d15}, [r7, : 128] 1694*68546e56SEric Biggers vadd.i64 q9, q2, q7 1695*68546e56SEric Biggers vshl.i64 q1, q1, #26 1696*68546e56SEric Biggers vshr.s64 q10, q9, #26 1697*68546e56SEric Biggers vld1.8 {d0}, [r6, : 64]! 1698*68546e56SEric Biggers vadd.i64 q5, q5, q10 1699*68546e56SEric Biggers vand q9, q9, q1 1700*68546e56SEric Biggers vld1.8 {d16}, [r6, : 64]! 1701*68546e56SEric Biggers add r6, sp, #496 1702*68546e56SEric Biggers vld1.8 {d20-d21}, [r6, : 128] 1703*68546e56SEric Biggers vadd.i64 q11, q5, q10 1704*68546e56SEric Biggers vsub.i64 q2, q2, q9 1705*68546e56SEric Biggers vshr.s64 q9, q11, #25 1706*68546e56SEric Biggers vext.32 d12, d5, d4, #0 1707*68546e56SEric Biggers vand q11, q11, q4 1708*68546e56SEric Biggers vadd.i64 q0, q0, q9 1709*68546e56SEric Biggers vmov d19, d7 1710*68546e56SEric Biggers vadd.i64 q3, q0, q7 1711*68546e56SEric Biggers vsub.i64 q5, q5, q11 1712*68546e56SEric Biggers vshr.s64 q11, q3, #26 1713*68546e56SEric Biggers vext.32 d18, d11, d10, #0 1714*68546e56SEric Biggers vand q3, q3, q1 1715*68546e56SEric Biggers vadd.i64 q8, q8, q11 1716*68546e56SEric Biggers vadd.i64 q11, q8, q10 1717*68546e56SEric Biggers vsub.i64 q0, q0, q3 1718*68546e56SEric Biggers vshr.s64 q3, q11, #25 1719*68546e56SEric Biggers vand q11, q11, q4 1720*68546e56SEric Biggers vadd.i64 q3, q6, q3 1721*68546e56SEric Biggers vadd.i64 q6, q3, q7 1722*68546e56SEric Biggers vsub.i64 q8, q8, q11 1723*68546e56SEric Biggers vshr.s64 q11, q6, #26 1724*68546e56SEric Biggers vand q6, q6, q1 1725*68546e56SEric Biggers vadd.i64 q9, q9, q11 1726*68546e56SEric Biggers vadd.i64 d25, d19, d21 1727*68546e56SEric Biggers vsub.i64 q3, q3, q6 1728*68546e56SEric Biggers vshr.s64 d23, d25, #25 1729*68546e56SEric Biggers vand q4, q12, q4 1730*68546e56SEric Biggers vadd.i64 d21, d23, d23 1731*68546e56SEric Biggers vshl.i64 d25, d23, #4 1732*68546e56SEric Biggers vadd.i64 d21, d21, d23 1733*68546e56SEric Biggers vadd.i64 d25, d25, d21 1734*68546e56SEric Biggers vadd.i64 d4, d4, d25 1735*68546e56SEric Biggers vzip.i32 q0, q8 1736*68546e56SEric Biggers vadd.i64 d12, d4, d14 1737*68546e56SEric Biggers add r6, r8, #8 1738*68546e56SEric Biggers vst1.8 d0, [r6, : 64] 1739*68546e56SEric Biggers vsub.i64 d19, d19, d9 1740*68546e56SEric Biggers add r6, r6, #16 1741*68546e56SEric Biggers vst1.8 d16, [r6, : 64] 1742*68546e56SEric Biggers vshr.s64 d22, d12, #26 1743*68546e56SEric Biggers vand q0, q6, q1 1744*68546e56SEric Biggers vadd.i64 d10, d10, d22 1745*68546e56SEric Biggers vzip.i32 q3, q9 1746*68546e56SEric Biggers vsub.i64 d4, d4, d0 1747*68546e56SEric Biggers sub r6, r6, #8 1748*68546e56SEric Biggers vst1.8 d6, [r6, : 64] 1749*68546e56SEric Biggers add r6, r6, #16 1750*68546e56SEric Biggers vst1.8 d18, [r6, : 64] 1751*68546e56SEric Biggers vzip.i32 q2, q5 1752*68546e56SEric Biggers sub r6, r6, #32 1753*68546e56SEric Biggers vst1.8 d4, [r6, : 64] 1754*68546e56SEric Biggers subs r5, r5, #1 1755*68546e56SEric Biggers bhi .Lsquaringloop 1756*68546e56SEric Biggers.Lskipsquaringloop: 1757*68546e56SEric Biggers mov r2, r2 1758*68546e56SEric Biggers add r5, r3, #288 1759*68546e56SEric Biggers add r6, r3, #144 1760*68546e56SEric Biggers vmov.i32 q0, #19 1761*68546e56SEric Biggers vmov.i32 q1, #0 1762*68546e56SEric Biggers vmov.i32 q2, #1 1763*68546e56SEric Biggers vzip.i32 q1, q2 1764*68546e56SEric Biggers vld1.8 {d4-d5}, [r5, : 128]! 1765*68546e56SEric Biggers vld1.8 {d6-d7}, [r5, : 128]! 1766*68546e56SEric Biggers vld1.8 {d9}, [r5, : 64] 1767*68546e56SEric Biggers vld1.8 {d10-d11}, [r2, : 128]! 1768*68546e56SEric Biggers add r5, sp, #384 1769*68546e56SEric Biggers vld1.8 {d12-d13}, [r2, : 128]! 1770*68546e56SEric Biggers vmul.i32 q7, q2, q0 1771*68546e56SEric Biggers vld1.8 {d8}, [r2, : 64] 1772*68546e56SEric Biggers vext.32 d17, d11, d10, #1 1773*68546e56SEric Biggers vmul.i32 q9, q3, q0 1774*68546e56SEric Biggers vext.32 d16, d10, d8, #1 1775*68546e56SEric Biggers vshl.u32 q10, q5, q1 1776*68546e56SEric Biggers vext.32 d22, d14, d4, #1 1777*68546e56SEric Biggers vext.32 d24, d18, d6, #1 1778*68546e56SEric Biggers vshl.u32 q13, q6, q1 1779*68546e56SEric Biggers vshl.u32 d28, d8, d2 1780*68546e56SEric Biggers vrev64.i32 d22, d22 1781*68546e56SEric Biggers vmul.i32 d1, d9, d1 1782*68546e56SEric Biggers vrev64.i32 d24, d24 1783*68546e56SEric Biggers vext.32 d29, d8, d13, #1 1784*68546e56SEric Biggers vext.32 d0, d1, d9, #1 1785*68546e56SEric Biggers vrev64.i32 d0, d0 1786*68546e56SEric Biggers vext.32 d2, d9, d1, #1 1787*68546e56SEric Biggers vext.32 d23, d15, d5, #1 1788*68546e56SEric Biggers vmull.s32 q4, d20, d4 1789*68546e56SEric Biggers vrev64.i32 d23, d23 1790*68546e56SEric Biggers vmlal.s32 q4, d21, d1 1791*68546e56SEric Biggers vrev64.i32 d2, d2 1792*68546e56SEric Biggers vmlal.s32 q4, d26, d19 1793*68546e56SEric Biggers vext.32 d3, d5, d15, #1 1794*68546e56SEric Biggers vmlal.s32 q4, d27, d18 1795*68546e56SEric Biggers vrev64.i32 d3, d3 1796*68546e56SEric Biggers vmlal.s32 q4, d28, d15 1797*68546e56SEric Biggers vext.32 d14, d12, d11, #1 1798*68546e56SEric Biggers vmull.s32 q5, d16, d23 1799*68546e56SEric Biggers vext.32 d15, d13, d12, #1 1800*68546e56SEric Biggers vmlal.s32 q5, d17, d4 1801*68546e56SEric Biggers vst1.8 d8, [r5, : 64]! 1802*68546e56SEric Biggers vmlal.s32 q5, d14, d1 1803*68546e56SEric Biggers vext.32 d12, d9, d8, #0 1804*68546e56SEric Biggers vmlal.s32 q5, d15, d19 1805*68546e56SEric Biggers vmov.i64 d13, #0 1806*68546e56SEric Biggers vmlal.s32 q5, d29, d18 1807*68546e56SEric Biggers vext.32 d25, d19, d7, #1 1808*68546e56SEric Biggers vmlal.s32 q6, d20, d5 1809*68546e56SEric Biggers vrev64.i32 d25, d25 1810*68546e56SEric Biggers vmlal.s32 q6, d21, d4 1811*68546e56SEric Biggers vst1.8 d11, [r5, : 64]! 1812*68546e56SEric Biggers vmlal.s32 q6, d26, d1 1813*68546e56SEric Biggers vext.32 d9, d10, d10, #0 1814*68546e56SEric Biggers vmlal.s32 q6, d27, d19 1815*68546e56SEric Biggers vmov.i64 d8, #0 1816*68546e56SEric Biggers vmlal.s32 q6, d28, d18 1817*68546e56SEric Biggers vmlal.s32 q4, d16, d24 1818*68546e56SEric Biggers vmlal.s32 q4, d17, d5 1819*68546e56SEric Biggers vmlal.s32 q4, d14, d4 1820*68546e56SEric Biggers vst1.8 d12, [r5, : 64]! 1821*68546e56SEric Biggers vmlal.s32 q4, d15, d1 1822*68546e56SEric Biggers vext.32 d10, d13, d12, #0 1823*68546e56SEric Biggers vmlal.s32 q4, d29, d19 1824*68546e56SEric Biggers vmov.i64 d11, #0 1825*68546e56SEric Biggers vmlal.s32 q5, d20, d6 1826*68546e56SEric Biggers vmlal.s32 q5, d21, d5 1827*68546e56SEric Biggers vmlal.s32 q5, d26, d4 1828*68546e56SEric Biggers vext.32 d13, d8, d8, #0 1829*68546e56SEric Biggers vmlal.s32 q5, d27, d1 1830*68546e56SEric Biggers vmov.i64 d12, #0 1831*68546e56SEric Biggers vmlal.s32 q5, d28, d19 1832*68546e56SEric Biggers vst1.8 d9, [r5, : 64]! 1833*68546e56SEric Biggers vmlal.s32 q6, d16, d25 1834*68546e56SEric Biggers vmlal.s32 q6, d17, d6 1835*68546e56SEric Biggers vst1.8 d10, [r5, : 64] 1836*68546e56SEric Biggers vmlal.s32 q6, d14, d5 1837*68546e56SEric Biggers vext.32 d8, d11, d10, #0 1838*68546e56SEric Biggers vmlal.s32 q6, d15, d4 1839*68546e56SEric Biggers vmov.i64 d9, #0 1840*68546e56SEric Biggers vmlal.s32 q6, d29, d1 1841*68546e56SEric Biggers vmlal.s32 q4, d20, d7 1842*68546e56SEric Biggers vmlal.s32 q4, d21, d6 1843*68546e56SEric Biggers vmlal.s32 q4, d26, d5 1844*68546e56SEric Biggers vext.32 d11, d12, d12, #0 1845*68546e56SEric Biggers vmlal.s32 q4, d27, d4 1846*68546e56SEric Biggers vmov.i64 d10, #0 1847*68546e56SEric Biggers vmlal.s32 q4, d28, d1 1848*68546e56SEric Biggers vmlal.s32 q5, d16, d0 1849*68546e56SEric Biggers sub r2, r5, #32 1850*68546e56SEric Biggers vmlal.s32 q5, d17, d7 1851*68546e56SEric Biggers vmlal.s32 q5, d14, d6 1852*68546e56SEric Biggers vext.32 d30, d9, d8, #0 1853*68546e56SEric Biggers vmlal.s32 q5, d15, d5 1854*68546e56SEric Biggers vld1.8 {d31}, [r2, : 64]! 1855*68546e56SEric Biggers vmlal.s32 q5, d29, d4 1856*68546e56SEric Biggers vmlal.s32 q15, d20, d0 1857*68546e56SEric Biggers vext.32 d0, d6, d18, #1 1858*68546e56SEric Biggers vmlal.s32 q15, d21, d25 1859*68546e56SEric Biggers vrev64.i32 d0, d0 1860*68546e56SEric Biggers vmlal.s32 q15, d26, d24 1861*68546e56SEric Biggers vext.32 d1, d7, d19, #1 1862*68546e56SEric Biggers vext.32 d7, d10, d10, #0 1863*68546e56SEric Biggers vmlal.s32 q15, d27, d23 1864*68546e56SEric Biggers vrev64.i32 d1, d1 1865*68546e56SEric Biggers vld1.8 {d6}, [r2, : 64] 1866*68546e56SEric Biggers vmlal.s32 q15, d28, d22 1867*68546e56SEric Biggers vmlal.s32 q3, d16, d4 1868*68546e56SEric Biggers add r2, r2, #24 1869*68546e56SEric Biggers vmlal.s32 q3, d17, d2 1870*68546e56SEric Biggers vext.32 d4, d31, d30, #0 1871*68546e56SEric Biggers vmov d17, d11 1872*68546e56SEric Biggers vmlal.s32 q3, d14, d1 1873*68546e56SEric Biggers vext.32 d11, d13, d13, #0 1874*68546e56SEric Biggers vext.32 d13, d30, d30, #0 1875*68546e56SEric Biggers vmlal.s32 q3, d15, d0 1876*68546e56SEric Biggers vext.32 d1, d8, d8, #0 1877*68546e56SEric Biggers vmlal.s32 q3, d29, d3 1878*68546e56SEric Biggers vld1.8 {d5}, [r2, : 64] 1879*68546e56SEric Biggers sub r2, r2, #16 1880*68546e56SEric Biggers vext.32 d10, d6, d6, #0 1881*68546e56SEric Biggers vmov.i32 q1, #0xffffffff 1882*68546e56SEric Biggers vshl.i64 q4, q1, #25 1883*68546e56SEric Biggers add r5, sp, #480 1884*68546e56SEric Biggers vld1.8 {d14-d15}, [r5, : 128] 1885*68546e56SEric Biggers vadd.i64 q9, q2, q7 1886*68546e56SEric Biggers vshl.i64 q1, q1, #26 1887*68546e56SEric Biggers vshr.s64 q10, q9, #26 1888*68546e56SEric Biggers vld1.8 {d0}, [r2, : 64]! 1889*68546e56SEric Biggers vadd.i64 q5, q5, q10 1890*68546e56SEric Biggers vand q9, q9, q1 1891*68546e56SEric Biggers vld1.8 {d16}, [r2, : 64]! 1892*68546e56SEric Biggers add r2, sp, #496 1893*68546e56SEric Biggers vld1.8 {d20-d21}, [r2, : 128] 1894*68546e56SEric Biggers vadd.i64 q11, q5, q10 1895*68546e56SEric Biggers vsub.i64 q2, q2, q9 1896*68546e56SEric Biggers vshr.s64 q9, q11, #25 1897*68546e56SEric Biggers vext.32 d12, d5, d4, #0 1898*68546e56SEric Biggers vand q11, q11, q4 1899*68546e56SEric Biggers vadd.i64 q0, q0, q9 1900*68546e56SEric Biggers vmov d19, d7 1901*68546e56SEric Biggers vadd.i64 q3, q0, q7 1902*68546e56SEric Biggers vsub.i64 q5, q5, q11 1903*68546e56SEric Biggers vshr.s64 q11, q3, #26 1904*68546e56SEric Biggers vext.32 d18, d11, d10, #0 1905*68546e56SEric Biggers vand q3, q3, q1 1906*68546e56SEric Biggers vadd.i64 q8, q8, q11 1907*68546e56SEric Biggers vadd.i64 q11, q8, q10 1908*68546e56SEric Biggers vsub.i64 q0, q0, q3 1909*68546e56SEric Biggers vshr.s64 q3, q11, #25 1910*68546e56SEric Biggers vand q11, q11, q4 1911*68546e56SEric Biggers vadd.i64 q3, q6, q3 1912*68546e56SEric Biggers vadd.i64 q6, q3, q7 1913*68546e56SEric Biggers vsub.i64 q8, q8, q11 1914*68546e56SEric Biggers vshr.s64 q11, q6, #26 1915*68546e56SEric Biggers vand q6, q6, q1 1916*68546e56SEric Biggers vadd.i64 q9, q9, q11 1917*68546e56SEric Biggers vadd.i64 d25, d19, d21 1918*68546e56SEric Biggers vsub.i64 q3, q3, q6 1919*68546e56SEric Biggers vshr.s64 d23, d25, #25 1920*68546e56SEric Biggers vand q4, q12, q4 1921*68546e56SEric Biggers vadd.i64 d21, d23, d23 1922*68546e56SEric Biggers vshl.i64 d25, d23, #4 1923*68546e56SEric Biggers vadd.i64 d21, d21, d23 1924*68546e56SEric Biggers vadd.i64 d25, d25, d21 1925*68546e56SEric Biggers vadd.i64 d4, d4, d25 1926*68546e56SEric Biggers vzip.i32 q0, q8 1927*68546e56SEric Biggers vadd.i64 d12, d4, d14 1928*68546e56SEric Biggers add r2, r6, #8 1929*68546e56SEric Biggers vst1.8 d0, [r2, : 64] 1930*68546e56SEric Biggers vsub.i64 d19, d19, d9 1931*68546e56SEric Biggers add r2, r2, #16 1932*68546e56SEric Biggers vst1.8 d16, [r2, : 64] 1933*68546e56SEric Biggers vshr.s64 d22, d12, #26 1934*68546e56SEric Biggers vand q0, q6, q1 1935*68546e56SEric Biggers vadd.i64 d10, d10, d22 1936*68546e56SEric Biggers vzip.i32 q3, q9 1937*68546e56SEric Biggers vsub.i64 d4, d4, d0 1938*68546e56SEric Biggers sub r2, r2, #8 1939*68546e56SEric Biggers vst1.8 d6, [r2, : 64] 1940*68546e56SEric Biggers add r2, r2, #16 1941*68546e56SEric Biggers vst1.8 d18, [r2, : 64] 1942*68546e56SEric Biggers vzip.i32 q2, q5 1943*68546e56SEric Biggers sub r2, r2, #32 1944*68546e56SEric Biggers vst1.8 d4, [r2, : 64] 1945*68546e56SEric Biggers cmp r4, #0 1946*68546e56SEric Biggers beq .Lskippostcopy 1947*68546e56SEric Biggers add r2, r3, #144 1948*68546e56SEric Biggers mov r4, r4 1949*68546e56SEric Biggers vld1.8 {d0-d1}, [r2, : 128]! 1950*68546e56SEric Biggers vld1.8 {d2-d3}, [r2, : 128]! 1951*68546e56SEric Biggers vld1.8 {d4}, [r2, : 64] 1952*68546e56SEric Biggers vst1.8 {d0-d1}, [r4, : 128]! 1953*68546e56SEric Biggers vst1.8 {d2-d3}, [r4, : 128]! 1954*68546e56SEric Biggers vst1.8 d4, [r4, : 64] 1955*68546e56SEric Biggers.Lskippostcopy: 1956*68546e56SEric Biggers cmp r1, #1 1957*68546e56SEric Biggers bne .Lskipfinalcopy 1958*68546e56SEric Biggers add r2, r3, #288 1959*68546e56SEric Biggers add r4, r3, #144 1960*68546e56SEric Biggers vld1.8 {d0-d1}, [r2, : 128]! 1961*68546e56SEric Biggers vld1.8 {d2-d3}, [r2, : 128]! 1962*68546e56SEric Biggers vld1.8 {d4}, [r2, : 64] 1963*68546e56SEric Biggers vst1.8 {d0-d1}, [r4, : 128]! 1964*68546e56SEric Biggers vst1.8 {d2-d3}, [r4, : 128]! 1965*68546e56SEric Biggers vst1.8 d4, [r4, : 64] 1966*68546e56SEric Biggers.Lskipfinalcopy: 1967*68546e56SEric Biggers add r1, r1, #1 1968*68546e56SEric Biggers cmp r1, #12 1969*68546e56SEric Biggers blo .Linvertloop 1970*68546e56SEric Biggers add r1, r3, #144 1971*68546e56SEric Biggers ldr r2, [r1], #4 1972*68546e56SEric Biggers ldr r3, [r1], #4 1973*68546e56SEric Biggers ldr r4, [r1], #4 1974*68546e56SEric Biggers ldr r5, [r1], #4 1975*68546e56SEric Biggers ldr r6, [r1], #4 1976*68546e56SEric Biggers ldr r7, [r1], #4 1977*68546e56SEric Biggers ldr r8, [r1], #4 1978*68546e56SEric Biggers ldr r9, [r1], #4 1979*68546e56SEric Biggers ldr r10, [r1], #4 1980*68546e56SEric Biggers ldr r1, [r1] 1981*68546e56SEric Biggers add r11, r1, r1, LSL #4 1982*68546e56SEric Biggers add r11, r11, r1, LSL #1 1983*68546e56SEric Biggers add r11, r11, #16777216 1984*68546e56SEric Biggers mov r11, r11, ASR #25 1985*68546e56SEric Biggers add r11, r11, r2 1986*68546e56SEric Biggers mov r11, r11, ASR #26 1987*68546e56SEric Biggers add r11, r11, r3 1988*68546e56SEric Biggers mov r11, r11, ASR #25 1989*68546e56SEric Biggers add r11, r11, r4 1990*68546e56SEric Biggers mov r11, r11, ASR #26 1991*68546e56SEric Biggers add r11, r11, r5 1992*68546e56SEric Biggers mov r11, r11, ASR #25 1993*68546e56SEric Biggers add r11, r11, r6 1994*68546e56SEric Biggers mov r11, r11, ASR #26 1995*68546e56SEric Biggers add r11, r11, r7 1996*68546e56SEric Biggers mov r11, r11, ASR #25 1997*68546e56SEric Biggers add r11, r11, r8 1998*68546e56SEric Biggers mov r11, r11, ASR #26 1999*68546e56SEric Biggers add r11, r11, r9 2000*68546e56SEric Biggers mov r11, r11, ASR #25 2001*68546e56SEric Biggers add r11, r11, r10 2002*68546e56SEric Biggers mov r11, r11, ASR #26 2003*68546e56SEric Biggers add r11, r11, r1 2004*68546e56SEric Biggers mov r11, r11, ASR #25 2005*68546e56SEric Biggers add r2, r2, r11 2006*68546e56SEric Biggers add r2, r2, r11, LSL #1 2007*68546e56SEric Biggers add r2, r2, r11, LSL #4 2008*68546e56SEric Biggers mov r11, r2, ASR #26 2009*68546e56SEric Biggers add r3, r3, r11 2010*68546e56SEric Biggers sub r2, r2, r11, LSL #26 2011*68546e56SEric Biggers mov r11, r3, ASR #25 2012*68546e56SEric Biggers add r4, r4, r11 2013*68546e56SEric Biggers sub r3, r3, r11, LSL #25 2014*68546e56SEric Biggers mov r11, r4, ASR #26 2015*68546e56SEric Biggers add r5, r5, r11 2016*68546e56SEric Biggers sub r4, r4, r11, LSL #26 2017*68546e56SEric Biggers mov r11, r5, ASR #25 2018*68546e56SEric Biggers add r6, r6, r11 2019*68546e56SEric Biggers sub r5, r5, r11, LSL #25 2020*68546e56SEric Biggers mov r11, r6, ASR #26 2021*68546e56SEric Biggers add r7, r7, r11 2022*68546e56SEric Biggers sub r6, r6, r11, LSL #26 2023*68546e56SEric Biggers mov r11, r7, ASR #25 2024*68546e56SEric Biggers add r8, r8, r11 2025*68546e56SEric Biggers sub r7, r7, r11, LSL #25 2026*68546e56SEric Biggers mov r11, r8, ASR #26 2027*68546e56SEric Biggers add r9, r9, r11 2028*68546e56SEric Biggers sub r8, r8, r11, LSL #26 2029*68546e56SEric Biggers mov r11, r9, ASR #25 2030*68546e56SEric Biggers add r10, r10, r11 2031*68546e56SEric Biggers sub r9, r9, r11, LSL #25 2032*68546e56SEric Biggers mov r11, r10, ASR #26 2033*68546e56SEric Biggers add r1, r1, r11 2034*68546e56SEric Biggers sub r10, r10, r11, LSL #26 2035*68546e56SEric Biggers mov r11, r1, ASR #25 2036*68546e56SEric Biggers sub r1, r1, r11, LSL #25 2037*68546e56SEric Biggers add r2, r2, r3, LSL #26 2038*68546e56SEric Biggers mov r3, r3, LSR #6 2039*68546e56SEric Biggers add r3, r3, r4, LSL #19 2040*68546e56SEric Biggers mov r4, r4, LSR #13 2041*68546e56SEric Biggers add r4, r4, r5, LSL #13 2042*68546e56SEric Biggers mov r5, r5, LSR #19 2043*68546e56SEric Biggers add r5, r5, r6, LSL #6 2044*68546e56SEric Biggers add r6, r7, r8, LSL #25 2045*68546e56SEric Biggers mov r7, r8, LSR #7 2046*68546e56SEric Biggers add r7, r7, r9, LSL #19 2047*68546e56SEric Biggers mov r8, r9, LSR #13 2048*68546e56SEric Biggers add r8, r8, r10, LSL #12 2049*68546e56SEric Biggers mov r9, r10, LSR #20 2050*68546e56SEric Biggers add r1, r9, r1, LSL #6 2051*68546e56SEric Biggers str r2, [r0] 2052*68546e56SEric Biggers str r3, [r0, #4] 2053*68546e56SEric Biggers str r4, [r0, #8] 2054*68546e56SEric Biggers str r5, [r0, #12] 2055*68546e56SEric Biggers str r6, [r0, #16] 2056*68546e56SEric Biggers str r7, [r0, #20] 2057*68546e56SEric Biggers str r8, [r0, #24] 2058*68546e56SEric Biggers str r1, [r0, #28] 2059*68546e56SEric Biggers movw r0, #0 2060*68546e56SEric Biggers mov sp, ip 2061*68546e56SEric Biggers pop {r4-r11, pc} 2062*68546e56SEric BiggersENDPROC(curve25519_neon) 2063