/* Do not modify. This file is auto-generated from chacha-armv8-sve.pl. */
// Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
//
// ChaCha20 for ARMv8 via SVE
//
// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#include "arm_arch.h"

.arch	armv8-a

// Capability word defined elsewhere; the function below tests ARMV8_SVE2 in it.
.hidden	OPENSSL_armcap_P

.text

// ---------------------------------------------------------------------
// Read-only constant pool shared by the SVE ChaCha20 code.
// ---------------------------------------------------------------------
.section	.rodata
.p2align	5				// 2^5 = 32-byte alignment
.type	_chacha_sve_consts,%object
_chacha_sve_consts:

// The byte string "expand 32-byte k" packed into two 64-bit values.
// It is emitted as .quad (not four .word) because the code loads it with
// a 64-bit LDP and then takes the low word / LSR #32 of each register.
.Lchacha20_consts:
.quad	0x3320646e61707865,0x6b20657479622d32	// endian-neutral

// The first two words are loaded as the start/step operands of
// INDEX z31.s on the path taken when ARMV8_SVE2 is not set.
// NOTE(review): presumably a byte-permute pattern for rotate-by-8;
// the consumer of z31 is outside this view -- confirm in the generator.
.Lrot8:
.word	0x02010003,0x04040404,0x02010003,0x04040404

.size	_chacha_sve_consts,.-_chacha_sve_consts

.previous					// back to .text

// Symbol declarations for the entry point that follows.
.globl	ChaCha20_ctr32_sve
.type	ChaCha20_ctr32_sve,%function
37*4757b351SPierre Pronchery.align 5 38*4757b351SPierre ProncheryChaCha20_ctr32_sve: 39*4757b351SPierre Pronchery AARCH64_VALID_CALL_TARGET 40*4757b351SPierre Pronchery.inst 0x04a0e3e5 //cntw x5, ALL, MUL #1 41*4757b351SPierre Pronchery cmp x2,x5,lsl #6 42*4757b351SPierre Pronchery b.lt .Lreturn 43*4757b351SPierre Pronchery mov x7,0 44*4757b351SPierre Pronchery adrp x6,OPENSSL_armcap_P 45*4757b351SPierre Pronchery ldr w6,[x6,#:lo12:OPENSSL_armcap_P] 46*4757b351SPierre Pronchery tst w6,#ARMV8_SVE2 47*4757b351SPierre Pronchery b.eq 1f 48*4757b351SPierre Pronchery mov x7,1 49*4757b351SPierre Pronchery b 2f 50*4757b351SPierre Pronchery1: 51*4757b351SPierre Pronchery cmp x5,4 52*4757b351SPierre Pronchery b.le .Lreturn 53*4757b351SPierre Pronchery adrp x6,.Lrot8 54*4757b351SPierre Pronchery add x6,x6,#:lo12:.Lrot8 55*4757b351SPierre Pronchery ldp w9,w10,[x6] 56*4757b351SPierre Pronchery.inst 0x04aa4d3f //index z31.s,w9,w10 57*4757b351SPierre Pronchery2: 58*4757b351SPierre Pronchery AARCH64_SIGN_LINK_REGISTER 59*4757b351SPierre Pronchery stp d8,d9,[sp,-192]! 
60*4757b351SPierre Pronchery stp d10,d11,[sp,16] 61*4757b351SPierre Pronchery stp d12,d13,[sp,32] 62*4757b351SPierre Pronchery stp d14,d15,[sp,48] 63*4757b351SPierre Pronchery stp x16,x17,[sp,64] 64*4757b351SPierre Pronchery stp x18,x19,[sp,80] 65*4757b351SPierre Pronchery stp x20,x21,[sp,96] 66*4757b351SPierre Pronchery stp x22,x23,[sp,112] 67*4757b351SPierre Pronchery stp x24,x25,[sp,128] 68*4757b351SPierre Pronchery stp x26,x27,[sp,144] 69*4757b351SPierre Pronchery stp x28,x29,[sp,160] 70*4757b351SPierre Pronchery str x30,[sp,176] 71*4757b351SPierre Pronchery 72*4757b351SPierre Pronchery adrp x6,.Lchacha20_consts 73*4757b351SPierre Pronchery add x6,x6,#:lo12:.Lchacha20_consts 74*4757b351SPierre Pronchery ldp x23,x24,[x6] 75*4757b351SPierre Pronchery ldp x25,x26,[x3] 76*4757b351SPierre Pronchery ldp x27,x28,[x3, 16] 77*4757b351SPierre Pronchery ldp x29,x30,[x4] 78*4757b351SPierre Pronchery.inst 0x2599e3e0 //ptrues p0.s,ALL 79*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 80*4757b351SPierre Pronchery ror x25,x25,#32 81*4757b351SPierre Pronchery ror x26,x26,#32 82*4757b351SPierre Pronchery ror x27,x27,#32 83*4757b351SPierre Pronchery ror x28,x28,#32 84*4757b351SPierre Pronchery ror x29,x29,#32 85*4757b351SPierre Pronchery ror x30,x30,#32 86*4757b351SPierre Pronchery#endif 87*4757b351SPierre Pronchery cbz x7, 1f 88*4757b351SPierre Pronchery.align 5 89*4757b351SPierre Pronchery100: 90*4757b351SPierre Pronchery subs x7,x2,x5,lsl #6 91*4757b351SPierre Pronchery b.lt 110f 92*4757b351SPierre Pronchery mov x2,x7 93*4757b351SPierre Pronchery b.eq 101f 94*4757b351SPierre Pronchery cmp x2,64 95*4757b351SPierre Pronchery b.lt 101f 96*4757b351SPierre Pronchery mixin=1 97*4757b351SPierre Pronchery lsr x8,x23,#32 98*4757b351SPierre Pronchery.inst 0x05a03ae0 //dup z0.s,w23 99*4757b351SPierre Pronchery.inst 0x05a03af9 //dup z25.s,w23 100*4757b351SPierre Pronchery.if mixin == 1 101*4757b351SPierre Pronchery mov w7,w23 102*4757b351SPierre Pronchery.endif 103*4757b351SPierre 
Pronchery.inst 0x05a03904 //dup z4.s,w8 104*4757b351SPierre Pronchery.inst 0x05a0391a //dup z26.s,w8 105*4757b351SPierre Pronchery lsr x10,x24,#32 106*4757b351SPierre Pronchery.inst 0x05a03b08 //dup z8.s,w24 107*4757b351SPierre Pronchery.inst 0x05a03b1b //dup z27.s,w24 108*4757b351SPierre Pronchery.if mixin == 1 109*4757b351SPierre Pronchery mov w9,w24 110*4757b351SPierre Pronchery.endif 111*4757b351SPierre Pronchery.inst 0x05a0394c //dup z12.s,w10 112*4757b351SPierre Pronchery.inst 0x05a0395c //dup z28.s,w10 113*4757b351SPierre Pronchery lsr x12,x25,#32 114*4757b351SPierre Pronchery.inst 0x05a03b21 //dup z1.s,w25 115*4757b351SPierre Pronchery.inst 0x05a03b3d //dup z29.s,w25 116*4757b351SPierre Pronchery.if mixin == 1 117*4757b351SPierre Pronchery mov w11,w25 118*4757b351SPierre Pronchery.endif 119*4757b351SPierre Pronchery.inst 0x05a03985 //dup z5.s,w12 120*4757b351SPierre Pronchery.inst 0x05a0399e //dup z30.s,w12 121*4757b351SPierre Pronchery lsr x14,x26,#32 122*4757b351SPierre Pronchery.inst 0x05a03b49 //dup z9.s,w26 123*4757b351SPierre Pronchery.inst 0x05a03b55 //dup z21.s,w26 124*4757b351SPierre Pronchery.if mixin == 1 125*4757b351SPierre Pronchery mov w13,w26 126*4757b351SPierre Pronchery.endif 127*4757b351SPierre Pronchery.inst 0x05a039cd //dup z13.s,w14 128*4757b351SPierre Pronchery.inst 0x05a039d6 //dup z22.s,w14 129*4757b351SPierre Pronchery lsr x16,x27,#32 130*4757b351SPierre Pronchery.inst 0x05a03b62 //dup z2.s,w27 131*4757b351SPierre Pronchery.inst 0x05a03b77 //dup z23.s,w27 132*4757b351SPierre Pronchery.if mixin == 1 133*4757b351SPierre Pronchery mov w15,w27 134*4757b351SPierre Pronchery.endif 135*4757b351SPierre Pronchery.inst 0x05a03a06 //dup z6.s,w16 136*4757b351SPierre Pronchery.inst 0x05a03a18 //dup z24.s,w16 137*4757b351SPierre Pronchery lsr x18,x28,#32 138*4757b351SPierre Pronchery.inst 0x05a03b8a //dup z10.s,w28 139*4757b351SPierre Pronchery.inst 0x05a03b91 //dup z17.s,w28 140*4757b351SPierre Pronchery.if mixin == 1 141*4757b351SPierre 
Pronchery mov w17,w28 142*4757b351SPierre Pronchery.endif 143*4757b351SPierre Pronchery.inst 0x05a03a4e //dup z14.s,w18 144*4757b351SPierre Pronchery.inst 0x05a03a52 //dup z18.s,w18 145*4757b351SPierre Pronchery lsr x22,x30,#32 146*4757b351SPierre Pronchery.inst 0x05a03bcb //dup z11.s,w30 147*4757b351SPierre Pronchery.inst 0x05a03bd4 //dup z20.s,w30 148*4757b351SPierre Pronchery.if mixin == 1 149*4757b351SPierre Pronchery mov w21,w30 150*4757b351SPierre Pronchery.endif 151*4757b351SPierre Pronchery.inst 0x05a03acf //dup z15.s,w22 152*4757b351SPierre Pronchery.inst 0x05a03adf //dup z31.s,w22 153*4757b351SPierre Pronchery.if mixin == 1 154*4757b351SPierre Pronchery add w20,w29,#1 155*4757b351SPierre Pronchery mov w19,w29 156*4757b351SPierre Pronchery.inst 0x04a14690 //index z16.s,w20,1 157*4757b351SPierre Pronchery.inst 0x04a14683 //index z3.s,w20,1 158*4757b351SPierre Pronchery.else 159*4757b351SPierre Pronchery.inst 0x04a147b0 //index z16.s,w29,1 160*4757b351SPierre Pronchery.inst 0x04a147a3 //index z3.s,w29,1 161*4757b351SPierre Pronchery.endif 162*4757b351SPierre Pronchery lsr x20,x29,#32 163*4757b351SPierre Pronchery.inst 0x05a03a87 //dup z7.s,w20 164*4757b351SPierre Pronchery.inst 0x05a03a93 //dup z19.s,w20 165*4757b351SPierre Pronchery mov x6,#10 166*4757b351SPierre Pronchery10: 167*4757b351SPierre Pronchery.align 5 168*4757b351SPierre Pronchery.inst 0x04a10000 //add z0.s,z0.s,z1.s 169*4757b351SPierre Pronchery.if mixin == 1 170*4757b351SPierre Pronchery add w7,w7,w11 171*4757b351SPierre Pronchery.endif 172*4757b351SPierre Pronchery.inst 0x04a50084 //add z4.s,z4.s,z5.s 173*4757b351SPierre Pronchery.if mixin == 1 174*4757b351SPierre Pronchery add w8,w8,w12 175*4757b351SPierre Pronchery.endif 176*4757b351SPierre Pronchery.inst 0x04a90108 //add z8.s,z8.s,z9.s 177*4757b351SPierre Pronchery.if mixin == 1 178*4757b351SPierre Pronchery add w9,w9,w13 179*4757b351SPierre Pronchery.endif 180*4757b351SPierre Pronchery.inst 0x04ad018c //add z12.s,z12.s,z13.s 
181*4757b351SPierre Pronchery.if mixin == 1 182*4757b351SPierre Pronchery add w10,w10,w14 183*4757b351SPierre Pronchery.endif 184*4757b351SPierre Pronchery.if mixin == 1 185*4757b351SPierre Pronchery eor w19,w19,w7 186*4757b351SPierre Pronchery.endif 187*4757b351SPierre Pronchery.inst 0x04703403 //xar z3.s,z3.s,z0.s,16 188*4757b351SPierre Pronchery.if mixin == 1 189*4757b351SPierre Pronchery ror w19,w19,16 190*4757b351SPierre Pronchery.endif 191*4757b351SPierre Pronchery.if mixin == 1 192*4757b351SPierre Pronchery eor w20,w20,w8 193*4757b351SPierre Pronchery.endif 194*4757b351SPierre Pronchery.inst 0x04703487 //xar z7.s,z7.s,z4.s,16 195*4757b351SPierre Pronchery.if mixin == 1 196*4757b351SPierre Pronchery ror w20,w20,16 197*4757b351SPierre Pronchery.endif 198*4757b351SPierre Pronchery.if mixin == 1 199*4757b351SPierre Pronchery eor w21,w21,w9 200*4757b351SPierre Pronchery.endif 201*4757b351SPierre Pronchery.inst 0x0470350b //xar z11.s,z11.s,z8.s,16 202*4757b351SPierre Pronchery.if mixin == 1 203*4757b351SPierre Pronchery ror w21,w21,16 204*4757b351SPierre Pronchery.endif 205*4757b351SPierre Pronchery.if mixin == 1 206*4757b351SPierre Pronchery eor w22,w22,w10 207*4757b351SPierre Pronchery.endif 208*4757b351SPierre Pronchery.inst 0x0470358f //xar z15.s,z15.s,z12.s,16 209*4757b351SPierre Pronchery.if mixin == 1 210*4757b351SPierre Pronchery ror w22,w22,16 211*4757b351SPierre Pronchery.endif 212*4757b351SPierre Pronchery.inst 0x04a30042 //add z2.s,z2.s,z3.s 213*4757b351SPierre Pronchery.if mixin == 1 214*4757b351SPierre Pronchery add w15,w15,w19 215*4757b351SPierre Pronchery.endif 216*4757b351SPierre Pronchery.inst 0x04a700c6 //add z6.s,z6.s,z7.s 217*4757b351SPierre Pronchery.if mixin == 1 218*4757b351SPierre Pronchery add w16,w16,w20 219*4757b351SPierre Pronchery.endif 220*4757b351SPierre Pronchery.inst 0x04ab014a //add z10.s,z10.s,z11.s 221*4757b351SPierre Pronchery.if mixin == 1 222*4757b351SPierre Pronchery add w17,w17,w21 223*4757b351SPierre Pronchery.endif 
224*4757b351SPierre Pronchery.inst 0x04af01ce //add z14.s,z14.s,z15.s 225*4757b351SPierre Pronchery.if mixin == 1 226*4757b351SPierre Pronchery add w18,w18,w22 227*4757b351SPierre Pronchery.endif 228*4757b351SPierre Pronchery.if mixin == 1 229*4757b351SPierre Pronchery eor w11,w11,w15 230*4757b351SPierre Pronchery.endif 231*4757b351SPierre Pronchery.inst 0x046c3441 //xar z1.s,z1.s,z2.s,20 232*4757b351SPierre Pronchery.if mixin == 1 233*4757b351SPierre Pronchery ror w11,w11,20 234*4757b351SPierre Pronchery.endif 235*4757b351SPierre Pronchery.if mixin == 1 236*4757b351SPierre Pronchery eor w12,w12,w16 237*4757b351SPierre Pronchery.endif 238*4757b351SPierre Pronchery.inst 0x046c34c5 //xar z5.s,z5.s,z6.s,20 239*4757b351SPierre Pronchery.if mixin == 1 240*4757b351SPierre Pronchery ror w12,w12,20 241*4757b351SPierre Pronchery.endif 242*4757b351SPierre Pronchery.if mixin == 1 243*4757b351SPierre Pronchery eor w13,w13,w17 244*4757b351SPierre Pronchery.endif 245*4757b351SPierre Pronchery.inst 0x046c3549 //xar z9.s,z9.s,z10.s,20 246*4757b351SPierre Pronchery.if mixin == 1 247*4757b351SPierre Pronchery ror w13,w13,20 248*4757b351SPierre Pronchery.endif 249*4757b351SPierre Pronchery.if mixin == 1 250*4757b351SPierre Pronchery eor w14,w14,w18 251*4757b351SPierre Pronchery.endif 252*4757b351SPierre Pronchery.inst 0x046c35cd //xar z13.s,z13.s,z14.s,20 253*4757b351SPierre Pronchery.if mixin == 1 254*4757b351SPierre Pronchery ror w14,w14,20 255*4757b351SPierre Pronchery.endif 256*4757b351SPierre Pronchery.inst 0x04a10000 //add z0.s,z0.s,z1.s 257*4757b351SPierre Pronchery.if mixin == 1 258*4757b351SPierre Pronchery add w7,w7,w11 259*4757b351SPierre Pronchery.endif 260*4757b351SPierre Pronchery.inst 0x04a50084 //add z4.s,z4.s,z5.s 261*4757b351SPierre Pronchery.if mixin == 1 262*4757b351SPierre Pronchery add w8,w8,w12 263*4757b351SPierre Pronchery.endif 264*4757b351SPierre Pronchery.inst 0x04a90108 //add z8.s,z8.s,z9.s 265*4757b351SPierre Pronchery.if mixin == 1 266*4757b351SPierre 
Pronchery add w9,w9,w13 267*4757b351SPierre Pronchery.endif 268*4757b351SPierre Pronchery.inst 0x04ad018c //add z12.s,z12.s,z13.s 269*4757b351SPierre Pronchery.if mixin == 1 270*4757b351SPierre Pronchery add w10,w10,w14 271*4757b351SPierre Pronchery.endif 272*4757b351SPierre Pronchery.if mixin == 1 273*4757b351SPierre Pronchery eor w19,w19,w7 274*4757b351SPierre Pronchery.endif 275*4757b351SPierre Pronchery.inst 0x04683403 //xar z3.s,z3.s,z0.s,24 276*4757b351SPierre Pronchery.if mixin == 1 277*4757b351SPierre Pronchery ror w19,w19,24 278*4757b351SPierre Pronchery.endif 279*4757b351SPierre Pronchery.if mixin == 1 280*4757b351SPierre Pronchery eor w20,w20,w8 281*4757b351SPierre Pronchery.endif 282*4757b351SPierre Pronchery.inst 0x04683487 //xar z7.s,z7.s,z4.s,24 283*4757b351SPierre Pronchery.if mixin == 1 284*4757b351SPierre Pronchery ror w20,w20,24 285*4757b351SPierre Pronchery.endif 286*4757b351SPierre Pronchery.if mixin == 1 287*4757b351SPierre Pronchery eor w21,w21,w9 288*4757b351SPierre Pronchery.endif 289*4757b351SPierre Pronchery.inst 0x0468350b //xar z11.s,z11.s,z8.s,24 290*4757b351SPierre Pronchery.if mixin == 1 291*4757b351SPierre Pronchery ror w21,w21,24 292*4757b351SPierre Pronchery.endif 293*4757b351SPierre Pronchery.if mixin == 1 294*4757b351SPierre Pronchery eor w22,w22,w10 295*4757b351SPierre Pronchery.endif 296*4757b351SPierre Pronchery.inst 0x0468358f //xar z15.s,z15.s,z12.s,24 297*4757b351SPierre Pronchery.if mixin == 1 298*4757b351SPierre Pronchery ror w22,w22,24 299*4757b351SPierre Pronchery.endif 300*4757b351SPierre Pronchery.inst 0x04a30042 //add z2.s,z2.s,z3.s 301*4757b351SPierre Pronchery.if mixin == 1 302*4757b351SPierre Pronchery add w15,w15,w19 303*4757b351SPierre Pronchery.endif 304*4757b351SPierre Pronchery.inst 0x04a700c6 //add z6.s,z6.s,z7.s 305*4757b351SPierre Pronchery.if mixin == 1 306*4757b351SPierre Pronchery add w16,w16,w20 307*4757b351SPierre Pronchery.endif 308*4757b351SPierre Pronchery.inst 0x04ab014a //add z10.s,z10.s,z11.s 
309*4757b351SPierre Pronchery.if mixin == 1 310*4757b351SPierre Pronchery add w17,w17,w21 311*4757b351SPierre Pronchery.endif 312*4757b351SPierre Pronchery.inst 0x04af01ce //add z14.s,z14.s,z15.s 313*4757b351SPierre Pronchery.if mixin == 1 314*4757b351SPierre Pronchery add w18,w18,w22 315*4757b351SPierre Pronchery.endif 316*4757b351SPierre Pronchery.if mixin == 1 317*4757b351SPierre Pronchery eor w11,w11,w15 318*4757b351SPierre Pronchery.endif 319*4757b351SPierre Pronchery.inst 0x04673441 //xar z1.s,z1.s,z2.s,25 320*4757b351SPierre Pronchery.if mixin == 1 321*4757b351SPierre Pronchery ror w11,w11,25 322*4757b351SPierre Pronchery.endif 323*4757b351SPierre Pronchery.if mixin == 1 324*4757b351SPierre Pronchery eor w12,w12,w16 325*4757b351SPierre Pronchery.endif 326*4757b351SPierre Pronchery.inst 0x046734c5 //xar z5.s,z5.s,z6.s,25 327*4757b351SPierre Pronchery.if mixin == 1 328*4757b351SPierre Pronchery ror w12,w12,25 329*4757b351SPierre Pronchery.endif 330*4757b351SPierre Pronchery.if mixin == 1 331*4757b351SPierre Pronchery eor w13,w13,w17 332*4757b351SPierre Pronchery.endif 333*4757b351SPierre Pronchery.inst 0x04673549 //xar z9.s,z9.s,z10.s,25 334*4757b351SPierre Pronchery.if mixin == 1 335*4757b351SPierre Pronchery ror w13,w13,25 336*4757b351SPierre Pronchery.endif 337*4757b351SPierre Pronchery.if mixin == 1 338*4757b351SPierre Pronchery eor w14,w14,w18 339*4757b351SPierre Pronchery.endif 340*4757b351SPierre Pronchery.inst 0x046735cd //xar z13.s,z13.s,z14.s,25 341*4757b351SPierre Pronchery.if mixin == 1 342*4757b351SPierre Pronchery ror w14,w14,25 343*4757b351SPierre Pronchery.endif 344*4757b351SPierre Pronchery.inst 0x04a50000 //add z0.s,z0.s,z5.s 345*4757b351SPierre Pronchery.if mixin == 1 346*4757b351SPierre Pronchery add w7,w7,w12 347*4757b351SPierre Pronchery.endif 348*4757b351SPierre Pronchery.inst 0x04a90084 //add z4.s,z4.s,z9.s 349*4757b351SPierre Pronchery.if mixin == 1 350*4757b351SPierre Pronchery add w8,w8,w13 351*4757b351SPierre Pronchery.endif 
352*4757b351SPierre Pronchery.inst 0x04ad0108 //add z8.s,z8.s,z13.s 353*4757b351SPierre Pronchery.if mixin == 1 354*4757b351SPierre Pronchery add w9,w9,w14 355*4757b351SPierre Pronchery.endif 356*4757b351SPierre Pronchery.inst 0x04a1018c //add z12.s,z12.s,z1.s 357*4757b351SPierre Pronchery.if mixin == 1 358*4757b351SPierre Pronchery add w10,w10,w11 359*4757b351SPierre Pronchery.endif 360*4757b351SPierre Pronchery.if mixin == 1 361*4757b351SPierre Pronchery eor w22,w22,w7 362*4757b351SPierre Pronchery.endif 363*4757b351SPierre Pronchery.inst 0x0470340f //xar z15.s,z15.s,z0.s,16 364*4757b351SPierre Pronchery.if mixin == 1 365*4757b351SPierre Pronchery ror w22,w22,16 366*4757b351SPierre Pronchery.endif 367*4757b351SPierre Pronchery.if mixin == 1 368*4757b351SPierre Pronchery eor w19,w19,w8 369*4757b351SPierre Pronchery.endif 370*4757b351SPierre Pronchery.inst 0x04703483 //xar z3.s,z3.s,z4.s,16 371*4757b351SPierre Pronchery.if mixin == 1 372*4757b351SPierre Pronchery ror w19,w19,16 373*4757b351SPierre Pronchery.endif 374*4757b351SPierre Pronchery.if mixin == 1 375*4757b351SPierre Pronchery eor w20,w20,w9 376*4757b351SPierre Pronchery.endif 377*4757b351SPierre Pronchery.inst 0x04703507 //xar z7.s,z7.s,z8.s,16 378*4757b351SPierre Pronchery.if mixin == 1 379*4757b351SPierre Pronchery ror w20,w20,16 380*4757b351SPierre Pronchery.endif 381*4757b351SPierre Pronchery.if mixin == 1 382*4757b351SPierre Pronchery eor w21,w21,w10 383*4757b351SPierre Pronchery.endif 384*4757b351SPierre Pronchery.inst 0x0470358b //xar z11.s,z11.s,z12.s,16 385*4757b351SPierre Pronchery.if mixin == 1 386*4757b351SPierre Pronchery ror w21,w21,16 387*4757b351SPierre Pronchery.endif 388*4757b351SPierre Pronchery.inst 0x04af014a //add z10.s,z10.s,z15.s 389*4757b351SPierre Pronchery.if mixin == 1 390*4757b351SPierre Pronchery add w17,w17,w22 391*4757b351SPierre Pronchery.endif 392*4757b351SPierre Pronchery.inst 0x04a301ce //add z14.s,z14.s,z3.s 393*4757b351SPierre Pronchery.if mixin == 1 
394*4757b351SPierre Pronchery add w18,w18,w19 395*4757b351SPierre Pronchery.endif 396*4757b351SPierre Pronchery.inst 0x04a70042 //add z2.s,z2.s,z7.s 397*4757b351SPierre Pronchery.if mixin == 1 398*4757b351SPierre Pronchery add w15,w15,w20 399*4757b351SPierre Pronchery.endif 400*4757b351SPierre Pronchery.inst 0x04ab00c6 //add z6.s,z6.s,z11.s 401*4757b351SPierre Pronchery.if mixin == 1 402*4757b351SPierre Pronchery add w16,w16,w21 403*4757b351SPierre Pronchery.endif 404*4757b351SPierre Pronchery.if mixin == 1 405*4757b351SPierre Pronchery eor w12,w12,w17 406*4757b351SPierre Pronchery.endif 407*4757b351SPierre Pronchery.inst 0x046c3545 //xar z5.s,z5.s,z10.s,20 408*4757b351SPierre Pronchery.if mixin == 1 409*4757b351SPierre Pronchery ror w12,w12,20 410*4757b351SPierre Pronchery.endif 411*4757b351SPierre Pronchery.if mixin == 1 412*4757b351SPierre Pronchery eor w13,w13,w18 413*4757b351SPierre Pronchery.endif 414*4757b351SPierre Pronchery.inst 0x046c35c9 //xar z9.s,z9.s,z14.s,20 415*4757b351SPierre Pronchery.if mixin == 1 416*4757b351SPierre Pronchery ror w13,w13,20 417*4757b351SPierre Pronchery.endif 418*4757b351SPierre Pronchery.if mixin == 1 419*4757b351SPierre Pronchery eor w14,w14,w15 420*4757b351SPierre Pronchery.endif 421*4757b351SPierre Pronchery.inst 0x046c344d //xar z13.s,z13.s,z2.s,20 422*4757b351SPierre Pronchery.if mixin == 1 423*4757b351SPierre Pronchery ror w14,w14,20 424*4757b351SPierre Pronchery.endif 425*4757b351SPierre Pronchery.if mixin == 1 426*4757b351SPierre Pronchery eor w11,w11,w16 427*4757b351SPierre Pronchery.endif 428*4757b351SPierre Pronchery.inst 0x046c34c1 //xar z1.s,z1.s,z6.s,20 429*4757b351SPierre Pronchery.if mixin == 1 430*4757b351SPierre Pronchery ror w11,w11,20 431*4757b351SPierre Pronchery.endif 432*4757b351SPierre Pronchery.inst 0x04a50000 //add z0.s,z0.s,z5.s 433*4757b351SPierre Pronchery.if mixin == 1 434*4757b351SPierre Pronchery add w7,w7,w12 435*4757b351SPierre Pronchery.endif 436*4757b351SPierre Pronchery.inst 0x04a90084 //add 
z4.s,z4.s,z9.s 437*4757b351SPierre Pronchery.if mixin == 1 438*4757b351SPierre Pronchery add w8,w8,w13 439*4757b351SPierre Pronchery.endif 440*4757b351SPierre Pronchery.inst 0x04ad0108 //add z8.s,z8.s,z13.s 441*4757b351SPierre Pronchery.if mixin == 1 442*4757b351SPierre Pronchery add w9,w9,w14 443*4757b351SPierre Pronchery.endif 444*4757b351SPierre Pronchery.inst 0x04a1018c //add z12.s,z12.s,z1.s 445*4757b351SPierre Pronchery.if mixin == 1 446*4757b351SPierre Pronchery add w10,w10,w11 447*4757b351SPierre Pronchery.endif 448*4757b351SPierre Pronchery.if mixin == 1 449*4757b351SPierre Pronchery eor w22,w22,w7 450*4757b351SPierre Pronchery.endif 451*4757b351SPierre Pronchery.inst 0x0468340f //xar z15.s,z15.s,z0.s,24 452*4757b351SPierre Pronchery.if mixin == 1 453*4757b351SPierre Pronchery ror w22,w22,24 454*4757b351SPierre Pronchery.endif 455*4757b351SPierre Pronchery.if mixin == 1 456*4757b351SPierre Pronchery eor w19,w19,w8 457*4757b351SPierre Pronchery.endif 458*4757b351SPierre Pronchery.inst 0x04683483 //xar z3.s,z3.s,z4.s,24 459*4757b351SPierre Pronchery.if mixin == 1 460*4757b351SPierre Pronchery ror w19,w19,24 461*4757b351SPierre Pronchery.endif 462*4757b351SPierre Pronchery.if mixin == 1 463*4757b351SPierre Pronchery eor w20,w20,w9 464*4757b351SPierre Pronchery.endif 465*4757b351SPierre Pronchery.inst 0x04683507 //xar z7.s,z7.s,z8.s,24 466*4757b351SPierre Pronchery.if mixin == 1 467*4757b351SPierre Pronchery ror w20,w20,24 468*4757b351SPierre Pronchery.endif 469*4757b351SPierre Pronchery.if mixin == 1 470*4757b351SPierre Pronchery eor w21,w21,w10 471*4757b351SPierre Pronchery.endif 472*4757b351SPierre Pronchery.inst 0x0468358b //xar z11.s,z11.s,z12.s,24 473*4757b351SPierre Pronchery.if mixin == 1 474*4757b351SPierre Pronchery ror w21,w21,24 475*4757b351SPierre Pronchery.endif 476*4757b351SPierre Pronchery.inst 0x04af014a //add z10.s,z10.s,z15.s 477*4757b351SPierre Pronchery.if mixin == 1 478*4757b351SPierre Pronchery add w17,w17,w22 479*4757b351SPierre 
Pronchery.endif 480*4757b351SPierre Pronchery.inst 0x04a301ce //add z14.s,z14.s,z3.s 481*4757b351SPierre Pronchery.if mixin == 1 482*4757b351SPierre Pronchery add w18,w18,w19 483*4757b351SPierre Pronchery.endif 484*4757b351SPierre Pronchery.inst 0x04a70042 //add z2.s,z2.s,z7.s 485*4757b351SPierre Pronchery.if mixin == 1 486*4757b351SPierre Pronchery add w15,w15,w20 487*4757b351SPierre Pronchery.endif 488*4757b351SPierre Pronchery.inst 0x04ab00c6 //add z6.s,z6.s,z11.s 489*4757b351SPierre Pronchery.if mixin == 1 490*4757b351SPierre Pronchery add w16,w16,w21 491*4757b351SPierre Pronchery.endif 492*4757b351SPierre Pronchery.if mixin == 1 493*4757b351SPierre Pronchery eor w12,w12,w17 494*4757b351SPierre Pronchery.endif 495*4757b351SPierre Pronchery.inst 0x04673545 //xar z5.s,z5.s,z10.s,25 496*4757b351SPierre Pronchery.if mixin == 1 497*4757b351SPierre Pronchery ror w12,w12,25 498*4757b351SPierre Pronchery.endif 499*4757b351SPierre Pronchery.if mixin == 1 500*4757b351SPierre Pronchery eor w13,w13,w18 501*4757b351SPierre Pronchery.endif 502*4757b351SPierre Pronchery.inst 0x046735c9 //xar z9.s,z9.s,z14.s,25 503*4757b351SPierre Pronchery.if mixin == 1 504*4757b351SPierre Pronchery ror w13,w13,25 505*4757b351SPierre Pronchery.endif 506*4757b351SPierre Pronchery.if mixin == 1 507*4757b351SPierre Pronchery eor w14,w14,w15 508*4757b351SPierre Pronchery.endif 509*4757b351SPierre Pronchery.inst 0x0467344d //xar z13.s,z13.s,z2.s,25 510*4757b351SPierre Pronchery.if mixin == 1 511*4757b351SPierre Pronchery ror w14,w14,25 512*4757b351SPierre Pronchery.endif 513*4757b351SPierre Pronchery.if mixin == 1 514*4757b351SPierre Pronchery eor w11,w11,w16 515*4757b351SPierre Pronchery.endif 516*4757b351SPierre Pronchery.inst 0x046734c1 //xar z1.s,z1.s,z6.s,25 517*4757b351SPierre Pronchery.if mixin == 1 518*4757b351SPierre Pronchery ror w11,w11,25 519*4757b351SPierre Pronchery.endif 520*4757b351SPierre Pronchery sub x6,x6,1 521*4757b351SPierre Pronchery cbnz x6,10b 522*4757b351SPierre 
Pronchery.if mixin == 1 523*4757b351SPierre Pronchery add w7,w7,w23 524*4757b351SPierre Pronchery.endif 525*4757b351SPierre Pronchery.inst 0x04b90000 //add z0.s,z0.s,z25.s 526*4757b351SPierre Pronchery.if mixin == 1 527*4757b351SPierre Pronchery add x8,x8,x23,lsr #32 528*4757b351SPierre Pronchery.endif 529*4757b351SPierre Pronchery.inst 0x04ba0084 //add z4.s,z4.s,z26.s 530*4757b351SPierre Pronchery.if mixin == 1 531*4757b351SPierre Pronchery add x7,x7,x8,lsl #32 // pack 532*4757b351SPierre Pronchery.endif 533*4757b351SPierre Pronchery.if mixin == 1 534*4757b351SPierre Pronchery add w9,w9,w24 535*4757b351SPierre Pronchery.endif 536*4757b351SPierre Pronchery.inst 0x04bb0108 //add z8.s,z8.s,z27.s 537*4757b351SPierre Pronchery.if mixin == 1 538*4757b351SPierre Pronchery add x10,x10,x24,lsr #32 539*4757b351SPierre Pronchery.endif 540*4757b351SPierre Pronchery.inst 0x04bc018c //add z12.s,z12.s,z28.s 541*4757b351SPierre Pronchery.if mixin == 1 542*4757b351SPierre Pronchery add x9,x9,x10,lsl #32 // pack 543*4757b351SPierre Pronchery.endif 544*4757b351SPierre Pronchery.if mixin == 1 545*4757b351SPierre Pronchery ldp x8,x10,[x1],#16 546*4757b351SPierre Pronchery.endif 547*4757b351SPierre Pronchery.if mixin == 1 548*4757b351SPierre Pronchery add w11,w11,w25 549*4757b351SPierre Pronchery.endif 550*4757b351SPierre Pronchery.inst 0x04bd0021 //add z1.s,z1.s,z29.s 551*4757b351SPierre Pronchery.if mixin == 1 552*4757b351SPierre Pronchery add x12,x12,x25,lsr #32 553*4757b351SPierre Pronchery.endif 554*4757b351SPierre Pronchery.inst 0x04be00a5 //add z5.s,z5.s,z30.s 555*4757b351SPierre Pronchery.if mixin == 1 556*4757b351SPierre Pronchery add x11,x11,x12,lsl #32 // pack 557*4757b351SPierre Pronchery.endif 558*4757b351SPierre Pronchery.if mixin == 1 559*4757b351SPierre Pronchery add w13,w13,w26 560*4757b351SPierre Pronchery.endif 561*4757b351SPierre Pronchery.inst 0x04b50129 //add z9.s,z9.s,z21.s 562*4757b351SPierre Pronchery.if mixin == 1 563*4757b351SPierre Pronchery add 
x14,x14,x26,lsr #32 564*4757b351SPierre Pronchery.endif 565*4757b351SPierre Pronchery.inst 0x04b601ad //add z13.s,z13.s,z22.s 566*4757b351SPierre Pronchery.if mixin == 1 567*4757b351SPierre Pronchery add x13,x13,x14,lsl #32 // pack 568*4757b351SPierre Pronchery.endif 569*4757b351SPierre Pronchery.if mixin == 1 570*4757b351SPierre Pronchery ldp x12,x14,[x1],#16 571*4757b351SPierre Pronchery.endif 572*4757b351SPierre Pronchery.if mixin == 1 573*4757b351SPierre Pronchery add w15,w15,w27 574*4757b351SPierre Pronchery.endif 575*4757b351SPierre Pronchery.inst 0x04b70042 //add z2.s,z2.s,z23.s 576*4757b351SPierre Pronchery.if mixin == 1 577*4757b351SPierre Pronchery add x16,x16,x27,lsr #32 578*4757b351SPierre Pronchery.endif 579*4757b351SPierre Pronchery.inst 0x04b800c6 //add z6.s,z6.s,z24.s 580*4757b351SPierre Pronchery.if mixin == 1 581*4757b351SPierre Pronchery add x15,x15,x16,lsl #32 // pack 582*4757b351SPierre Pronchery.endif 583*4757b351SPierre Pronchery.if mixin == 1 584*4757b351SPierre Pronchery add w17,w17,w28 585*4757b351SPierre Pronchery.endif 586*4757b351SPierre Pronchery.inst 0x04b1014a //add z10.s,z10.s,z17.s 587*4757b351SPierre Pronchery.if mixin == 1 588*4757b351SPierre Pronchery add x18,x18,x28,lsr #32 589*4757b351SPierre Pronchery.endif 590*4757b351SPierre Pronchery.inst 0x04b201ce //add z14.s,z14.s,z18.s 591*4757b351SPierre Pronchery.if mixin == 1 592*4757b351SPierre Pronchery add x17,x17,x18,lsl #32 // pack 593*4757b351SPierre Pronchery.endif 594*4757b351SPierre Pronchery.if mixin == 1 595*4757b351SPierre Pronchery ldp x16,x18,[x1],#16 596*4757b351SPierre Pronchery.endif 597*4757b351SPierre Pronchery.if mixin == 1 598*4757b351SPierre Pronchery add w19,w19,w29 599*4757b351SPierre Pronchery.endif 600*4757b351SPierre Pronchery.inst 0x04b00063 //add z3.s,z3.s,z16.s 601*4757b351SPierre Pronchery.if mixin == 1 602*4757b351SPierre Pronchery add x20,x20,x29,lsr #32 603*4757b351SPierre Pronchery.endif 604*4757b351SPierre Pronchery.inst 0x04b300e7 //add 
z7.s,z7.s,z19.s 605*4757b351SPierre Pronchery.if mixin == 1 606*4757b351SPierre Pronchery add x19,x19,x20,lsl #32 // pack 607*4757b351SPierre Pronchery.endif 608*4757b351SPierre Pronchery.if mixin == 1 609*4757b351SPierre Pronchery add w21,w21,w30 610*4757b351SPierre Pronchery.endif 611*4757b351SPierre Pronchery.inst 0x04b4016b //add z11.s,z11.s,z20.s 612*4757b351SPierre Pronchery.if mixin == 1 613*4757b351SPierre Pronchery add x22,x22,x30,lsr #32 614*4757b351SPierre Pronchery.endif 615*4757b351SPierre Pronchery.inst 0x04bf01ef //add z15.s,z15.s,z31.s 616*4757b351SPierre Pronchery.if mixin == 1 617*4757b351SPierre Pronchery add x21,x21,x22,lsl #32 // pack 618*4757b351SPierre Pronchery.endif 619*4757b351SPierre Pronchery.if mixin == 1 620*4757b351SPierre Pronchery ldp x20,x22,[x1],#16 621*4757b351SPierre Pronchery.endif 622*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 623*4757b351SPierre Pronchery rev x7,x7 624*4757b351SPierre Pronchery.inst 0x05a48000 //revb z0.s,p0/m,z0.s 625*4757b351SPierre Pronchery.inst 0x05a48084 //revb z4.s,p0/m,z4.s 626*4757b351SPierre Pronchery rev x9,x9 627*4757b351SPierre Pronchery.inst 0x05a48108 //revb z8.s,p0/m,z8.s 628*4757b351SPierre Pronchery.inst 0x05a4818c //revb z12.s,p0/m,z12.s 629*4757b351SPierre Pronchery rev x11,x11 630*4757b351SPierre Pronchery.inst 0x05a48021 //revb z1.s,p0/m,z1.s 631*4757b351SPierre Pronchery.inst 0x05a480a5 //revb z5.s,p0/m,z5.s 632*4757b351SPierre Pronchery rev x13,x13 633*4757b351SPierre Pronchery.inst 0x05a48129 //revb z9.s,p0/m,z9.s 634*4757b351SPierre Pronchery.inst 0x05a481ad //revb z13.s,p0/m,z13.s 635*4757b351SPierre Pronchery rev x15,x15 636*4757b351SPierre Pronchery.inst 0x05a48042 //revb z2.s,p0/m,z2.s 637*4757b351SPierre Pronchery.inst 0x05a480c6 //revb z6.s,p0/m,z6.s 638*4757b351SPierre Pronchery rev x17,x17 639*4757b351SPierre Pronchery.inst 0x05a4814a //revb z10.s,p0/m,z10.s 640*4757b351SPierre Pronchery.inst 0x05a481ce //revb z14.s,p0/m,z14.s 641*4757b351SPierre Pronchery rev x19,x19 
642*4757b351SPierre Pronchery.inst 0x05a48063 //revb z3.s,p0/m,z3.s 643*4757b351SPierre Pronchery.inst 0x05a480e7 //revb z7.s,p0/m,z7.s 644*4757b351SPierre Pronchery rev x21,x21 645*4757b351SPierre Pronchery.inst 0x05a4816b //revb z11.s,p0/m,z11.s 646*4757b351SPierre Pronchery.inst 0x05a481ef //revb z15.s,p0/m,z15.s 647*4757b351SPierre Pronchery#endif 648*4757b351SPierre Pronchery.if mixin == 1 649*4757b351SPierre Pronchery add x29,x29,#1 650*4757b351SPierre Pronchery.endif 651*4757b351SPierre Pronchery cmp x5,4 652*4757b351SPierre Pronchery b.ne 200f 653*4757b351SPierre Pronchery.if mixin == 1 654*4757b351SPierre Pronchery eor x7,x7,x8 655*4757b351SPierre Pronchery.endif 656*4757b351SPierre Pronchery.if mixin == 1 657*4757b351SPierre Pronchery eor x9,x9,x10 658*4757b351SPierre Pronchery.endif 659*4757b351SPierre Pronchery.if mixin == 1 660*4757b351SPierre Pronchery eor x11,x11,x12 661*4757b351SPierre Pronchery.endif 662*4757b351SPierre Pronchery.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s 663*4757b351SPierre Pronchery.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s 664*4757b351SPierre Pronchery.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s 665*4757b351SPierre Pronchery.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s 666*4757b351SPierre Pronchery 667*4757b351SPierre Pronchery.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s 668*4757b351SPierre Pronchery.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s 669*4757b351SPierre Pronchery.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s 670*4757b351SPierre Pronchery.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s 671*4757b351SPierre Pronchery 672*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 673*4757b351SPierre Pronchery.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d 674*4757b351SPierre Pronchery.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d 675*4757b351SPierre Pronchery.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d 676*4757b351SPierre Pronchery 677*4757b351SPierre Pronchery.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d 678*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 
679*4757b351SPierre Pronchery.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d 680*4757b351SPierre Pronchery.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d 681*4757b351SPierre Pronchery.if mixin == 1 682*4757b351SPierre Pronchery eor x13,x13,x14 683*4757b351SPierre Pronchery.endif 684*4757b351SPierre Pronchery.if mixin == 1 685*4757b351SPierre Pronchery eor x15,x15,x16 686*4757b351SPierre Pronchery.endif 687*4757b351SPierre Pronchery.if mixin == 1 688*4757b351SPierre Pronchery eor x17,x17,x18 689*4757b351SPierre Pronchery.endif 690*4757b351SPierre Pronchery.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s 691*4757b351SPierre Pronchery.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s 692*4757b351SPierre Pronchery.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s 693*4757b351SPierre Pronchery.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s 694*4757b351SPierre Pronchery 695*4757b351SPierre Pronchery.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s 696*4757b351SPierre Pronchery.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s 697*4757b351SPierre Pronchery.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s 698*4757b351SPierre Pronchery.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s 699*4757b351SPierre Pronchery 700*4757b351SPierre Pronchery.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d 701*4757b351SPierre Pronchery.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d 702*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 703*4757b351SPierre Pronchery.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d 704*4757b351SPierre Pronchery 705*4757b351SPierre Pronchery.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d 706*4757b351SPierre Pronchery.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d 707*4757b351SPierre Pronchery.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d 708*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 709*4757b351SPierre Pronchery.if mixin == 1 710*4757b351SPierre Pronchery eor x19,x19,x20 711*4757b351SPierre Pronchery.endif 712*4757b351SPierre Pronchery.if mixin == 1 713*4757b351SPierre Pronchery eor x21,x21,x22 714*4757b351SPierre Pronchery.endif 
715*4757b351SPierre Pronchery ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 716*4757b351SPierre Pronchery ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 717*4757b351SPierre Pronchery.inst 0x04b13000 //eor z0.d,z0.d,z17.d 718*4757b351SPierre Pronchery.inst 0x04b23021 //eor z1.d,z1.d,z18.d 719*4757b351SPierre Pronchery.inst 0x04b33042 //eor z2.d,z2.d,z19.d 720*4757b351SPierre Pronchery.inst 0x04b43063 //eor z3.d,z3.d,z20.d 721*4757b351SPierre Pronchery.inst 0x04b53084 //eor z4.d,z4.d,z21.d 722*4757b351SPierre Pronchery.inst 0x04b630a5 //eor z5.d,z5.d,z22.d 723*4757b351SPierre Pronchery.inst 0x04b730c6 //eor z6.d,z6.d,z23.d 724*4757b351SPierre Pronchery.inst 0x04b830e7 //eor z7.d,z7.d,z24.d 725*4757b351SPierre Pronchery ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 726*4757b351SPierre Pronchery ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 727*4757b351SPierre Pronchery.if mixin == 1 728*4757b351SPierre Pronchery stp x7,x9,[x0],#16 729*4757b351SPierre Pronchery.endif 730*4757b351SPierre Pronchery.inst 0x04b13108 //eor z8.d,z8.d,z17.d 731*4757b351SPierre Pronchery.inst 0x04b23129 //eor z9.d,z9.d,z18.d 732*4757b351SPierre Pronchery.if mixin == 1 733*4757b351SPierre Pronchery stp x11,x13,[x0],#16 734*4757b351SPierre Pronchery.endif 735*4757b351SPierre Pronchery.inst 0x04b3314a //eor z10.d,z10.d,z19.d 736*4757b351SPierre Pronchery.inst 0x04b4316b //eor z11.d,z11.d,z20.d 737*4757b351SPierre Pronchery.if mixin == 1 738*4757b351SPierre Pronchery stp x15,x17,[x0],#16 739*4757b351SPierre Pronchery.endif 740*4757b351SPierre Pronchery.inst 0x04b5318c //eor z12.d,z12.d,z21.d 741*4757b351SPierre Pronchery.inst 0x04b631ad //eor z13.d,z13.d,z22.d 742*4757b351SPierre Pronchery.if mixin == 1 743*4757b351SPierre Pronchery stp x19,x21,[x0],#16 744*4757b351SPierre Pronchery.endif 745*4757b351SPierre Pronchery.inst 0x04b731ce //eor z14.d,z14.d,z23.d 746*4757b351SPierre Pronchery.inst 0x04b831ef //eor z15.d,z15.d,z24.d 747*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64 
748*4757b351SPierre Pronchery st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 749*4757b351SPierre Pronchery st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 750*4757b351SPierre Pronchery st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 751*4757b351SPierre Pronchery b 210f 752*4757b351SPierre Pronchery200: 753*4757b351SPierre Pronchery.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s 754*4757b351SPierre Pronchery.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s 755*4757b351SPierre Pronchery.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s 756*4757b351SPierre Pronchery.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s 757*4757b351SPierre Pronchery 758*4757b351SPierre Pronchery.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s 759*4757b351SPierre Pronchery.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s 760*4757b351SPierre Pronchery.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s 761*4757b351SPierre Pronchery.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s 762*4757b351SPierre Pronchery 763*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 764*4757b351SPierre Pronchery.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d 765*4757b351SPierre Pronchery.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d 766*4757b351SPierre Pronchery.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d 767*4757b351SPierre Pronchery 768*4757b351SPierre Pronchery.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d 769*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 770*4757b351SPierre Pronchery.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d 771*4757b351SPierre Pronchery.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d 772*4757b351SPierre Pronchery.if mixin == 1 773*4757b351SPierre Pronchery eor x7,x7,x8 774*4757b351SPierre Pronchery.endif 775*4757b351SPierre Pronchery.if mixin == 1 776*4757b351SPierre Pronchery eor x9,x9,x10 777*4757b351SPierre Pronchery.endif 778*4757b351SPierre Pronchery.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s 779*4757b351SPierre Pronchery.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s 780*4757b351SPierre Pronchery.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s 781*4757b351SPierre Pronchery.inst 0x05ab6554 //zip2 
z20.s,z10.s,z11.s 782*4757b351SPierre Pronchery 783*4757b351SPierre Pronchery.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s 784*4757b351SPierre Pronchery.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s 785*4757b351SPierre Pronchery.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s 786*4757b351SPierre Pronchery.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s 787*4757b351SPierre Pronchery 788*4757b351SPierre Pronchery.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d 789*4757b351SPierre Pronchery.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d 790*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 791*4757b351SPierre Pronchery.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d 792*4757b351SPierre Pronchery 793*4757b351SPierre Pronchery.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d 794*4757b351SPierre Pronchery.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d 795*4757b351SPierre Pronchery.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d 796*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 797*4757b351SPierre Pronchery.if mixin == 1 798*4757b351SPierre Pronchery eor x11,x11,x12 799*4757b351SPierre Pronchery.endif 800*4757b351SPierre Pronchery.if mixin == 1 801*4757b351SPierre Pronchery eor x13,x13,x14 802*4757b351SPierre Pronchery.endif 803*4757b351SPierre Pronchery.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s 804*4757b351SPierre Pronchery.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s 805*4757b351SPierre Pronchery.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s 806*4757b351SPierre Pronchery.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s 807*4757b351SPierre Pronchery 808*4757b351SPierre Pronchery.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s 809*4757b351SPierre Pronchery.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s 810*4757b351SPierre Pronchery.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s 811*4757b351SPierre Pronchery.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s 812*4757b351SPierre Pronchery 813*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 814*4757b351SPierre Pronchery.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d 815*4757b351SPierre 
Pronchery.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d 816*4757b351SPierre Pronchery.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d 817*4757b351SPierre Pronchery 818*4757b351SPierre Pronchery.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d 819*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 820*4757b351SPierre Pronchery.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d 821*4757b351SPierre Pronchery.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d 822*4757b351SPierre Pronchery.if mixin == 1 823*4757b351SPierre Pronchery eor x15,x15,x16 824*4757b351SPierre Pronchery.endif 825*4757b351SPierre Pronchery.if mixin == 1 826*4757b351SPierre Pronchery eor x17,x17,x18 827*4757b351SPierre Pronchery.endif 828*4757b351SPierre Pronchery.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s 829*4757b351SPierre Pronchery.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s 830*4757b351SPierre Pronchery.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s 831*4757b351SPierre Pronchery.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s 832*4757b351SPierre Pronchery 833*4757b351SPierre Pronchery.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s 834*4757b351SPierre Pronchery.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s 835*4757b351SPierre Pronchery.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s 836*4757b351SPierre Pronchery.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s 837*4757b351SPierre Pronchery 838*4757b351SPierre Pronchery.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d 839*4757b351SPierre Pronchery.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d 840*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 841*4757b351SPierre Pronchery.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d 842*4757b351SPierre Pronchery 843*4757b351SPierre Pronchery.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d 844*4757b351SPierre Pronchery.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d 845*4757b351SPierre Pronchery.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d 846*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 847*4757b351SPierre Pronchery.if mixin == 1 848*4757b351SPierre Pronchery eor x19,x19,x20 
849*4757b351SPierre Pronchery.endif 850*4757b351SPierre Pronchery.if mixin == 1 851*4757b351SPierre Pronchery eor x21,x21,x22 852*4757b351SPierre Pronchery.endif 853*4757b351SPierre Pronchery.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] 854*4757b351SPierre Pronchery.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] 855*4757b351SPierre Pronchery.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] 856*4757b351SPierre Pronchery.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] 857*4757b351SPierre Pronchery.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] 858*4757b351SPierre Pronchery.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] 859*4757b351SPierre Pronchery.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] 860*4757b351SPierre Pronchery.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] 861*4757b351SPierre Pronchery.inst 0x04215101 //addvl x1,x1,8 862*4757b351SPierre Pronchery.inst 0x04b13000 //eor z0.d,z0.d,z17.d 863*4757b351SPierre Pronchery.inst 0x04b23084 //eor z4.d,z4.d,z18.d 864*4757b351SPierre Pronchery.inst 0x04b33108 //eor z8.d,z8.d,z19.d 865*4757b351SPierre Pronchery.inst 0x04b4318c //eor z12.d,z12.d,z20.d 866*4757b351SPierre Pronchery.inst 0x04b53021 //eor z1.d,z1.d,z21.d 867*4757b351SPierre Pronchery.inst 0x04b630a5 //eor z5.d,z5.d,z22.d 868*4757b351SPierre Pronchery.inst 0x04b73129 //eor z9.d,z9.d,z23.d 869*4757b351SPierre Pronchery.inst 0x04b831ad //eor z13.d,z13.d,z24.d 870*4757b351SPierre Pronchery.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] 871*4757b351SPierre Pronchery.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] 872*4757b351SPierre Pronchery.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] 873*4757b351SPierre Pronchery.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] 874*4757b351SPierre Pronchery.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] 875*4757b351SPierre Pronchery.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] 876*4757b351SPierre Pronchery.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] 
877*4757b351SPierre Pronchery.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] 878*4757b351SPierre Pronchery.inst 0x04215101 //addvl x1,x1,8 879*4757b351SPierre Pronchery.if mixin == 1 880*4757b351SPierre Pronchery stp x7,x9,[x0],#16 881*4757b351SPierre Pronchery.endif 882*4757b351SPierre Pronchery.inst 0x04b13042 //eor z2.d,z2.d,z17.d 883*4757b351SPierre Pronchery.inst 0x04b230c6 //eor z6.d,z6.d,z18.d 884*4757b351SPierre Pronchery.if mixin == 1 885*4757b351SPierre Pronchery stp x11,x13,[x0],#16 886*4757b351SPierre Pronchery.endif 887*4757b351SPierre Pronchery.inst 0x04b3314a //eor z10.d,z10.d,z19.d 888*4757b351SPierre Pronchery.inst 0x04b431ce //eor z14.d,z14.d,z20.d 889*4757b351SPierre Pronchery.if mixin == 1 890*4757b351SPierre Pronchery stp x15,x17,[x0],#16 891*4757b351SPierre Pronchery.endif 892*4757b351SPierre Pronchery.inst 0x04b53063 //eor z3.d,z3.d,z21.d 893*4757b351SPierre Pronchery.inst 0x04b630e7 //eor z7.d,z7.d,z22.d 894*4757b351SPierre Pronchery.if mixin == 1 895*4757b351SPierre Pronchery stp x19,x21,[x0],#16 896*4757b351SPierre Pronchery.endif 897*4757b351SPierre Pronchery.inst 0x04b7316b //eor z11.d,z11.d,z23.d 898*4757b351SPierre Pronchery.inst 0x04b831ef //eor z15.d,z15.d,z24.d 899*4757b351SPierre Pronchery.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL] 900*4757b351SPierre Pronchery.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL] 901*4757b351SPierre Pronchery.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL] 902*4757b351SPierre Pronchery.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL] 903*4757b351SPierre Pronchery.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL] 904*4757b351SPierre Pronchery.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL] 905*4757b351SPierre Pronchery.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL] 906*4757b351SPierre Pronchery.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL] 907*4757b351SPierre Pronchery.inst 0x04205100 //addvl x0,x0,8 908*4757b351SPierre Pronchery.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL] 
909*4757b351SPierre Pronchery.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL] 910*4757b351SPierre Pronchery.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL] 911*4757b351SPierre Pronchery.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL] 912*4757b351SPierre Pronchery.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL] 913*4757b351SPierre Pronchery.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL] 914*4757b351SPierre Pronchery.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL] 915*4757b351SPierre Pronchery.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL] 916*4757b351SPierre Pronchery.inst 0x04205100 //addvl x0,x0,8 917*4757b351SPierre Pronchery210: 918*4757b351SPierre Pronchery.inst 0x04b0e3fd //incw x29, ALL, MUL #1 919*4757b351SPierre Pronchery subs x2,x2,64 920*4757b351SPierre Pronchery b.gt 100b 921*4757b351SPierre Pronchery b 110f 922*4757b351SPierre Pronchery101: 923*4757b351SPierre Pronchery mixin=0 924*4757b351SPierre Pronchery lsr x8,x23,#32 925*4757b351SPierre Pronchery.inst 0x05a03ae0 //dup z0.s,w23 926*4757b351SPierre Pronchery.inst 0x05a03af9 //dup z25.s,w23 927*4757b351SPierre Pronchery.if mixin == 1 928*4757b351SPierre Pronchery mov w7,w23 929*4757b351SPierre Pronchery.endif 930*4757b351SPierre Pronchery.inst 0x05a03904 //dup z4.s,w8 931*4757b351SPierre Pronchery.inst 0x05a0391a //dup z26.s,w8 932*4757b351SPierre Pronchery lsr x10,x24,#32 933*4757b351SPierre Pronchery.inst 0x05a03b08 //dup z8.s,w24 934*4757b351SPierre Pronchery.inst 0x05a03b1b //dup z27.s,w24 935*4757b351SPierre Pronchery.if mixin == 1 936*4757b351SPierre Pronchery mov w9,w24 937*4757b351SPierre Pronchery.endif 938*4757b351SPierre Pronchery.inst 0x05a0394c //dup z12.s,w10 939*4757b351SPierre Pronchery.inst 0x05a0395c //dup z28.s,w10 940*4757b351SPierre Pronchery lsr x12,x25,#32 941*4757b351SPierre Pronchery.inst 0x05a03b21 //dup z1.s,w25 942*4757b351SPierre Pronchery.inst 0x05a03b3d //dup z29.s,w25 943*4757b351SPierre Pronchery.if mixin == 1 944*4757b351SPierre Pronchery mov w11,w25 
945*4757b351SPierre Pronchery.endif 946*4757b351SPierre Pronchery.inst 0x05a03985 //dup z5.s,w12 947*4757b351SPierre Pronchery.inst 0x05a0399e //dup z30.s,w12 948*4757b351SPierre Pronchery lsr x14,x26,#32 949*4757b351SPierre Pronchery.inst 0x05a03b49 //dup z9.s,w26 950*4757b351SPierre Pronchery.inst 0x05a03b55 //dup z21.s,w26 951*4757b351SPierre Pronchery.if mixin == 1 952*4757b351SPierre Pronchery mov w13,w26 953*4757b351SPierre Pronchery.endif 954*4757b351SPierre Pronchery.inst 0x05a039cd //dup z13.s,w14 955*4757b351SPierre Pronchery.inst 0x05a039d6 //dup z22.s,w14 956*4757b351SPierre Pronchery lsr x16,x27,#32 957*4757b351SPierre Pronchery.inst 0x05a03b62 //dup z2.s,w27 958*4757b351SPierre Pronchery.inst 0x05a03b77 //dup z23.s,w27 959*4757b351SPierre Pronchery.if mixin == 1 960*4757b351SPierre Pronchery mov w15,w27 961*4757b351SPierre Pronchery.endif 962*4757b351SPierre Pronchery.inst 0x05a03a06 //dup z6.s,w16 963*4757b351SPierre Pronchery.inst 0x05a03a18 //dup z24.s,w16 964*4757b351SPierre Pronchery lsr x18,x28,#32 965*4757b351SPierre Pronchery.inst 0x05a03b8a //dup z10.s,w28 966*4757b351SPierre Pronchery.inst 0x05a03b91 //dup z17.s,w28 967*4757b351SPierre Pronchery.if mixin == 1 968*4757b351SPierre Pronchery mov w17,w28 969*4757b351SPierre Pronchery.endif 970*4757b351SPierre Pronchery.inst 0x05a03a4e //dup z14.s,w18 971*4757b351SPierre Pronchery.inst 0x05a03a52 //dup z18.s,w18 972*4757b351SPierre Pronchery lsr x22,x30,#32 973*4757b351SPierre Pronchery.inst 0x05a03bcb //dup z11.s,w30 974*4757b351SPierre Pronchery.inst 0x05a03bd4 //dup z20.s,w30 975*4757b351SPierre Pronchery.if mixin == 1 976*4757b351SPierre Pronchery mov w21,w30 977*4757b351SPierre Pronchery.endif 978*4757b351SPierre Pronchery.inst 0x05a03acf //dup z15.s,w22 979*4757b351SPierre Pronchery.inst 0x05a03adf //dup z31.s,w22 980*4757b351SPierre Pronchery.if mixin == 1 981*4757b351SPierre Pronchery add w20,w29,#1 982*4757b351SPierre Pronchery mov w19,w29 983*4757b351SPierre Pronchery.inst 0x04a14690 
//index z16.s,w20,1 984*4757b351SPierre Pronchery.inst 0x04a14683 //index z3.s,w20,1 985*4757b351SPierre Pronchery.else 986*4757b351SPierre Pronchery.inst 0x04a147b0 //index z16.s,w29,1 987*4757b351SPierre Pronchery.inst 0x04a147a3 //index z3.s,w29,1 988*4757b351SPierre Pronchery.endif 989*4757b351SPierre Pronchery lsr x20,x29,#32 990*4757b351SPierre Pronchery.inst 0x05a03a87 //dup z7.s,w20 991*4757b351SPierre Pronchery.inst 0x05a03a93 //dup z19.s,w20 992*4757b351SPierre Pronchery mov x6,#10 993*4757b351SPierre Pronchery10: 994*4757b351SPierre Pronchery.align 5 995*4757b351SPierre Pronchery.inst 0x04a10000 //add z0.s,z0.s,z1.s 996*4757b351SPierre Pronchery.if mixin == 1 997*4757b351SPierre Pronchery add w7,w7,w11 998*4757b351SPierre Pronchery.endif 999*4757b351SPierre Pronchery.inst 0x04a50084 //add z4.s,z4.s,z5.s 1000*4757b351SPierre Pronchery.if mixin == 1 1001*4757b351SPierre Pronchery add w8,w8,w12 1002*4757b351SPierre Pronchery.endif 1003*4757b351SPierre Pronchery.inst 0x04a90108 //add z8.s,z8.s,z9.s 1004*4757b351SPierre Pronchery.if mixin == 1 1005*4757b351SPierre Pronchery add w9,w9,w13 1006*4757b351SPierre Pronchery.endif 1007*4757b351SPierre Pronchery.inst 0x04ad018c //add z12.s,z12.s,z13.s 1008*4757b351SPierre Pronchery.if mixin == 1 1009*4757b351SPierre Pronchery add w10,w10,w14 1010*4757b351SPierre Pronchery.endif 1011*4757b351SPierre Pronchery.if mixin == 1 1012*4757b351SPierre Pronchery eor w19,w19,w7 1013*4757b351SPierre Pronchery.endif 1014*4757b351SPierre Pronchery.inst 0x04703403 //xar z3.s,z3.s,z0.s,16 1015*4757b351SPierre Pronchery.if mixin == 1 1016*4757b351SPierre Pronchery ror w19,w19,16 1017*4757b351SPierre Pronchery.endif 1018*4757b351SPierre Pronchery.if mixin == 1 1019*4757b351SPierre Pronchery eor w20,w20,w8 1020*4757b351SPierre Pronchery.endif 1021*4757b351SPierre Pronchery.inst 0x04703487 //xar z7.s,z7.s,z4.s,16 1022*4757b351SPierre Pronchery.if mixin == 1 1023*4757b351SPierre Pronchery ror w20,w20,16 1024*4757b351SPierre 
Pronchery.endif 1025*4757b351SPierre Pronchery.if mixin == 1 1026*4757b351SPierre Pronchery eor w21,w21,w9 1027*4757b351SPierre Pronchery.endif 1028*4757b351SPierre Pronchery.inst 0x0470350b //xar z11.s,z11.s,z8.s,16 1029*4757b351SPierre Pronchery.if mixin == 1 1030*4757b351SPierre Pronchery ror w21,w21,16 1031*4757b351SPierre Pronchery.endif 1032*4757b351SPierre Pronchery.if mixin == 1 1033*4757b351SPierre Pronchery eor w22,w22,w10 1034*4757b351SPierre Pronchery.endif 1035*4757b351SPierre Pronchery.inst 0x0470358f //xar z15.s,z15.s,z12.s,16 1036*4757b351SPierre Pronchery.if mixin == 1 1037*4757b351SPierre Pronchery ror w22,w22,16 1038*4757b351SPierre Pronchery.endif 1039*4757b351SPierre Pronchery.inst 0x04a30042 //add z2.s,z2.s,z3.s 1040*4757b351SPierre Pronchery.if mixin == 1 1041*4757b351SPierre Pronchery add w15,w15,w19 1042*4757b351SPierre Pronchery.endif 1043*4757b351SPierre Pronchery.inst 0x04a700c6 //add z6.s,z6.s,z7.s 1044*4757b351SPierre Pronchery.if mixin == 1 1045*4757b351SPierre Pronchery add w16,w16,w20 1046*4757b351SPierre Pronchery.endif 1047*4757b351SPierre Pronchery.inst 0x04ab014a //add z10.s,z10.s,z11.s 1048*4757b351SPierre Pronchery.if mixin == 1 1049*4757b351SPierre Pronchery add w17,w17,w21 1050*4757b351SPierre Pronchery.endif 1051*4757b351SPierre Pronchery.inst 0x04af01ce //add z14.s,z14.s,z15.s 1052*4757b351SPierre Pronchery.if mixin == 1 1053*4757b351SPierre Pronchery add w18,w18,w22 1054*4757b351SPierre Pronchery.endif 1055*4757b351SPierre Pronchery.if mixin == 1 1056*4757b351SPierre Pronchery eor w11,w11,w15 1057*4757b351SPierre Pronchery.endif 1058*4757b351SPierre Pronchery.inst 0x046c3441 //xar z1.s,z1.s,z2.s,20 1059*4757b351SPierre Pronchery.if mixin == 1 1060*4757b351SPierre Pronchery ror w11,w11,20 1061*4757b351SPierre Pronchery.endif 1062*4757b351SPierre Pronchery.if mixin == 1 1063*4757b351SPierre Pronchery eor w12,w12,w16 1064*4757b351SPierre Pronchery.endif 1065*4757b351SPierre Pronchery.inst 0x046c34c5 //xar z5.s,z5.s,z6.s,20 
1066*4757b351SPierre Pronchery.if mixin == 1 1067*4757b351SPierre Pronchery ror w12,w12,20 1068*4757b351SPierre Pronchery.endif 1069*4757b351SPierre Pronchery.if mixin == 1 1070*4757b351SPierre Pronchery eor w13,w13,w17 1071*4757b351SPierre Pronchery.endif 1072*4757b351SPierre Pronchery.inst 0x046c3549 //xar z9.s,z9.s,z10.s,20 1073*4757b351SPierre Pronchery.if mixin == 1 1074*4757b351SPierre Pronchery ror w13,w13,20 1075*4757b351SPierre Pronchery.endif 1076*4757b351SPierre Pronchery.if mixin == 1 1077*4757b351SPierre Pronchery eor w14,w14,w18 1078*4757b351SPierre Pronchery.endif 1079*4757b351SPierre Pronchery.inst 0x046c35cd //xar z13.s,z13.s,z14.s,20 1080*4757b351SPierre Pronchery.if mixin == 1 1081*4757b351SPierre Pronchery ror w14,w14,20 1082*4757b351SPierre Pronchery.endif 1083*4757b351SPierre Pronchery.inst 0x04a10000 //add z0.s,z0.s,z1.s 1084*4757b351SPierre Pronchery.if mixin == 1 1085*4757b351SPierre Pronchery add w7,w7,w11 1086*4757b351SPierre Pronchery.endif 1087*4757b351SPierre Pronchery.inst 0x04a50084 //add z4.s,z4.s,z5.s 1088*4757b351SPierre Pronchery.if mixin == 1 1089*4757b351SPierre Pronchery add w8,w8,w12 1090*4757b351SPierre Pronchery.endif 1091*4757b351SPierre Pronchery.inst 0x04a90108 //add z8.s,z8.s,z9.s 1092*4757b351SPierre Pronchery.if mixin == 1 1093*4757b351SPierre Pronchery add w9,w9,w13 1094*4757b351SPierre Pronchery.endif 1095*4757b351SPierre Pronchery.inst 0x04ad018c //add z12.s,z12.s,z13.s 1096*4757b351SPierre Pronchery.if mixin == 1 1097*4757b351SPierre Pronchery add w10,w10,w14 1098*4757b351SPierre Pronchery.endif 1099*4757b351SPierre Pronchery.if mixin == 1 1100*4757b351SPierre Pronchery eor w19,w19,w7 1101*4757b351SPierre Pronchery.endif 1102*4757b351SPierre Pronchery.inst 0x04683403 //xar z3.s,z3.s,z0.s,24 1103*4757b351SPierre Pronchery.if mixin == 1 1104*4757b351SPierre Pronchery ror w19,w19,24 1105*4757b351SPierre Pronchery.endif 1106*4757b351SPierre Pronchery.if mixin == 1 1107*4757b351SPierre Pronchery eor w20,w20,w8 
1108*4757b351SPierre Pronchery.endif 1109*4757b351SPierre Pronchery.inst 0x04683487 //xar z7.s,z7.s,z4.s,24 1110*4757b351SPierre Pronchery.if mixin == 1 1111*4757b351SPierre Pronchery ror w20,w20,24 1112*4757b351SPierre Pronchery.endif 1113*4757b351SPierre Pronchery.if mixin == 1 1114*4757b351SPierre Pronchery eor w21,w21,w9 1115*4757b351SPierre Pronchery.endif 1116*4757b351SPierre Pronchery.inst 0x0468350b //xar z11.s,z11.s,z8.s,24 1117*4757b351SPierre Pronchery.if mixin == 1 1118*4757b351SPierre Pronchery ror w21,w21,24 1119*4757b351SPierre Pronchery.endif 1120*4757b351SPierre Pronchery.if mixin == 1 1121*4757b351SPierre Pronchery eor w22,w22,w10 1122*4757b351SPierre Pronchery.endif 1123*4757b351SPierre Pronchery.inst 0x0468358f //xar z15.s,z15.s,z12.s,24 1124*4757b351SPierre Pronchery.if mixin == 1 1125*4757b351SPierre Pronchery ror w22,w22,24 1126*4757b351SPierre Pronchery.endif 1127*4757b351SPierre Pronchery.inst 0x04a30042 //add z2.s,z2.s,z3.s 1128*4757b351SPierre Pronchery.if mixin == 1 1129*4757b351SPierre Pronchery add w15,w15,w19 1130*4757b351SPierre Pronchery.endif 1131*4757b351SPierre Pronchery.inst 0x04a700c6 //add z6.s,z6.s,z7.s 1132*4757b351SPierre Pronchery.if mixin == 1 1133*4757b351SPierre Pronchery add w16,w16,w20 1134*4757b351SPierre Pronchery.endif 1135*4757b351SPierre Pronchery.inst 0x04ab014a //add z10.s,z10.s,z11.s 1136*4757b351SPierre Pronchery.if mixin == 1 1137*4757b351SPierre Pronchery add w17,w17,w21 1138*4757b351SPierre Pronchery.endif 1139*4757b351SPierre Pronchery.inst 0x04af01ce //add z14.s,z14.s,z15.s 1140*4757b351SPierre Pronchery.if mixin == 1 1141*4757b351SPierre Pronchery add w18,w18,w22 1142*4757b351SPierre Pronchery.endif 1143*4757b351SPierre Pronchery.if mixin == 1 1144*4757b351SPierre Pronchery eor w11,w11,w15 1145*4757b351SPierre Pronchery.endif 1146*4757b351SPierre Pronchery.inst 0x04673441 //xar z1.s,z1.s,z2.s,25 1147*4757b351SPierre Pronchery.if mixin == 1 1148*4757b351SPierre Pronchery ror w11,w11,25 
1149*4757b351SPierre Pronchery.endif 1150*4757b351SPierre Pronchery.if mixin == 1 1151*4757b351SPierre Pronchery eor w12,w12,w16 1152*4757b351SPierre Pronchery.endif 1153*4757b351SPierre Pronchery.inst 0x046734c5 //xar z5.s,z5.s,z6.s,25 1154*4757b351SPierre Pronchery.if mixin == 1 1155*4757b351SPierre Pronchery ror w12,w12,25 1156*4757b351SPierre Pronchery.endif 1157*4757b351SPierre Pronchery.if mixin == 1 1158*4757b351SPierre Pronchery eor w13,w13,w17 1159*4757b351SPierre Pronchery.endif 1160*4757b351SPierre Pronchery.inst 0x04673549 //xar z9.s,z9.s,z10.s,25 1161*4757b351SPierre Pronchery.if mixin == 1 1162*4757b351SPierre Pronchery ror w13,w13,25 1163*4757b351SPierre Pronchery.endif 1164*4757b351SPierre Pronchery.if mixin == 1 1165*4757b351SPierre Pronchery eor w14,w14,w18 1166*4757b351SPierre Pronchery.endif 1167*4757b351SPierre Pronchery.inst 0x046735cd //xar z13.s,z13.s,z14.s,25 1168*4757b351SPierre Pronchery.if mixin == 1 1169*4757b351SPierre Pronchery ror w14,w14,25 1170*4757b351SPierre Pronchery.endif 1171*4757b351SPierre Pronchery.inst 0x04a50000 //add z0.s,z0.s,z5.s 1172*4757b351SPierre Pronchery.if mixin == 1 1173*4757b351SPierre Pronchery add w7,w7,w12 1174*4757b351SPierre Pronchery.endif 1175*4757b351SPierre Pronchery.inst 0x04a90084 //add z4.s,z4.s,z9.s 1176*4757b351SPierre Pronchery.if mixin == 1 1177*4757b351SPierre Pronchery add w8,w8,w13 1178*4757b351SPierre Pronchery.endif 1179*4757b351SPierre Pronchery.inst 0x04ad0108 //add z8.s,z8.s,z13.s 1180*4757b351SPierre Pronchery.if mixin == 1 1181*4757b351SPierre Pronchery add w9,w9,w14 1182*4757b351SPierre Pronchery.endif 1183*4757b351SPierre Pronchery.inst 0x04a1018c //add z12.s,z12.s,z1.s 1184*4757b351SPierre Pronchery.if mixin == 1 1185*4757b351SPierre Pronchery add w10,w10,w11 1186*4757b351SPierre Pronchery.endif 1187*4757b351SPierre Pronchery.if mixin == 1 1188*4757b351SPierre Pronchery eor w22,w22,w7 1189*4757b351SPierre Pronchery.endif 1190*4757b351SPierre Pronchery.inst 0x0470340f //xar 
z15.s,z15.s,z0.s,16 1191*4757b351SPierre Pronchery.if mixin == 1 1192*4757b351SPierre Pronchery ror w22,w22,16 1193*4757b351SPierre Pronchery.endif 1194*4757b351SPierre Pronchery.if mixin == 1 1195*4757b351SPierre Pronchery eor w19,w19,w8 1196*4757b351SPierre Pronchery.endif 1197*4757b351SPierre Pronchery.inst 0x04703483 //xar z3.s,z3.s,z4.s,16 1198*4757b351SPierre Pronchery.if mixin == 1 1199*4757b351SPierre Pronchery ror w19,w19,16 1200*4757b351SPierre Pronchery.endif 1201*4757b351SPierre Pronchery.if mixin == 1 1202*4757b351SPierre Pronchery eor w20,w20,w9 1203*4757b351SPierre Pronchery.endif 1204*4757b351SPierre Pronchery.inst 0x04703507 //xar z7.s,z7.s,z8.s,16 1205*4757b351SPierre Pronchery.if mixin == 1 1206*4757b351SPierre Pronchery ror w20,w20,16 1207*4757b351SPierre Pronchery.endif 1208*4757b351SPierre Pronchery.if mixin == 1 1209*4757b351SPierre Pronchery eor w21,w21,w10 1210*4757b351SPierre Pronchery.endif 1211*4757b351SPierre Pronchery.inst 0x0470358b //xar z11.s,z11.s,z12.s,16 1212*4757b351SPierre Pronchery.if mixin == 1 1213*4757b351SPierre Pronchery ror w21,w21,16 1214*4757b351SPierre Pronchery.endif 1215*4757b351SPierre Pronchery.inst 0x04af014a //add z10.s,z10.s,z15.s 1216*4757b351SPierre Pronchery.if mixin == 1 1217*4757b351SPierre Pronchery add w17,w17,w22 1218*4757b351SPierre Pronchery.endif 1219*4757b351SPierre Pronchery.inst 0x04a301ce //add z14.s,z14.s,z3.s 1220*4757b351SPierre Pronchery.if mixin == 1 1221*4757b351SPierre Pronchery add w18,w18,w19 1222*4757b351SPierre Pronchery.endif 1223*4757b351SPierre Pronchery.inst 0x04a70042 //add z2.s,z2.s,z7.s 1224*4757b351SPierre Pronchery.if mixin == 1 1225*4757b351SPierre Pronchery add w15,w15,w20 1226*4757b351SPierre Pronchery.endif 1227*4757b351SPierre Pronchery.inst 0x04ab00c6 //add z6.s,z6.s,z11.s 1228*4757b351SPierre Pronchery.if mixin == 1 1229*4757b351SPierre Pronchery add w16,w16,w21 1230*4757b351SPierre Pronchery.endif 1231*4757b351SPierre Pronchery.if mixin == 1 1232*4757b351SPierre 
Pronchery eor w12,w12,w17 1233*4757b351SPierre Pronchery.endif 1234*4757b351SPierre Pronchery.inst 0x046c3545 //xar z5.s,z5.s,z10.s,20 1235*4757b351SPierre Pronchery.if mixin == 1 1236*4757b351SPierre Pronchery ror w12,w12,20 1237*4757b351SPierre Pronchery.endif 1238*4757b351SPierre Pronchery.if mixin == 1 1239*4757b351SPierre Pronchery eor w13,w13,w18 1240*4757b351SPierre Pronchery.endif 1241*4757b351SPierre Pronchery.inst 0x046c35c9 //xar z9.s,z9.s,z14.s,20 1242*4757b351SPierre Pronchery.if mixin == 1 1243*4757b351SPierre Pronchery ror w13,w13,20 1244*4757b351SPierre Pronchery.endif 1245*4757b351SPierre Pronchery.if mixin == 1 1246*4757b351SPierre Pronchery eor w14,w14,w15 1247*4757b351SPierre Pronchery.endif 1248*4757b351SPierre Pronchery.inst 0x046c344d //xar z13.s,z13.s,z2.s,20 1249*4757b351SPierre Pronchery.if mixin == 1 1250*4757b351SPierre Pronchery ror w14,w14,20 1251*4757b351SPierre Pronchery.endif 1252*4757b351SPierre Pronchery.if mixin == 1 1253*4757b351SPierre Pronchery eor w11,w11,w16 1254*4757b351SPierre Pronchery.endif 1255*4757b351SPierre Pronchery.inst 0x046c34c1 //xar z1.s,z1.s,z6.s,20 1256*4757b351SPierre Pronchery.if mixin == 1 1257*4757b351SPierre Pronchery ror w11,w11,20 1258*4757b351SPierre Pronchery.endif 1259*4757b351SPierre Pronchery.inst 0x04a50000 //add z0.s,z0.s,z5.s 1260*4757b351SPierre Pronchery.if mixin == 1 1261*4757b351SPierre Pronchery add w7,w7,w12 1262*4757b351SPierre Pronchery.endif 1263*4757b351SPierre Pronchery.inst 0x04a90084 //add z4.s,z4.s,z9.s 1264*4757b351SPierre Pronchery.if mixin == 1 1265*4757b351SPierre Pronchery add w8,w8,w13 1266*4757b351SPierre Pronchery.endif 1267*4757b351SPierre Pronchery.inst 0x04ad0108 //add z8.s,z8.s,z13.s 1268*4757b351SPierre Pronchery.if mixin == 1 1269*4757b351SPierre Pronchery add w9,w9,w14 1270*4757b351SPierre Pronchery.endif 1271*4757b351SPierre Pronchery.inst 0x04a1018c //add z12.s,z12.s,z1.s 1272*4757b351SPierre Pronchery.if mixin == 1 1273*4757b351SPierre Pronchery add w10,w10,w11 
1274*4757b351SPierre Pronchery.endif 1275*4757b351SPierre Pronchery.if mixin == 1 1276*4757b351SPierre Pronchery eor w22,w22,w7 1277*4757b351SPierre Pronchery.endif 1278*4757b351SPierre Pronchery.inst 0x0468340f //xar z15.s,z15.s,z0.s,24 1279*4757b351SPierre Pronchery.if mixin == 1 1280*4757b351SPierre Pronchery ror w22,w22,24 1281*4757b351SPierre Pronchery.endif 1282*4757b351SPierre Pronchery.if mixin == 1 1283*4757b351SPierre Pronchery eor w19,w19,w8 1284*4757b351SPierre Pronchery.endif 1285*4757b351SPierre Pronchery.inst 0x04683483 //xar z3.s,z3.s,z4.s,24 1286*4757b351SPierre Pronchery.if mixin == 1 1287*4757b351SPierre Pronchery ror w19,w19,24 1288*4757b351SPierre Pronchery.endif 1289*4757b351SPierre Pronchery.if mixin == 1 1290*4757b351SPierre Pronchery eor w20,w20,w9 1291*4757b351SPierre Pronchery.endif 1292*4757b351SPierre Pronchery.inst 0x04683507 //xar z7.s,z7.s,z8.s,24 1293*4757b351SPierre Pronchery.if mixin == 1 1294*4757b351SPierre Pronchery ror w20,w20,24 1295*4757b351SPierre Pronchery.endif 1296*4757b351SPierre Pronchery.if mixin == 1 1297*4757b351SPierre Pronchery eor w21,w21,w10 1298*4757b351SPierre Pronchery.endif 1299*4757b351SPierre Pronchery.inst 0x0468358b //xar z11.s,z11.s,z12.s,24 1300*4757b351SPierre Pronchery.if mixin == 1 1301*4757b351SPierre Pronchery ror w21,w21,24 1302*4757b351SPierre Pronchery.endif 1303*4757b351SPierre Pronchery.inst 0x04af014a //add z10.s,z10.s,z15.s 1304*4757b351SPierre Pronchery.if mixin == 1 1305*4757b351SPierre Pronchery add w17,w17,w22 1306*4757b351SPierre Pronchery.endif 1307*4757b351SPierre Pronchery.inst 0x04a301ce //add z14.s,z14.s,z3.s 1308*4757b351SPierre Pronchery.if mixin == 1 1309*4757b351SPierre Pronchery add w18,w18,w19 1310*4757b351SPierre Pronchery.endif 1311*4757b351SPierre Pronchery.inst 0x04a70042 //add z2.s,z2.s,z7.s 1312*4757b351SPierre Pronchery.if mixin == 1 1313*4757b351SPierre Pronchery add w15,w15,w20 1314*4757b351SPierre Pronchery.endif 1315*4757b351SPierre Pronchery.inst 0x04ab00c6 //add 
z6.s,z6.s,z11.s 1316*4757b351SPierre Pronchery.if mixin == 1 1317*4757b351SPierre Pronchery add w16,w16,w21 1318*4757b351SPierre Pronchery.endif 1319*4757b351SPierre Pronchery.if mixin == 1 1320*4757b351SPierre Pronchery eor w12,w12,w17 1321*4757b351SPierre Pronchery.endif 1322*4757b351SPierre Pronchery.inst 0x04673545 //xar z5.s,z5.s,z10.s,25 1323*4757b351SPierre Pronchery.if mixin == 1 1324*4757b351SPierre Pronchery ror w12,w12,25 1325*4757b351SPierre Pronchery.endif 1326*4757b351SPierre Pronchery.if mixin == 1 1327*4757b351SPierre Pronchery eor w13,w13,w18 1328*4757b351SPierre Pronchery.endif 1329*4757b351SPierre Pronchery.inst 0x046735c9 //xar z9.s,z9.s,z14.s,25 1330*4757b351SPierre Pronchery.if mixin == 1 1331*4757b351SPierre Pronchery ror w13,w13,25 1332*4757b351SPierre Pronchery.endif 1333*4757b351SPierre Pronchery.if mixin == 1 1334*4757b351SPierre Pronchery eor w14,w14,w15 1335*4757b351SPierre Pronchery.endif 1336*4757b351SPierre Pronchery.inst 0x0467344d //xar z13.s,z13.s,z2.s,25 1337*4757b351SPierre Pronchery.if mixin == 1 1338*4757b351SPierre Pronchery ror w14,w14,25 1339*4757b351SPierre Pronchery.endif 1340*4757b351SPierre Pronchery.if mixin == 1 1341*4757b351SPierre Pronchery eor w11,w11,w16 1342*4757b351SPierre Pronchery.endif 1343*4757b351SPierre Pronchery.inst 0x046734c1 //xar z1.s,z1.s,z6.s,25 1344*4757b351SPierre Pronchery.if mixin == 1 1345*4757b351SPierre Pronchery ror w11,w11,25 1346*4757b351SPierre Pronchery.endif 1347*4757b351SPierre Pronchery sub x6,x6,1 1348*4757b351SPierre Pronchery cbnz x6,10b 1349*4757b351SPierre Pronchery.if mixin == 1 1350*4757b351SPierre Pronchery add w7,w7,w23 1351*4757b351SPierre Pronchery.endif 1352*4757b351SPierre Pronchery.inst 0x04b90000 //add z0.s,z0.s,z25.s 1353*4757b351SPierre Pronchery.if mixin == 1 1354*4757b351SPierre Pronchery add x8,x8,x23,lsr #32 1355*4757b351SPierre Pronchery.endif 1356*4757b351SPierre Pronchery.inst 0x04ba0084 //add z4.s,z4.s,z26.s 1357*4757b351SPierre Pronchery.if mixin == 1 
1358*4757b351SPierre Pronchery add x7,x7,x8,lsl #32 // pack 1359*4757b351SPierre Pronchery.endif 1360*4757b351SPierre Pronchery.if mixin == 1 1361*4757b351SPierre Pronchery add w9,w9,w24 1362*4757b351SPierre Pronchery.endif 1363*4757b351SPierre Pronchery.inst 0x04bb0108 //add z8.s,z8.s,z27.s 1364*4757b351SPierre Pronchery.if mixin == 1 1365*4757b351SPierre Pronchery add x10,x10,x24,lsr #32 1366*4757b351SPierre Pronchery.endif 1367*4757b351SPierre Pronchery.inst 0x04bc018c //add z12.s,z12.s,z28.s 1368*4757b351SPierre Pronchery.if mixin == 1 1369*4757b351SPierre Pronchery add x9,x9,x10,lsl #32 // pack 1370*4757b351SPierre Pronchery.endif 1371*4757b351SPierre Pronchery.if mixin == 1 1372*4757b351SPierre Pronchery ldp x8,x10,[x1],#16 1373*4757b351SPierre Pronchery.endif 1374*4757b351SPierre Pronchery.if mixin == 1 1375*4757b351SPierre Pronchery add w11,w11,w25 1376*4757b351SPierre Pronchery.endif 1377*4757b351SPierre Pronchery.inst 0x04bd0021 //add z1.s,z1.s,z29.s 1378*4757b351SPierre Pronchery.if mixin == 1 1379*4757b351SPierre Pronchery add x12,x12,x25,lsr #32 1380*4757b351SPierre Pronchery.endif 1381*4757b351SPierre Pronchery.inst 0x04be00a5 //add z5.s,z5.s,z30.s 1382*4757b351SPierre Pronchery.if mixin == 1 1383*4757b351SPierre Pronchery add x11,x11,x12,lsl #32 // pack 1384*4757b351SPierre Pronchery.endif 1385*4757b351SPierre Pronchery.if mixin == 1 1386*4757b351SPierre Pronchery add w13,w13,w26 1387*4757b351SPierre Pronchery.endif 1388*4757b351SPierre Pronchery.inst 0x04b50129 //add z9.s,z9.s,z21.s 1389*4757b351SPierre Pronchery.if mixin == 1 1390*4757b351SPierre Pronchery add x14,x14,x26,lsr #32 1391*4757b351SPierre Pronchery.endif 1392*4757b351SPierre Pronchery.inst 0x04b601ad //add z13.s,z13.s,z22.s 1393*4757b351SPierre Pronchery.if mixin == 1 1394*4757b351SPierre Pronchery add x13,x13,x14,lsl #32 // pack 1395*4757b351SPierre Pronchery.endif 1396*4757b351SPierre Pronchery.if mixin == 1 1397*4757b351SPierre Pronchery ldp x12,x14,[x1],#16 1398*4757b351SPierre 
Pronchery.endif 1399*4757b351SPierre Pronchery.if mixin == 1 1400*4757b351SPierre Pronchery add w15,w15,w27 1401*4757b351SPierre Pronchery.endif 1402*4757b351SPierre Pronchery.inst 0x04b70042 //add z2.s,z2.s,z23.s 1403*4757b351SPierre Pronchery.if mixin == 1 1404*4757b351SPierre Pronchery add x16,x16,x27,lsr #32 1405*4757b351SPierre Pronchery.endif 1406*4757b351SPierre Pronchery.inst 0x04b800c6 //add z6.s,z6.s,z24.s 1407*4757b351SPierre Pronchery.if mixin == 1 1408*4757b351SPierre Pronchery add x15,x15,x16,lsl #32 // pack 1409*4757b351SPierre Pronchery.endif 1410*4757b351SPierre Pronchery.if mixin == 1 1411*4757b351SPierre Pronchery add w17,w17,w28 1412*4757b351SPierre Pronchery.endif 1413*4757b351SPierre Pronchery.inst 0x04b1014a //add z10.s,z10.s,z17.s 1414*4757b351SPierre Pronchery.if mixin == 1 1415*4757b351SPierre Pronchery add x18,x18,x28,lsr #32 1416*4757b351SPierre Pronchery.endif 1417*4757b351SPierre Pronchery.inst 0x04b201ce //add z14.s,z14.s,z18.s 1418*4757b351SPierre Pronchery.if mixin == 1 1419*4757b351SPierre Pronchery add x17,x17,x18,lsl #32 // pack 1420*4757b351SPierre Pronchery.endif 1421*4757b351SPierre Pronchery.if mixin == 1 1422*4757b351SPierre Pronchery ldp x16,x18,[x1],#16 1423*4757b351SPierre Pronchery.endif 1424*4757b351SPierre Pronchery.if mixin == 1 1425*4757b351SPierre Pronchery add w19,w19,w29 1426*4757b351SPierre Pronchery.endif 1427*4757b351SPierre Pronchery.inst 0x04b00063 //add z3.s,z3.s,z16.s 1428*4757b351SPierre Pronchery.if mixin == 1 1429*4757b351SPierre Pronchery add x20,x20,x29,lsr #32 1430*4757b351SPierre Pronchery.endif 1431*4757b351SPierre Pronchery.inst 0x04b300e7 //add z7.s,z7.s,z19.s 1432*4757b351SPierre Pronchery.if mixin == 1 1433*4757b351SPierre Pronchery add x19,x19,x20,lsl #32 // pack 1434*4757b351SPierre Pronchery.endif 1435*4757b351SPierre Pronchery.if mixin == 1 1436*4757b351SPierre Pronchery add w21,w21,w30 1437*4757b351SPierre Pronchery.endif 1438*4757b351SPierre Pronchery.inst 0x04b4016b //add 
z11.s,z11.s,z20.s 1439*4757b351SPierre Pronchery.if mixin == 1 1440*4757b351SPierre Pronchery add x22,x22,x30,lsr #32 1441*4757b351SPierre Pronchery.endif 1442*4757b351SPierre Pronchery.inst 0x04bf01ef //add z15.s,z15.s,z31.s 1443*4757b351SPierre Pronchery.if mixin == 1 1444*4757b351SPierre Pronchery add x21,x21,x22,lsl #32 // pack 1445*4757b351SPierre Pronchery.endif 1446*4757b351SPierre Pronchery.if mixin == 1 1447*4757b351SPierre Pronchery ldp x20,x22,[x1],#16 1448*4757b351SPierre Pronchery.endif 1449*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 1450*4757b351SPierre Pronchery rev x7,x7 1451*4757b351SPierre Pronchery.inst 0x05a48000 //revb z0.s,p0/m,z0.s 1452*4757b351SPierre Pronchery.inst 0x05a48084 //revb z4.s,p0/m,z4.s 1453*4757b351SPierre Pronchery rev x9,x9 1454*4757b351SPierre Pronchery.inst 0x05a48108 //revb z8.s,p0/m,z8.s 1455*4757b351SPierre Pronchery.inst 0x05a4818c //revb z12.s,p0/m,z12.s 1456*4757b351SPierre Pronchery rev x11,x11 1457*4757b351SPierre Pronchery.inst 0x05a48021 //revb z1.s,p0/m,z1.s 1458*4757b351SPierre Pronchery.inst 0x05a480a5 //revb z5.s,p0/m,z5.s 1459*4757b351SPierre Pronchery rev x13,x13 1460*4757b351SPierre Pronchery.inst 0x05a48129 //revb z9.s,p0/m,z9.s 1461*4757b351SPierre Pronchery.inst 0x05a481ad //revb z13.s,p0/m,z13.s 1462*4757b351SPierre Pronchery rev x15,x15 1463*4757b351SPierre Pronchery.inst 0x05a48042 //revb z2.s,p0/m,z2.s 1464*4757b351SPierre Pronchery.inst 0x05a480c6 //revb z6.s,p0/m,z6.s 1465*4757b351SPierre Pronchery rev x17,x17 1466*4757b351SPierre Pronchery.inst 0x05a4814a //revb z10.s,p0/m,z10.s 1467*4757b351SPierre Pronchery.inst 0x05a481ce //revb z14.s,p0/m,z14.s 1468*4757b351SPierre Pronchery rev x19,x19 1469*4757b351SPierre Pronchery.inst 0x05a48063 //revb z3.s,p0/m,z3.s 1470*4757b351SPierre Pronchery.inst 0x05a480e7 //revb z7.s,p0/m,z7.s 1471*4757b351SPierre Pronchery rev x21,x21 1472*4757b351SPierre Pronchery.inst 0x05a4816b //revb z11.s,p0/m,z11.s 1473*4757b351SPierre Pronchery.inst 0x05a481ef //revb 
z15.s,p0/m,z15.s 1474*4757b351SPierre Pronchery#endif 1475*4757b351SPierre Pronchery.if mixin == 1 1476*4757b351SPierre Pronchery add x29,x29,#1 1477*4757b351SPierre Pronchery.endif 1478*4757b351SPierre Pronchery cmp x5,4 1479*4757b351SPierre Pronchery b.ne 200f 1480*4757b351SPierre Pronchery.if mixin == 1 1481*4757b351SPierre Pronchery eor x7,x7,x8 1482*4757b351SPierre Pronchery.endif 1483*4757b351SPierre Pronchery.if mixin == 1 1484*4757b351SPierre Pronchery eor x9,x9,x10 1485*4757b351SPierre Pronchery.endif 1486*4757b351SPierre Pronchery.if mixin == 1 1487*4757b351SPierre Pronchery eor x11,x11,x12 1488*4757b351SPierre Pronchery.endif 1489*4757b351SPierre Pronchery.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s 1490*4757b351SPierre Pronchery.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s 1491*4757b351SPierre Pronchery.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s 1492*4757b351SPierre Pronchery.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s 1493*4757b351SPierre Pronchery 1494*4757b351SPierre Pronchery.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s 1495*4757b351SPierre Pronchery.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s 1496*4757b351SPierre Pronchery.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s 1497*4757b351SPierre Pronchery.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s 1498*4757b351SPierre Pronchery 1499*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 1500*4757b351SPierre Pronchery.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d 1501*4757b351SPierre Pronchery.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d 1502*4757b351SPierre Pronchery.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d 1503*4757b351SPierre Pronchery 1504*4757b351SPierre Pronchery.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d 1505*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 1506*4757b351SPierre Pronchery.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d 1507*4757b351SPierre Pronchery.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d 1508*4757b351SPierre Pronchery.if mixin == 1 1509*4757b351SPierre Pronchery eor x13,x13,x14 1510*4757b351SPierre 
Pronchery.endif 1511*4757b351SPierre Pronchery.if mixin == 1 1512*4757b351SPierre Pronchery eor x15,x15,x16 1513*4757b351SPierre Pronchery.endif 1514*4757b351SPierre Pronchery.if mixin == 1 1515*4757b351SPierre Pronchery eor x17,x17,x18 1516*4757b351SPierre Pronchery.endif 1517*4757b351SPierre Pronchery.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s 1518*4757b351SPierre Pronchery.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s 1519*4757b351SPierre Pronchery.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s 1520*4757b351SPierre Pronchery.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s 1521*4757b351SPierre Pronchery 1522*4757b351SPierre Pronchery.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s 1523*4757b351SPierre Pronchery.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s 1524*4757b351SPierre Pronchery.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s 1525*4757b351SPierre Pronchery.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s 1526*4757b351SPierre Pronchery 1527*4757b351SPierre Pronchery.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d 1528*4757b351SPierre Pronchery.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d 1529*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 1530*4757b351SPierre Pronchery.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d 1531*4757b351SPierre Pronchery 1532*4757b351SPierre Pronchery.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d 1533*4757b351SPierre Pronchery.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d 1534*4757b351SPierre Pronchery.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d 1535*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 1536*4757b351SPierre Pronchery.if mixin == 1 1537*4757b351SPierre Pronchery eor x19,x19,x20 1538*4757b351SPierre Pronchery.endif 1539*4757b351SPierre Pronchery.if mixin == 1 1540*4757b351SPierre Pronchery eor x21,x21,x22 1541*4757b351SPierre Pronchery.endif 1542*4757b351SPierre Pronchery ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 1543*4757b351SPierre Pronchery ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 1544*4757b351SPierre Pronchery.inst 0x04b13000 //eor z0.d,z0.d,z17.d 
1545*4757b351SPierre Pronchery.inst 0x04b23021 //eor z1.d,z1.d,z18.d 1546*4757b351SPierre Pronchery.inst 0x04b33042 //eor z2.d,z2.d,z19.d 1547*4757b351SPierre Pronchery.inst 0x04b43063 //eor z3.d,z3.d,z20.d 1548*4757b351SPierre Pronchery.inst 0x04b53084 //eor z4.d,z4.d,z21.d 1549*4757b351SPierre Pronchery.inst 0x04b630a5 //eor z5.d,z5.d,z22.d 1550*4757b351SPierre Pronchery.inst 0x04b730c6 //eor z6.d,z6.d,z23.d 1551*4757b351SPierre Pronchery.inst 0x04b830e7 //eor z7.d,z7.d,z24.d 1552*4757b351SPierre Pronchery ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 1553*4757b351SPierre Pronchery ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 1554*4757b351SPierre Pronchery.if mixin == 1 1555*4757b351SPierre Pronchery stp x7,x9,[x0],#16 1556*4757b351SPierre Pronchery.endif 1557*4757b351SPierre Pronchery.inst 0x04b13108 //eor z8.d,z8.d,z17.d 1558*4757b351SPierre Pronchery.inst 0x04b23129 //eor z9.d,z9.d,z18.d 1559*4757b351SPierre Pronchery.if mixin == 1 1560*4757b351SPierre Pronchery stp x11,x13,[x0],#16 1561*4757b351SPierre Pronchery.endif 1562*4757b351SPierre Pronchery.inst 0x04b3314a //eor z10.d,z10.d,z19.d 1563*4757b351SPierre Pronchery.inst 0x04b4316b //eor z11.d,z11.d,z20.d 1564*4757b351SPierre Pronchery.if mixin == 1 1565*4757b351SPierre Pronchery stp x15,x17,[x0],#16 1566*4757b351SPierre Pronchery.endif 1567*4757b351SPierre Pronchery.inst 0x04b5318c //eor z12.d,z12.d,z21.d 1568*4757b351SPierre Pronchery.inst 0x04b631ad //eor z13.d,z13.d,z22.d 1569*4757b351SPierre Pronchery.if mixin == 1 1570*4757b351SPierre Pronchery stp x19,x21,[x0],#16 1571*4757b351SPierre Pronchery.endif 1572*4757b351SPierre Pronchery.inst 0x04b731ce //eor z14.d,z14.d,z23.d 1573*4757b351SPierre Pronchery.inst 0x04b831ef //eor z15.d,z15.d,z24.d 1574*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64 1575*4757b351SPierre Pronchery st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 1576*4757b351SPierre Pronchery st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 1577*4757b351SPierre Pronchery st1 
{v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 1578*4757b351SPierre Pronchery b 210f 1579*4757b351SPierre Pronchery200: 1580*4757b351SPierre Pronchery.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s 1581*4757b351SPierre Pronchery.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s 1582*4757b351SPierre Pronchery.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s 1583*4757b351SPierre Pronchery.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s 1584*4757b351SPierre Pronchery 1585*4757b351SPierre Pronchery.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s 1586*4757b351SPierre Pronchery.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s 1587*4757b351SPierre Pronchery.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s 1588*4757b351SPierre Pronchery.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s 1589*4757b351SPierre Pronchery 1590*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 1591*4757b351SPierre Pronchery.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d 1592*4757b351SPierre Pronchery.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d 1593*4757b351SPierre Pronchery.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d 1594*4757b351SPierre Pronchery 1595*4757b351SPierre Pronchery.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d 1596*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 1597*4757b351SPierre Pronchery.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d 1598*4757b351SPierre Pronchery.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d 1599*4757b351SPierre Pronchery.if mixin == 1 1600*4757b351SPierre Pronchery eor x7,x7,x8 1601*4757b351SPierre Pronchery.endif 1602*4757b351SPierre Pronchery.if mixin == 1 1603*4757b351SPierre Pronchery eor x9,x9,x10 1604*4757b351SPierre Pronchery.endif 1605*4757b351SPierre Pronchery.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s 1606*4757b351SPierre Pronchery.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s 1607*4757b351SPierre Pronchery.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s 1608*4757b351SPierre Pronchery.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s 1609*4757b351SPierre Pronchery 1610*4757b351SPierre Pronchery.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s 1611*4757b351SPierre 
Pronchery.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s 1612*4757b351SPierre Pronchery.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s 1613*4757b351SPierre Pronchery.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s 1614*4757b351SPierre Pronchery 1615*4757b351SPierre Pronchery.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d 1616*4757b351SPierre Pronchery.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d 1617*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 1618*4757b351SPierre Pronchery.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d 1619*4757b351SPierre Pronchery 1620*4757b351SPierre Pronchery.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d 1621*4757b351SPierre Pronchery.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d 1622*4757b351SPierre Pronchery.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d 1623*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 1624*4757b351SPierre Pronchery.if mixin == 1 1625*4757b351SPierre Pronchery eor x11,x11,x12 1626*4757b351SPierre Pronchery.endif 1627*4757b351SPierre Pronchery.if mixin == 1 1628*4757b351SPierre Pronchery eor x13,x13,x14 1629*4757b351SPierre Pronchery.endif 1630*4757b351SPierre Pronchery.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s 1631*4757b351SPierre Pronchery.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s 1632*4757b351SPierre Pronchery.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s 1633*4757b351SPierre Pronchery.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s 1634*4757b351SPierre Pronchery 1635*4757b351SPierre Pronchery.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s 1636*4757b351SPierre Pronchery.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s 1637*4757b351SPierre Pronchery.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s 1638*4757b351SPierre Pronchery.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s 1639*4757b351SPierre Pronchery 1640*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 1641*4757b351SPierre Pronchery.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d 1642*4757b351SPierre Pronchery.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d 1643*4757b351SPierre Pronchery.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d 
1644*4757b351SPierre Pronchery 1645*4757b351SPierre Pronchery.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d 1646*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 1647*4757b351SPierre Pronchery.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d 1648*4757b351SPierre Pronchery.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d 1649*4757b351SPierre Pronchery.if mixin == 1 1650*4757b351SPierre Pronchery eor x15,x15,x16 1651*4757b351SPierre Pronchery.endif 1652*4757b351SPierre Pronchery.if mixin == 1 1653*4757b351SPierre Pronchery eor x17,x17,x18 1654*4757b351SPierre Pronchery.endif 1655*4757b351SPierre Pronchery.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s 1656*4757b351SPierre Pronchery.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s 1657*4757b351SPierre Pronchery.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s 1658*4757b351SPierre Pronchery.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s 1659*4757b351SPierre Pronchery 1660*4757b351SPierre Pronchery.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s 1661*4757b351SPierre Pronchery.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s 1662*4757b351SPierre Pronchery.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s 1663*4757b351SPierre Pronchery.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s 1664*4757b351SPierre Pronchery 1665*4757b351SPierre Pronchery.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d 1666*4757b351SPierre Pronchery.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d 1667*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 1668*4757b351SPierre Pronchery.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d 1669*4757b351SPierre Pronchery 1670*4757b351SPierre Pronchery.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d 1671*4757b351SPierre Pronchery.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d 1672*4757b351SPierre Pronchery.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d 1673*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 1674*4757b351SPierre Pronchery.if mixin == 1 1675*4757b351SPierre Pronchery eor x19,x19,x20 1676*4757b351SPierre Pronchery.endif 1677*4757b351SPierre Pronchery.if mixin == 1 
1678*4757b351SPierre Pronchery eor x21,x21,x22 1679*4757b351SPierre Pronchery.endif 1680*4757b351SPierre Pronchery.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] 1681*4757b351SPierre Pronchery.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] 1682*4757b351SPierre Pronchery.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] 1683*4757b351SPierre Pronchery.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] 1684*4757b351SPierre Pronchery.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] 1685*4757b351SPierre Pronchery.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] 1686*4757b351SPierre Pronchery.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] 1687*4757b351SPierre Pronchery.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] 1688*4757b351SPierre Pronchery.inst 0x04215101 //addvl x1,x1,8 1689*4757b351SPierre Pronchery.inst 0x04b13000 //eor z0.d,z0.d,z17.d 1690*4757b351SPierre Pronchery.inst 0x04b23084 //eor z4.d,z4.d,z18.d 1691*4757b351SPierre Pronchery.inst 0x04b33108 //eor z8.d,z8.d,z19.d 1692*4757b351SPierre Pronchery.inst 0x04b4318c //eor z12.d,z12.d,z20.d 1693*4757b351SPierre Pronchery.inst 0x04b53021 //eor z1.d,z1.d,z21.d 1694*4757b351SPierre Pronchery.inst 0x04b630a5 //eor z5.d,z5.d,z22.d 1695*4757b351SPierre Pronchery.inst 0x04b73129 //eor z9.d,z9.d,z23.d 1696*4757b351SPierre Pronchery.inst 0x04b831ad //eor z13.d,z13.d,z24.d 1697*4757b351SPierre Pronchery.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] 1698*4757b351SPierre Pronchery.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] 1699*4757b351SPierre Pronchery.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] 1700*4757b351SPierre Pronchery.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] 1701*4757b351SPierre Pronchery.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] 1702*4757b351SPierre Pronchery.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] 1703*4757b351SPierre Pronchery.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] 1704*4757b351SPierre Pronchery.inst 0xa547a038 //ld1w 
{z24.s},p0/z,[x1,#7,MUL VL] 1705*4757b351SPierre Pronchery.inst 0x04215101 //addvl x1,x1,8 1706*4757b351SPierre Pronchery.if mixin == 1 1707*4757b351SPierre Pronchery stp x7,x9,[x0],#16 1708*4757b351SPierre Pronchery.endif 1709*4757b351SPierre Pronchery.inst 0x04b13042 //eor z2.d,z2.d,z17.d 1710*4757b351SPierre Pronchery.inst 0x04b230c6 //eor z6.d,z6.d,z18.d 1711*4757b351SPierre Pronchery.if mixin == 1 1712*4757b351SPierre Pronchery stp x11,x13,[x0],#16 1713*4757b351SPierre Pronchery.endif 1714*4757b351SPierre Pronchery.inst 0x04b3314a //eor z10.d,z10.d,z19.d 1715*4757b351SPierre Pronchery.inst 0x04b431ce //eor z14.d,z14.d,z20.d 1716*4757b351SPierre Pronchery.if mixin == 1 1717*4757b351SPierre Pronchery stp x15,x17,[x0],#16 1718*4757b351SPierre Pronchery.endif 1719*4757b351SPierre Pronchery.inst 0x04b53063 //eor z3.d,z3.d,z21.d 1720*4757b351SPierre Pronchery.inst 0x04b630e7 //eor z7.d,z7.d,z22.d 1721*4757b351SPierre Pronchery.if mixin == 1 1722*4757b351SPierre Pronchery stp x19,x21,[x0],#16 1723*4757b351SPierre Pronchery.endif 1724*4757b351SPierre Pronchery.inst 0x04b7316b //eor z11.d,z11.d,z23.d 1725*4757b351SPierre Pronchery.inst 0x04b831ef //eor z15.d,z15.d,z24.d 1726*4757b351SPierre Pronchery.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL] 1727*4757b351SPierre Pronchery.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL] 1728*4757b351SPierre Pronchery.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL] 1729*4757b351SPierre Pronchery.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL] 1730*4757b351SPierre Pronchery.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL] 1731*4757b351SPierre Pronchery.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL] 1732*4757b351SPierre Pronchery.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL] 1733*4757b351SPierre Pronchery.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL] 1734*4757b351SPierre Pronchery.inst 0x04205100 //addvl x0,x0,8 1735*4757b351SPierre Pronchery.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL] 1736*4757b351SPierre Pronchery.inst 
0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL] 1737*4757b351SPierre Pronchery.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL] 1738*4757b351SPierre Pronchery.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL] 1739*4757b351SPierre Pronchery.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL] 1740*4757b351SPierre Pronchery.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL] 1741*4757b351SPierre Pronchery.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL] 1742*4757b351SPierre Pronchery.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL] 1743*4757b351SPierre Pronchery.inst 0x04205100 //addvl x0,x0,8 1744*4757b351SPierre Pronchery210: 1745*4757b351SPierre Pronchery.inst 0x04b0e3fd //incw x29, ALL, MUL #1 1746*4757b351SPierre Pronchery110: 1747*4757b351SPierre Pronchery b 2f 1748*4757b351SPierre Pronchery1: 1749*4757b351SPierre Pronchery.align 5 1750*4757b351SPierre Pronchery100: 1751*4757b351SPierre Pronchery subs x7,x2,x5,lsl #6 1752*4757b351SPierre Pronchery b.lt 110f 1753*4757b351SPierre Pronchery mov x2,x7 1754*4757b351SPierre Pronchery b.eq 101f 1755*4757b351SPierre Pronchery cmp x2,64 1756*4757b351SPierre Pronchery b.lt 101f 1757*4757b351SPierre Pronchery mixin=1 1758*4757b351SPierre Pronchery lsr x8,x23,#32 1759*4757b351SPierre Pronchery.inst 0x05a03ae0 //dup z0.s,w23 1760*4757b351SPierre Pronchery.inst 0x05a03af9 //dup z25.s,w23 1761*4757b351SPierre Pronchery.if mixin == 1 1762*4757b351SPierre Pronchery mov w7,w23 1763*4757b351SPierre Pronchery.endif 1764*4757b351SPierre Pronchery.inst 0x05a03904 //dup z4.s,w8 1765*4757b351SPierre Pronchery.inst 0x05a0391a //dup z26.s,w8 1766*4757b351SPierre Pronchery lsr x10,x24,#32 1767*4757b351SPierre Pronchery.inst 0x05a03b08 //dup z8.s,w24 1768*4757b351SPierre Pronchery.inst 0x05a03b1b //dup z27.s,w24 1769*4757b351SPierre Pronchery.if mixin == 1 1770*4757b351SPierre Pronchery mov w9,w24 1771*4757b351SPierre Pronchery.endif 1772*4757b351SPierre Pronchery.inst 0x05a0394c //dup z12.s,w10 1773*4757b351SPierre Pronchery.inst 0x05a0395c //dup 
z28.s,w10 1774*4757b351SPierre Pronchery lsr x12,x25,#32 1775*4757b351SPierre Pronchery.inst 0x05a03b21 //dup z1.s,w25 1776*4757b351SPierre Pronchery.inst 0x05a03b3d //dup z29.s,w25 1777*4757b351SPierre Pronchery.if mixin == 1 1778*4757b351SPierre Pronchery mov w11,w25 1779*4757b351SPierre Pronchery.endif 1780*4757b351SPierre Pronchery.inst 0x05a03985 //dup z5.s,w12 1781*4757b351SPierre Pronchery.inst 0x05a0399e //dup z30.s,w12 1782*4757b351SPierre Pronchery lsr x14,x26,#32 1783*4757b351SPierre Pronchery.inst 0x05a03b49 //dup z9.s,w26 1784*4757b351SPierre Pronchery.inst 0x05a03b55 //dup z21.s,w26 1785*4757b351SPierre Pronchery.if mixin == 1 1786*4757b351SPierre Pronchery mov w13,w26 1787*4757b351SPierre Pronchery.endif 1788*4757b351SPierre Pronchery.inst 0x05a039cd //dup z13.s,w14 1789*4757b351SPierre Pronchery.inst 0x05a039d6 //dup z22.s,w14 1790*4757b351SPierre Pronchery lsr x16,x27,#32 1791*4757b351SPierre Pronchery.inst 0x05a03b62 //dup z2.s,w27 1792*4757b351SPierre Pronchery.inst 0x05a03b77 //dup z23.s,w27 1793*4757b351SPierre Pronchery.if mixin == 1 1794*4757b351SPierre Pronchery mov w15,w27 1795*4757b351SPierre Pronchery.endif 1796*4757b351SPierre Pronchery.inst 0x05a03a06 //dup z6.s,w16 1797*4757b351SPierre Pronchery.inst 0x05a03a18 //dup z24.s,w16 1798*4757b351SPierre Pronchery lsr x18,x28,#32 1799*4757b351SPierre Pronchery.inst 0x05a03b8a //dup z10.s,w28 1800*4757b351SPierre Pronchery.if mixin == 1 1801*4757b351SPierre Pronchery mov w17,w28 1802*4757b351SPierre Pronchery.endif 1803*4757b351SPierre Pronchery.inst 0x05a03a4e //dup z14.s,w18 1804*4757b351SPierre Pronchery lsr x22,x30,#32 1805*4757b351SPierre Pronchery.inst 0x05a03bcb //dup z11.s,w30 1806*4757b351SPierre Pronchery.if mixin == 1 1807*4757b351SPierre Pronchery mov w21,w30 1808*4757b351SPierre Pronchery.endif 1809*4757b351SPierre Pronchery.inst 0x05a03acf //dup z15.s,w22 1810*4757b351SPierre Pronchery.if mixin == 1 1811*4757b351SPierre Pronchery add w20,w29,#1 1812*4757b351SPierre Pronchery mov 
w19,w29 1813*4757b351SPierre Pronchery.inst 0x04a14690 //index z16.s,w20,1 1814*4757b351SPierre Pronchery.inst 0x04a14683 //index z3.s,w20,1 1815*4757b351SPierre Pronchery.else 1816*4757b351SPierre Pronchery.inst 0x04a147b0 //index z16.s,w29,1 1817*4757b351SPierre Pronchery.inst 0x04a147a3 //index z3.s,w29,1 1818*4757b351SPierre Pronchery.endif 1819*4757b351SPierre Pronchery lsr x20,x29,#32 1820*4757b351SPierre Pronchery.inst 0x05a03a87 //dup z7.s,w20 1821*4757b351SPierre Pronchery mov x6,#10 1822*4757b351SPierre Pronchery10: 1823*4757b351SPierre Pronchery.align 5 1824*4757b351SPierre Pronchery.inst 0x04a10000 //add z0.s,z0.s,z1.s 1825*4757b351SPierre Pronchery.if mixin == 1 1826*4757b351SPierre Pronchery add w7,w7,w11 1827*4757b351SPierre Pronchery.endif 1828*4757b351SPierre Pronchery.inst 0x04a50084 //add z4.s,z4.s,z5.s 1829*4757b351SPierre Pronchery.if mixin == 1 1830*4757b351SPierre Pronchery add w8,w8,w12 1831*4757b351SPierre Pronchery.endif 1832*4757b351SPierre Pronchery.inst 0x04a90108 //add z8.s,z8.s,z9.s 1833*4757b351SPierre Pronchery.if mixin == 1 1834*4757b351SPierre Pronchery add w9,w9,w13 1835*4757b351SPierre Pronchery.endif 1836*4757b351SPierre Pronchery.inst 0x04ad018c //add z12.s,z12.s,z13.s 1837*4757b351SPierre Pronchery.if mixin == 1 1838*4757b351SPierre Pronchery add w10,w10,w14 1839*4757b351SPierre Pronchery.endif 1840*4757b351SPierre Pronchery.inst 0x04a03063 //eor z3.d,z3.d,z0.d 1841*4757b351SPierre Pronchery.if mixin == 1 1842*4757b351SPierre Pronchery eor w19,w19,w7 1843*4757b351SPierre Pronchery.endif 1844*4757b351SPierre Pronchery.inst 0x04a430e7 //eor z7.d,z7.d,z4.d 1845*4757b351SPierre Pronchery.if mixin == 1 1846*4757b351SPierre Pronchery eor w20,w20,w8 1847*4757b351SPierre Pronchery.endif 1848*4757b351SPierre Pronchery.inst 0x04a8316b //eor z11.d,z11.d,z8.d 1849*4757b351SPierre Pronchery.if mixin == 1 1850*4757b351SPierre Pronchery eor w21,w21,w9 1851*4757b351SPierre Pronchery.endif 1852*4757b351SPierre Pronchery.inst 0x04ac31ef //eor 
z15.d,z15.d,z12.d 1853*4757b351SPierre Pronchery.if mixin == 1 1854*4757b351SPierre Pronchery eor w22,w22,w10 1855*4757b351SPierre Pronchery.endif 1856*4757b351SPierre Pronchery.inst 0x05a58063 //revh z3.s,p0/m,z3.s 1857*4757b351SPierre Pronchery.if mixin == 1 1858*4757b351SPierre Pronchery ror w19,w19,#16 1859*4757b351SPierre Pronchery.endif 1860*4757b351SPierre Pronchery.inst 0x05a580e7 //revh z7.s,p0/m,z7.s 1861*4757b351SPierre Pronchery.if mixin == 1 1862*4757b351SPierre Pronchery ror w20,w20,#16 1863*4757b351SPierre Pronchery.endif 1864*4757b351SPierre Pronchery.inst 0x05a5816b //revh z11.s,p0/m,z11.s 1865*4757b351SPierre Pronchery.if mixin == 1 1866*4757b351SPierre Pronchery ror w21,w21,#16 1867*4757b351SPierre Pronchery.endif 1868*4757b351SPierre Pronchery.inst 0x05a581ef //revh z15.s,p0/m,z15.s 1869*4757b351SPierre Pronchery.if mixin == 1 1870*4757b351SPierre Pronchery ror w22,w22,#16 1871*4757b351SPierre Pronchery.endif 1872*4757b351SPierre Pronchery.inst 0x04a30042 //add z2.s,z2.s,z3.s 1873*4757b351SPierre Pronchery.if mixin == 1 1874*4757b351SPierre Pronchery add w15,w15,w19 1875*4757b351SPierre Pronchery.endif 1876*4757b351SPierre Pronchery.inst 0x04a700c6 //add z6.s,z6.s,z7.s 1877*4757b351SPierre Pronchery.if mixin == 1 1878*4757b351SPierre Pronchery add w16,w16,w20 1879*4757b351SPierre Pronchery.endif 1880*4757b351SPierre Pronchery.inst 0x04ab014a //add z10.s,z10.s,z11.s 1881*4757b351SPierre Pronchery.if mixin == 1 1882*4757b351SPierre Pronchery add w17,w17,w21 1883*4757b351SPierre Pronchery.endif 1884*4757b351SPierre Pronchery.inst 0x04af01ce //add z14.s,z14.s,z15.s 1885*4757b351SPierre Pronchery.if mixin == 1 1886*4757b351SPierre Pronchery add w18,w18,w22 1887*4757b351SPierre Pronchery.endif 1888*4757b351SPierre Pronchery.inst 0x04a23021 //eor z1.d,z1.d,z2.d 1889*4757b351SPierre Pronchery.if mixin == 1 1890*4757b351SPierre Pronchery eor w11,w11,w15 1891*4757b351SPierre Pronchery.endif 1892*4757b351SPierre Pronchery.inst 0x04a630a5 //eor 
z5.d,z5.d,z6.d 1893*4757b351SPierre Pronchery.if mixin == 1 1894*4757b351SPierre Pronchery eor w12,w12,w16 1895*4757b351SPierre Pronchery.endif 1896*4757b351SPierre Pronchery.inst 0x04aa3129 //eor z9.d,z9.d,z10.d 1897*4757b351SPierre Pronchery.if mixin == 1 1898*4757b351SPierre Pronchery eor w13,w13,w17 1899*4757b351SPierre Pronchery.endif 1900*4757b351SPierre Pronchery.inst 0x04ae31ad //eor z13.d,z13.d,z14.d 1901*4757b351SPierre Pronchery.if mixin == 1 1902*4757b351SPierre Pronchery eor w14,w14,w18 1903*4757b351SPierre Pronchery.endif 1904*4757b351SPierre Pronchery.inst 0x046c9c31 //lsl z17.s,z1.s,12 1905*4757b351SPierre Pronchery.inst 0x046c9cb2 //lsl z18.s,z5.s,12 1906*4757b351SPierre Pronchery.inst 0x046c9d33 //lsl z19.s,z9.s,12 1907*4757b351SPierre Pronchery.inst 0x046c9db4 //lsl z20.s,z13.s,12 1908*4757b351SPierre Pronchery.inst 0x046c9421 //lsr z1.s,z1.s,20 1909*4757b351SPierre Pronchery.if mixin == 1 1910*4757b351SPierre Pronchery ror w11,w11,20 1911*4757b351SPierre Pronchery.endif 1912*4757b351SPierre Pronchery.inst 0x046c94a5 //lsr z5.s,z5.s,20 1913*4757b351SPierre Pronchery.if mixin == 1 1914*4757b351SPierre Pronchery ror w12,w12,20 1915*4757b351SPierre Pronchery.endif 1916*4757b351SPierre Pronchery.inst 0x046c9529 //lsr z9.s,z9.s,20 1917*4757b351SPierre Pronchery.if mixin == 1 1918*4757b351SPierre Pronchery ror w13,w13,20 1919*4757b351SPierre Pronchery.endif 1920*4757b351SPierre Pronchery.inst 0x046c95ad //lsr z13.s,z13.s,20 1921*4757b351SPierre Pronchery.if mixin == 1 1922*4757b351SPierre Pronchery ror w14,w14,20 1923*4757b351SPierre Pronchery.endif 1924*4757b351SPierre Pronchery.inst 0x04713021 //orr z1.d,z1.d,z17.d 1925*4757b351SPierre Pronchery.inst 0x047230a5 //orr z5.d,z5.d,z18.d 1926*4757b351SPierre Pronchery.inst 0x04733129 //orr z9.d,z9.d,z19.d 1927*4757b351SPierre Pronchery.inst 0x047431ad //orr z13.d,z13.d,z20.d 1928*4757b351SPierre Pronchery.inst 0x04a10000 //add z0.s,z0.s,z1.s 1929*4757b351SPierre Pronchery.if mixin == 1 
1930*4757b351SPierre Pronchery add w7,w7,w11 1931*4757b351SPierre Pronchery.endif 1932*4757b351SPierre Pronchery.inst 0x04a50084 //add z4.s,z4.s,z5.s 1933*4757b351SPierre Pronchery.if mixin == 1 1934*4757b351SPierre Pronchery add w8,w8,w12 1935*4757b351SPierre Pronchery.endif 1936*4757b351SPierre Pronchery.inst 0x04a90108 //add z8.s,z8.s,z9.s 1937*4757b351SPierre Pronchery.if mixin == 1 1938*4757b351SPierre Pronchery add w9,w9,w13 1939*4757b351SPierre Pronchery.endif 1940*4757b351SPierre Pronchery.inst 0x04ad018c //add z12.s,z12.s,z13.s 1941*4757b351SPierre Pronchery.if mixin == 1 1942*4757b351SPierre Pronchery add w10,w10,w14 1943*4757b351SPierre Pronchery.endif 1944*4757b351SPierre Pronchery.inst 0x04a03063 //eor z3.d,z3.d,z0.d 1945*4757b351SPierre Pronchery.if mixin == 1 1946*4757b351SPierre Pronchery eor w19,w19,w7 1947*4757b351SPierre Pronchery.endif 1948*4757b351SPierre Pronchery.inst 0x04a430e7 //eor z7.d,z7.d,z4.d 1949*4757b351SPierre Pronchery.if mixin == 1 1950*4757b351SPierre Pronchery eor w20,w20,w8 1951*4757b351SPierre Pronchery.endif 1952*4757b351SPierre Pronchery.inst 0x04a8316b //eor z11.d,z11.d,z8.d 1953*4757b351SPierre Pronchery.if mixin == 1 1954*4757b351SPierre Pronchery eor w21,w21,w9 1955*4757b351SPierre Pronchery.endif 1956*4757b351SPierre Pronchery.inst 0x04ac31ef //eor z15.d,z15.d,z12.d 1957*4757b351SPierre Pronchery.if mixin == 1 1958*4757b351SPierre Pronchery eor w22,w22,w10 1959*4757b351SPierre Pronchery.endif 1960*4757b351SPierre Pronchery.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b 1961*4757b351SPierre Pronchery.if mixin == 1 1962*4757b351SPierre Pronchery ror w19,w19,#24 1963*4757b351SPierre Pronchery.endif 1964*4757b351SPierre Pronchery.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b 1965*4757b351SPierre Pronchery.if mixin == 1 1966*4757b351SPierre Pronchery ror w20,w20,#24 1967*4757b351SPierre Pronchery.endif 1968*4757b351SPierre Pronchery.inst 0x053f316b //tbl z11.b,{z11.b},z31.b 1969*4757b351SPierre Pronchery.if mixin == 1 
1970*4757b351SPierre Pronchery ror w21,w21,#24 1971*4757b351SPierre Pronchery.endif 1972*4757b351SPierre Pronchery.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b 1973*4757b351SPierre Pronchery.if mixin == 1 1974*4757b351SPierre Pronchery ror w22,w22,#24 1975*4757b351SPierre Pronchery.endif 1976*4757b351SPierre Pronchery.inst 0x04a30042 //add z2.s,z2.s,z3.s 1977*4757b351SPierre Pronchery.if mixin == 1 1978*4757b351SPierre Pronchery add w15,w15,w19 1979*4757b351SPierre Pronchery.endif 1980*4757b351SPierre Pronchery.inst 0x04a700c6 //add z6.s,z6.s,z7.s 1981*4757b351SPierre Pronchery.if mixin == 1 1982*4757b351SPierre Pronchery add w16,w16,w20 1983*4757b351SPierre Pronchery.endif 1984*4757b351SPierre Pronchery.inst 0x04ab014a //add z10.s,z10.s,z11.s 1985*4757b351SPierre Pronchery.if mixin == 1 1986*4757b351SPierre Pronchery add w17,w17,w21 1987*4757b351SPierre Pronchery.endif 1988*4757b351SPierre Pronchery.inst 0x04af01ce //add z14.s,z14.s,z15.s 1989*4757b351SPierre Pronchery.if mixin == 1 1990*4757b351SPierre Pronchery add w18,w18,w22 1991*4757b351SPierre Pronchery.endif 1992*4757b351SPierre Pronchery.inst 0x04a23021 //eor z1.d,z1.d,z2.d 1993*4757b351SPierre Pronchery.if mixin == 1 1994*4757b351SPierre Pronchery eor w11,w11,w15 1995*4757b351SPierre Pronchery.endif 1996*4757b351SPierre Pronchery.inst 0x04a630a5 //eor z5.d,z5.d,z6.d 1997*4757b351SPierre Pronchery.if mixin == 1 1998*4757b351SPierre Pronchery eor w12,w12,w16 1999*4757b351SPierre Pronchery.endif 2000*4757b351SPierre Pronchery.inst 0x04aa3129 //eor z9.d,z9.d,z10.d 2001*4757b351SPierre Pronchery.if mixin == 1 2002*4757b351SPierre Pronchery eor w13,w13,w17 2003*4757b351SPierre Pronchery.endif 2004*4757b351SPierre Pronchery.inst 0x04ae31ad //eor z13.d,z13.d,z14.d 2005*4757b351SPierre Pronchery.if mixin == 1 2006*4757b351SPierre Pronchery eor w14,w14,w18 2007*4757b351SPierre Pronchery.endif 2008*4757b351SPierre Pronchery.inst 0x04679c31 //lsl z17.s,z1.s,7 2009*4757b351SPierre Pronchery.inst 0x04679cb2 //lsl 
z18.s,z5.s,7 2010*4757b351SPierre Pronchery.inst 0x04679d33 //lsl z19.s,z9.s,7 2011*4757b351SPierre Pronchery.inst 0x04679db4 //lsl z20.s,z13.s,7 2012*4757b351SPierre Pronchery.inst 0x04679421 //lsr z1.s,z1.s,25 2013*4757b351SPierre Pronchery.if mixin == 1 2014*4757b351SPierre Pronchery ror w11,w11,25 2015*4757b351SPierre Pronchery.endif 2016*4757b351SPierre Pronchery.inst 0x046794a5 //lsr z5.s,z5.s,25 2017*4757b351SPierre Pronchery.if mixin == 1 2018*4757b351SPierre Pronchery ror w12,w12,25 2019*4757b351SPierre Pronchery.endif 2020*4757b351SPierre Pronchery.inst 0x04679529 //lsr z9.s,z9.s,25 2021*4757b351SPierre Pronchery.if mixin == 1 2022*4757b351SPierre Pronchery ror w13,w13,25 2023*4757b351SPierre Pronchery.endif 2024*4757b351SPierre Pronchery.inst 0x046795ad //lsr z13.s,z13.s,25 2025*4757b351SPierre Pronchery.if mixin == 1 2026*4757b351SPierre Pronchery ror w14,w14,25 2027*4757b351SPierre Pronchery.endif 2028*4757b351SPierre Pronchery.inst 0x04713021 //orr z1.d,z1.d,z17.d 2029*4757b351SPierre Pronchery.inst 0x047230a5 //orr z5.d,z5.d,z18.d 2030*4757b351SPierre Pronchery.inst 0x04733129 //orr z9.d,z9.d,z19.d 2031*4757b351SPierre Pronchery.inst 0x047431ad //orr z13.d,z13.d,z20.d 2032*4757b351SPierre Pronchery.inst 0x04a50000 //add z0.s,z0.s,z5.s 2033*4757b351SPierre Pronchery.if mixin == 1 2034*4757b351SPierre Pronchery add w7,w7,w12 2035*4757b351SPierre Pronchery.endif 2036*4757b351SPierre Pronchery.inst 0x04a90084 //add z4.s,z4.s,z9.s 2037*4757b351SPierre Pronchery.if mixin == 1 2038*4757b351SPierre Pronchery add w8,w8,w13 2039*4757b351SPierre Pronchery.endif 2040*4757b351SPierre Pronchery.inst 0x04ad0108 //add z8.s,z8.s,z13.s 2041*4757b351SPierre Pronchery.if mixin == 1 2042*4757b351SPierre Pronchery add w9,w9,w14 2043*4757b351SPierre Pronchery.endif 2044*4757b351SPierre Pronchery.inst 0x04a1018c //add z12.s,z12.s,z1.s 2045*4757b351SPierre Pronchery.if mixin == 1 2046*4757b351SPierre Pronchery add w10,w10,w11 2047*4757b351SPierre Pronchery.endif 
2048*4757b351SPierre Pronchery.inst 0x04a031ef //eor z15.d,z15.d,z0.d 2049*4757b351SPierre Pronchery.if mixin == 1 2050*4757b351SPierre Pronchery eor w22,w22,w7 2051*4757b351SPierre Pronchery.endif 2052*4757b351SPierre Pronchery.inst 0x04a43063 //eor z3.d,z3.d,z4.d 2053*4757b351SPierre Pronchery.if mixin == 1 2054*4757b351SPierre Pronchery eor w19,w19,w8 2055*4757b351SPierre Pronchery.endif 2056*4757b351SPierre Pronchery.inst 0x04a830e7 //eor z7.d,z7.d,z8.d 2057*4757b351SPierre Pronchery.if mixin == 1 2058*4757b351SPierre Pronchery eor w20,w20,w9 2059*4757b351SPierre Pronchery.endif 2060*4757b351SPierre Pronchery.inst 0x04ac316b //eor z11.d,z11.d,z12.d 2061*4757b351SPierre Pronchery.if mixin == 1 2062*4757b351SPierre Pronchery eor w21,w21,w10 2063*4757b351SPierre Pronchery.endif 2064*4757b351SPierre Pronchery.inst 0x05a581ef //revh z15.s,p0/m,z15.s 2065*4757b351SPierre Pronchery.if mixin == 1 2066*4757b351SPierre Pronchery ror w22,w22,#16 2067*4757b351SPierre Pronchery.endif 2068*4757b351SPierre Pronchery.inst 0x05a58063 //revh z3.s,p0/m,z3.s 2069*4757b351SPierre Pronchery.if mixin == 1 2070*4757b351SPierre Pronchery ror w19,w19,#16 2071*4757b351SPierre Pronchery.endif 2072*4757b351SPierre Pronchery.inst 0x05a580e7 //revh z7.s,p0/m,z7.s 2073*4757b351SPierre Pronchery.if mixin == 1 2074*4757b351SPierre Pronchery ror w20,w20,#16 2075*4757b351SPierre Pronchery.endif 2076*4757b351SPierre Pronchery.inst 0x05a5816b //revh z11.s,p0/m,z11.s 2077*4757b351SPierre Pronchery.if mixin == 1 2078*4757b351SPierre Pronchery ror w21,w21,#16 2079*4757b351SPierre Pronchery.endif 2080*4757b351SPierre Pronchery.inst 0x04af014a //add z10.s,z10.s,z15.s 2081*4757b351SPierre Pronchery.if mixin == 1 2082*4757b351SPierre Pronchery add w17,w17,w22 2083*4757b351SPierre Pronchery.endif 2084*4757b351SPierre Pronchery.inst 0x04a301ce //add z14.s,z14.s,z3.s 2085*4757b351SPierre Pronchery.if mixin == 1 2086*4757b351SPierre Pronchery add w18,w18,w19 2087*4757b351SPierre Pronchery.endif 
2088*4757b351SPierre Pronchery.inst 0x04a70042 //add z2.s,z2.s,z7.s 2089*4757b351SPierre Pronchery.if mixin == 1 2090*4757b351SPierre Pronchery add w15,w15,w20 2091*4757b351SPierre Pronchery.endif 2092*4757b351SPierre Pronchery.inst 0x04ab00c6 //add z6.s,z6.s,z11.s 2093*4757b351SPierre Pronchery.if mixin == 1 2094*4757b351SPierre Pronchery add w16,w16,w21 2095*4757b351SPierre Pronchery.endif 2096*4757b351SPierre Pronchery.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d 2097*4757b351SPierre Pronchery.if mixin == 1 2098*4757b351SPierre Pronchery eor w12,w12,w17 2099*4757b351SPierre Pronchery.endif 2100*4757b351SPierre Pronchery.inst 0x04ae3129 //eor z9.d,z9.d,z14.d 2101*4757b351SPierre Pronchery.if mixin == 1 2102*4757b351SPierre Pronchery eor w13,w13,w18 2103*4757b351SPierre Pronchery.endif 2104*4757b351SPierre Pronchery.inst 0x04a231ad //eor z13.d,z13.d,z2.d 2105*4757b351SPierre Pronchery.if mixin == 1 2106*4757b351SPierre Pronchery eor w14,w14,w15 2107*4757b351SPierre Pronchery.endif 2108*4757b351SPierre Pronchery.inst 0x04a63021 //eor z1.d,z1.d,z6.d 2109*4757b351SPierre Pronchery.if mixin == 1 2110*4757b351SPierre Pronchery eor w11,w11,w16 2111*4757b351SPierre Pronchery.endif 2112*4757b351SPierre Pronchery.inst 0x046c9cb1 //lsl z17.s,z5.s,12 2113*4757b351SPierre Pronchery.inst 0x046c9d32 //lsl z18.s,z9.s,12 2114*4757b351SPierre Pronchery.inst 0x046c9db3 //lsl z19.s,z13.s,12 2115*4757b351SPierre Pronchery.inst 0x046c9c34 //lsl z20.s,z1.s,12 2116*4757b351SPierre Pronchery.inst 0x046c94a5 //lsr z5.s,z5.s,20 2117*4757b351SPierre Pronchery.if mixin == 1 2118*4757b351SPierre Pronchery ror w12,w12,20 2119*4757b351SPierre Pronchery.endif 2120*4757b351SPierre Pronchery.inst 0x046c9529 //lsr z9.s,z9.s,20 2121*4757b351SPierre Pronchery.if mixin == 1 2122*4757b351SPierre Pronchery ror w13,w13,20 2123*4757b351SPierre Pronchery.endif 2124*4757b351SPierre Pronchery.inst 0x046c95ad //lsr z13.s,z13.s,20 2125*4757b351SPierre Pronchery.if mixin == 1 2126*4757b351SPierre Pronchery ror 
w14,w14,20 2127*4757b351SPierre Pronchery.endif 2128*4757b351SPierre Pronchery.inst 0x046c9421 //lsr z1.s,z1.s,20 2129*4757b351SPierre Pronchery.if mixin == 1 2130*4757b351SPierre Pronchery ror w11,w11,20 2131*4757b351SPierre Pronchery.endif 2132*4757b351SPierre Pronchery.inst 0x047130a5 //orr z5.d,z5.d,z17.d 2133*4757b351SPierre Pronchery.inst 0x04723129 //orr z9.d,z9.d,z18.d 2134*4757b351SPierre Pronchery.inst 0x047331ad //orr z13.d,z13.d,z19.d 2135*4757b351SPierre Pronchery.inst 0x04743021 //orr z1.d,z1.d,z20.d 2136*4757b351SPierre Pronchery.inst 0x04a50000 //add z0.s,z0.s,z5.s 2137*4757b351SPierre Pronchery.if mixin == 1 2138*4757b351SPierre Pronchery add w7,w7,w12 2139*4757b351SPierre Pronchery.endif 2140*4757b351SPierre Pronchery.inst 0x04a90084 //add z4.s,z4.s,z9.s 2141*4757b351SPierre Pronchery.if mixin == 1 2142*4757b351SPierre Pronchery add w8,w8,w13 2143*4757b351SPierre Pronchery.endif 2144*4757b351SPierre Pronchery.inst 0x04ad0108 //add z8.s,z8.s,z13.s 2145*4757b351SPierre Pronchery.if mixin == 1 2146*4757b351SPierre Pronchery add w9,w9,w14 2147*4757b351SPierre Pronchery.endif 2148*4757b351SPierre Pronchery.inst 0x04a1018c //add z12.s,z12.s,z1.s 2149*4757b351SPierre Pronchery.if mixin == 1 2150*4757b351SPierre Pronchery add w10,w10,w11 2151*4757b351SPierre Pronchery.endif 2152*4757b351SPierre Pronchery.inst 0x04a031ef //eor z15.d,z15.d,z0.d 2153*4757b351SPierre Pronchery.if mixin == 1 2154*4757b351SPierre Pronchery eor w22,w22,w7 2155*4757b351SPierre Pronchery.endif 2156*4757b351SPierre Pronchery.inst 0x04a43063 //eor z3.d,z3.d,z4.d 2157*4757b351SPierre Pronchery.if mixin == 1 2158*4757b351SPierre Pronchery eor w19,w19,w8 2159*4757b351SPierre Pronchery.endif 2160*4757b351SPierre Pronchery.inst 0x04a830e7 //eor z7.d,z7.d,z8.d 2161*4757b351SPierre Pronchery.if mixin == 1 2162*4757b351SPierre Pronchery eor w20,w20,w9 2163*4757b351SPierre Pronchery.endif 2164*4757b351SPierre Pronchery.inst 0x04ac316b //eor z11.d,z11.d,z12.d 2165*4757b351SPierre Pronchery.if 
mixin == 1 2166*4757b351SPierre Pronchery eor w21,w21,w10 2167*4757b351SPierre Pronchery.endif 2168*4757b351SPierre Pronchery.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b 2169*4757b351SPierre Pronchery.if mixin == 1 2170*4757b351SPierre Pronchery ror w22,w22,#24 2171*4757b351SPierre Pronchery.endif 2172*4757b351SPierre Pronchery.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b 2173*4757b351SPierre Pronchery.if mixin == 1 2174*4757b351SPierre Pronchery ror w19,w19,#24 2175*4757b351SPierre Pronchery.endif 2176*4757b351SPierre Pronchery.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b 2177*4757b351SPierre Pronchery.if mixin == 1 2178*4757b351SPierre Pronchery ror w20,w20,#24 2179*4757b351SPierre Pronchery.endif 2180*4757b351SPierre Pronchery.inst 0x053f316b //tbl z11.b,{z11.b},z31.b 2181*4757b351SPierre Pronchery.if mixin == 1 2182*4757b351SPierre Pronchery ror w21,w21,#24 2183*4757b351SPierre Pronchery.endif 2184*4757b351SPierre Pronchery.inst 0x04af014a //add z10.s,z10.s,z15.s 2185*4757b351SPierre Pronchery.if mixin == 1 2186*4757b351SPierre Pronchery add w17,w17,w22 2187*4757b351SPierre Pronchery.endif 2188*4757b351SPierre Pronchery.inst 0x04a301ce //add z14.s,z14.s,z3.s 2189*4757b351SPierre Pronchery.if mixin == 1 2190*4757b351SPierre Pronchery add w18,w18,w19 2191*4757b351SPierre Pronchery.endif 2192*4757b351SPierre Pronchery.inst 0x04a70042 //add z2.s,z2.s,z7.s 2193*4757b351SPierre Pronchery.if mixin == 1 2194*4757b351SPierre Pronchery add w15,w15,w20 2195*4757b351SPierre Pronchery.endif 2196*4757b351SPierre Pronchery.inst 0x04ab00c6 //add z6.s,z6.s,z11.s 2197*4757b351SPierre Pronchery.if mixin == 1 2198*4757b351SPierre Pronchery add w16,w16,w21 2199*4757b351SPierre Pronchery.endif 2200*4757b351SPierre Pronchery.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d 2201*4757b351SPierre Pronchery.if mixin == 1 2202*4757b351SPierre Pronchery eor w12,w12,w17 2203*4757b351SPierre Pronchery.endif 2204*4757b351SPierre Pronchery.inst 0x04ae3129 //eor z9.d,z9.d,z14.d 2205*4757b351SPierre Pronchery.if mixin 
== 1 2206*4757b351SPierre Pronchery eor w13,w13,w18 2207*4757b351SPierre Pronchery.endif 2208*4757b351SPierre Pronchery.inst 0x04a231ad //eor z13.d,z13.d,z2.d 2209*4757b351SPierre Pronchery.if mixin == 1 2210*4757b351SPierre Pronchery eor w14,w14,w15 2211*4757b351SPierre Pronchery.endif 2212*4757b351SPierre Pronchery.inst 0x04a63021 //eor z1.d,z1.d,z6.d 2213*4757b351SPierre Pronchery.if mixin == 1 2214*4757b351SPierre Pronchery eor w11,w11,w16 2215*4757b351SPierre Pronchery.endif 2216*4757b351SPierre Pronchery.inst 0x04679cb1 //lsl z17.s,z5.s,7 2217*4757b351SPierre Pronchery.inst 0x04679d32 //lsl z18.s,z9.s,7 2218*4757b351SPierre Pronchery.inst 0x04679db3 //lsl z19.s,z13.s,7 2219*4757b351SPierre Pronchery.inst 0x04679c34 //lsl z20.s,z1.s,7 2220*4757b351SPierre Pronchery.inst 0x046794a5 //lsr z5.s,z5.s,25 2221*4757b351SPierre Pronchery.if mixin == 1 2222*4757b351SPierre Pronchery ror w12,w12,25 2223*4757b351SPierre Pronchery.endif 2224*4757b351SPierre Pronchery.inst 0x04679529 //lsr z9.s,z9.s,25 2225*4757b351SPierre Pronchery.if mixin == 1 2226*4757b351SPierre Pronchery ror w13,w13,25 2227*4757b351SPierre Pronchery.endif 2228*4757b351SPierre Pronchery.inst 0x046795ad //lsr z13.s,z13.s,25 2229*4757b351SPierre Pronchery.if mixin == 1 2230*4757b351SPierre Pronchery ror w14,w14,25 2231*4757b351SPierre Pronchery.endif 2232*4757b351SPierre Pronchery.inst 0x04679421 //lsr z1.s,z1.s,25 2233*4757b351SPierre Pronchery.if mixin == 1 2234*4757b351SPierre Pronchery ror w11,w11,25 2235*4757b351SPierre Pronchery.endif 2236*4757b351SPierre Pronchery.inst 0x047130a5 //orr z5.d,z5.d,z17.d 2237*4757b351SPierre Pronchery.inst 0x04723129 //orr z9.d,z9.d,z18.d 2238*4757b351SPierre Pronchery.inst 0x047331ad //orr z13.d,z13.d,z19.d 2239*4757b351SPierre Pronchery.inst 0x04743021 //orr z1.d,z1.d,z20.d 2240*4757b351SPierre Pronchery sub x6,x6,1 2241*4757b351SPierre Pronchery cbnz x6,10b 2242*4757b351SPierre Pronchery lsr x6,x28,#32 2243*4757b351SPierre Pronchery.inst 0x05a03b91 //dup 
z17.s,w28 2244*4757b351SPierre Pronchery.inst 0x05a038d2 //dup z18.s,w6 2245*4757b351SPierre Pronchery lsr x6,x29,#32 2246*4757b351SPierre Pronchery.inst 0x05a038d3 //dup z19.s,w6 2247*4757b351SPierre Pronchery lsr x6,x30,#32 2248*4757b351SPierre Pronchery.if mixin == 1 2249*4757b351SPierre Pronchery add w7,w7,w23 2250*4757b351SPierre Pronchery.endif 2251*4757b351SPierre Pronchery.inst 0x04b90000 //add z0.s,z0.s,z25.s 2252*4757b351SPierre Pronchery.if mixin == 1 2253*4757b351SPierre Pronchery add x8,x8,x23,lsr #32 2254*4757b351SPierre Pronchery.endif 2255*4757b351SPierre Pronchery.inst 0x04ba0084 //add z4.s,z4.s,z26.s 2256*4757b351SPierre Pronchery.if mixin == 1 2257*4757b351SPierre Pronchery add x7,x7,x8,lsl #32 // pack 2258*4757b351SPierre Pronchery.endif 2259*4757b351SPierre Pronchery.if mixin == 1 2260*4757b351SPierre Pronchery add w9,w9,w24 2261*4757b351SPierre Pronchery.endif 2262*4757b351SPierre Pronchery.inst 0x04bb0108 //add z8.s,z8.s,z27.s 2263*4757b351SPierre Pronchery.if mixin == 1 2264*4757b351SPierre Pronchery add x10,x10,x24,lsr #32 2265*4757b351SPierre Pronchery.endif 2266*4757b351SPierre Pronchery.inst 0x04bc018c //add z12.s,z12.s,z28.s 2267*4757b351SPierre Pronchery.if mixin == 1 2268*4757b351SPierre Pronchery add x9,x9,x10,lsl #32 // pack 2269*4757b351SPierre Pronchery.endif 2270*4757b351SPierre Pronchery.if mixin == 1 2271*4757b351SPierre Pronchery ldp x8,x10,[x1],#16 2272*4757b351SPierre Pronchery.endif 2273*4757b351SPierre Pronchery.if mixin == 1 2274*4757b351SPierre Pronchery add w11,w11,w25 2275*4757b351SPierre Pronchery.endif 2276*4757b351SPierre Pronchery.inst 0x04bd0021 //add z1.s,z1.s,z29.s 2277*4757b351SPierre Pronchery.if mixin == 1 2278*4757b351SPierre Pronchery add x12,x12,x25,lsr #32 2279*4757b351SPierre Pronchery.endif 2280*4757b351SPierre Pronchery.inst 0x04be00a5 //add z5.s,z5.s,z30.s 2281*4757b351SPierre Pronchery.if mixin == 1 2282*4757b351SPierre Pronchery add x11,x11,x12,lsl #32 // pack 2283*4757b351SPierre Pronchery.endif 
2284*4757b351SPierre Pronchery.if mixin == 1 2285*4757b351SPierre Pronchery add w13,w13,w26 2286*4757b351SPierre Pronchery.endif 2287*4757b351SPierre Pronchery.inst 0x04b50129 //add z9.s,z9.s,z21.s 2288*4757b351SPierre Pronchery.if mixin == 1 2289*4757b351SPierre Pronchery add x14,x14,x26,lsr #32 2290*4757b351SPierre Pronchery.endif 2291*4757b351SPierre Pronchery.inst 0x04b601ad //add z13.s,z13.s,z22.s 2292*4757b351SPierre Pronchery.if mixin == 1 2293*4757b351SPierre Pronchery add x13,x13,x14,lsl #32 // pack 2294*4757b351SPierre Pronchery.endif 2295*4757b351SPierre Pronchery.if mixin == 1 2296*4757b351SPierre Pronchery ldp x12,x14,[x1],#16 2297*4757b351SPierre Pronchery.endif 2298*4757b351SPierre Pronchery.if mixin == 1 2299*4757b351SPierre Pronchery add w15,w15,w27 2300*4757b351SPierre Pronchery.endif 2301*4757b351SPierre Pronchery.inst 0x04b70042 //add z2.s,z2.s,z23.s 2302*4757b351SPierre Pronchery.if mixin == 1 2303*4757b351SPierre Pronchery add x16,x16,x27,lsr #32 2304*4757b351SPierre Pronchery.endif 2305*4757b351SPierre Pronchery.inst 0x04b800c6 //add z6.s,z6.s,z24.s 2306*4757b351SPierre Pronchery.if mixin == 1 2307*4757b351SPierre Pronchery add x15,x15,x16,lsl #32 // pack 2308*4757b351SPierre Pronchery.endif 2309*4757b351SPierre Pronchery.if mixin == 1 2310*4757b351SPierre Pronchery add w17,w17,w28 2311*4757b351SPierre Pronchery.endif 2312*4757b351SPierre Pronchery.inst 0x04b1014a //add z10.s,z10.s,z17.s 2313*4757b351SPierre Pronchery.if mixin == 1 2314*4757b351SPierre Pronchery add x18,x18,x28,lsr #32 2315*4757b351SPierre Pronchery.endif 2316*4757b351SPierre Pronchery.inst 0x04b201ce //add z14.s,z14.s,z18.s 2317*4757b351SPierre Pronchery.if mixin == 1 2318*4757b351SPierre Pronchery add x17,x17,x18,lsl #32 // pack 2319*4757b351SPierre Pronchery.endif 2320*4757b351SPierre Pronchery.if mixin == 1 2321*4757b351SPierre Pronchery ldp x16,x18,[x1],#16 2322*4757b351SPierre Pronchery.endif 2323*4757b351SPierre Pronchery.inst 0x05a03bd4 //dup z20.s,w30 
2324*4757b351SPierre Pronchery.inst 0x05a038d9 //dup z25.s,w6 // bak[15] not available for SVE 2325*4757b351SPierre Pronchery.if mixin == 1 2326*4757b351SPierre Pronchery add w19,w19,w29 2327*4757b351SPierre Pronchery.endif 2328*4757b351SPierre Pronchery.inst 0x04b00063 //add z3.s,z3.s,z16.s 2329*4757b351SPierre Pronchery.if mixin == 1 2330*4757b351SPierre Pronchery add x20,x20,x29,lsr #32 2331*4757b351SPierre Pronchery.endif 2332*4757b351SPierre Pronchery.inst 0x04b300e7 //add z7.s,z7.s,z19.s 2333*4757b351SPierre Pronchery.if mixin == 1 2334*4757b351SPierre Pronchery add x19,x19,x20,lsl #32 // pack 2335*4757b351SPierre Pronchery.endif 2336*4757b351SPierre Pronchery.if mixin == 1 2337*4757b351SPierre Pronchery add w21,w21,w30 2338*4757b351SPierre Pronchery.endif 2339*4757b351SPierre Pronchery.inst 0x04b4016b //add z11.s,z11.s,z20.s 2340*4757b351SPierre Pronchery.if mixin == 1 2341*4757b351SPierre Pronchery add x22,x22,x30,lsr #32 2342*4757b351SPierre Pronchery.endif 2343*4757b351SPierre Pronchery.inst 0x04b901ef //add z15.s,z15.s,z25.s 2344*4757b351SPierre Pronchery.if mixin == 1 2345*4757b351SPierre Pronchery add x21,x21,x22,lsl #32 // pack 2346*4757b351SPierre Pronchery.endif 2347*4757b351SPierre Pronchery.if mixin == 1 2348*4757b351SPierre Pronchery ldp x20,x22,[x1],#16 2349*4757b351SPierre Pronchery.endif 2350*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 2351*4757b351SPierre Pronchery rev x7,x7 2352*4757b351SPierre Pronchery.inst 0x05a48000 //revb z0.s,p0/m,z0.s 2353*4757b351SPierre Pronchery.inst 0x05a48084 //revb z4.s,p0/m,z4.s 2354*4757b351SPierre Pronchery rev x9,x9 2355*4757b351SPierre Pronchery.inst 0x05a48108 //revb z8.s,p0/m,z8.s 2356*4757b351SPierre Pronchery.inst 0x05a4818c //revb z12.s,p0/m,z12.s 2357*4757b351SPierre Pronchery rev x11,x11 2358*4757b351SPierre Pronchery.inst 0x05a48021 //revb z1.s,p0/m,z1.s 2359*4757b351SPierre Pronchery.inst 0x05a480a5 //revb z5.s,p0/m,z5.s 2360*4757b351SPierre Pronchery rev x13,x13 2361*4757b351SPierre 
Pronchery.inst 0x05a48129 //revb z9.s,p0/m,z9.s 2362*4757b351SPierre Pronchery.inst 0x05a481ad //revb z13.s,p0/m,z13.s 2363*4757b351SPierre Pronchery rev x15,x15 2364*4757b351SPierre Pronchery.inst 0x05a48042 //revb z2.s,p0/m,z2.s 2365*4757b351SPierre Pronchery.inst 0x05a480c6 //revb z6.s,p0/m,z6.s 2366*4757b351SPierre Pronchery rev x17,x17 2367*4757b351SPierre Pronchery.inst 0x05a4814a //revb z10.s,p0/m,z10.s 2368*4757b351SPierre Pronchery.inst 0x05a481ce //revb z14.s,p0/m,z14.s 2369*4757b351SPierre Pronchery rev x19,x19 2370*4757b351SPierre Pronchery.inst 0x05a48063 //revb z3.s,p0/m,z3.s 2371*4757b351SPierre Pronchery.inst 0x05a480e7 //revb z7.s,p0/m,z7.s 2372*4757b351SPierre Pronchery rev x21,x21 2373*4757b351SPierre Pronchery.inst 0x05a4816b //revb z11.s,p0/m,z11.s 2374*4757b351SPierre Pronchery.inst 0x05a481ef //revb z15.s,p0/m,z15.s 2375*4757b351SPierre Pronchery#endif 2376*4757b351SPierre Pronchery.if mixin == 1 2377*4757b351SPierre Pronchery add x29,x29,#1 2378*4757b351SPierre Pronchery.endif 2379*4757b351SPierre Pronchery cmp x5,4 2380*4757b351SPierre Pronchery b.ne 200f 2381*4757b351SPierre Pronchery.if mixin == 1 2382*4757b351SPierre Pronchery eor x7,x7,x8 2383*4757b351SPierre Pronchery.endif 2384*4757b351SPierre Pronchery.if mixin == 1 2385*4757b351SPierre Pronchery eor x9,x9,x10 2386*4757b351SPierre Pronchery.endif 2387*4757b351SPierre Pronchery.if mixin == 1 2388*4757b351SPierre Pronchery eor x11,x11,x12 2389*4757b351SPierre Pronchery.endif 2390*4757b351SPierre Pronchery.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s 2391*4757b351SPierre Pronchery.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s 2392*4757b351SPierre Pronchery.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s 2393*4757b351SPierre Pronchery.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s 2394*4757b351SPierre Pronchery 2395*4757b351SPierre Pronchery.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s 2396*4757b351SPierre Pronchery.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s 2397*4757b351SPierre Pronchery.inst 0x05ad6137 //zip1 
z23.s,z9.s,z13.s 2398*4757b351SPierre Pronchery.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s 2399*4757b351SPierre Pronchery 2400*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 2401*4757b351SPierre Pronchery.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d 2402*4757b351SPierre Pronchery.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d 2403*4757b351SPierre Pronchery.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d 2404*4757b351SPierre Pronchery 2405*4757b351SPierre Pronchery.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d 2406*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 2407*4757b351SPierre Pronchery.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d 2408*4757b351SPierre Pronchery.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d 2409*4757b351SPierre Pronchery.if mixin == 1 2410*4757b351SPierre Pronchery eor x13,x13,x14 2411*4757b351SPierre Pronchery.endif 2412*4757b351SPierre Pronchery.if mixin == 1 2413*4757b351SPierre Pronchery eor x15,x15,x16 2414*4757b351SPierre Pronchery.endif 2415*4757b351SPierre Pronchery.if mixin == 1 2416*4757b351SPierre Pronchery eor x17,x17,x18 2417*4757b351SPierre Pronchery.endif 2418*4757b351SPierre Pronchery.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s 2419*4757b351SPierre Pronchery.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s 2420*4757b351SPierre Pronchery.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s 2421*4757b351SPierre Pronchery.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s 2422*4757b351SPierre Pronchery 2423*4757b351SPierre Pronchery.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s 2424*4757b351SPierre Pronchery.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s 2425*4757b351SPierre Pronchery.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s 2426*4757b351SPierre Pronchery.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s 2427*4757b351SPierre Pronchery 2428*4757b351SPierre Pronchery.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d 2429*4757b351SPierre Pronchery.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d 2430*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 2431*4757b351SPierre Pronchery.inst 0x05f4664e 
//zip2 z14.d,z18.d,z20.d 2432*4757b351SPierre Pronchery 2433*4757b351SPierre Pronchery.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d 2434*4757b351SPierre Pronchery.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d 2435*4757b351SPierre Pronchery.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d 2436*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 2437*4757b351SPierre Pronchery.if mixin == 1 2438*4757b351SPierre Pronchery eor x19,x19,x20 2439*4757b351SPierre Pronchery.endif 2440*4757b351SPierre Pronchery.if mixin == 1 2441*4757b351SPierre Pronchery eor x21,x21,x22 2442*4757b351SPierre Pronchery.endif 2443*4757b351SPierre Pronchery ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 2444*4757b351SPierre Pronchery ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 2445*4757b351SPierre Pronchery.inst 0x04b13000 //eor z0.d,z0.d,z17.d 2446*4757b351SPierre Pronchery.inst 0x04b23021 //eor z1.d,z1.d,z18.d 2447*4757b351SPierre Pronchery.inst 0x04b33042 //eor z2.d,z2.d,z19.d 2448*4757b351SPierre Pronchery.inst 0x04b43063 //eor z3.d,z3.d,z20.d 2449*4757b351SPierre Pronchery.inst 0x04b53084 //eor z4.d,z4.d,z21.d 2450*4757b351SPierre Pronchery.inst 0x04b630a5 //eor z5.d,z5.d,z22.d 2451*4757b351SPierre Pronchery.inst 0x04b730c6 //eor z6.d,z6.d,z23.d 2452*4757b351SPierre Pronchery.inst 0x04b830e7 //eor z7.d,z7.d,z24.d 2453*4757b351SPierre Pronchery ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 2454*4757b351SPierre Pronchery ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 2455*4757b351SPierre Pronchery.if mixin == 1 2456*4757b351SPierre Pronchery stp x7,x9,[x0],#16 2457*4757b351SPierre Pronchery.endif 2458*4757b351SPierre Pronchery.inst 0x04b13108 //eor z8.d,z8.d,z17.d 2459*4757b351SPierre Pronchery.inst 0x04b23129 //eor z9.d,z9.d,z18.d 2460*4757b351SPierre Pronchery.if mixin == 1 2461*4757b351SPierre Pronchery stp x11,x13,[x0],#16 2462*4757b351SPierre Pronchery.endif 2463*4757b351SPierre Pronchery.inst 0x04b3314a //eor z10.d,z10.d,z19.d 2464*4757b351SPierre Pronchery.inst 0x04b4316b //eor z11.d,z11.d,z20.d 
2465*4757b351SPierre Pronchery.if mixin == 1 2466*4757b351SPierre Pronchery stp x15,x17,[x0],#16 2467*4757b351SPierre Pronchery.endif 2468*4757b351SPierre Pronchery.inst 0x04b5318c //eor z12.d,z12.d,z21.d 2469*4757b351SPierre Pronchery.inst 0x04b631ad //eor z13.d,z13.d,z22.d 2470*4757b351SPierre Pronchery.if mixin == 1 2471*4757b351SPierre Pronchery stp x19,x21,[x0],#16 2472*4757b351SPierre Pronchery.endif 2473*4757b351SPierre Pronchery.inst 0x04b731ce //eor z14.d,z14.d,z23.d 2474*4757b351SPierre Pronchery.inst 0x04b831ef //eor z15.d,z15.d,z24.d 2475*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64 2476*4757b351SPierre Pronchery st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 2477*4757b351SPierre Pronchery st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 2478*4757b351SPierre Pronchery st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 2479*4757b351SPierre Pronchery b 210f 2480*4757b351SPierre Pronchery200: 2481*4757b351SPierre Pronchery.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s 2482*4757b351SPierre Pronchery.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s 2483*4757b351SPierre Pronchery.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s 2484*4757b351SPierre Pronchery.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s 2485*4757b351SPierre Pronchery 2486*4757b351SPierre Pronchery.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s 2487*4757b351SPierre Pronchery.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s 2488*4757b351SPierre Pronchery.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s 2489*4757b351SPierre Pronchery.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s 2490*4757b351SPierre Pronchery 2491*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 2492*4757b351SPierre Pronchery.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d 2493*4757b351SPierre Pronchery.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d 2494*4757b351SPierre Pronchery.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d 2495*4757b351SPierre Pronchery 2496*4757b351SPierre Pronchery.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d 2497*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 
2498*4757b351SPierre Pronchery.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d 2499*4757b351SPierre Pronchery.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d 2500*4757b351SPierre Pronchery.if mixin == 1 2501*4757b351SPierre Pronchery eor x7,x7,x8 2502*4757b351SPierre Pronchery.endif 2503*4757b351SPierre Pronchery.if mixin == 1 2504*4757b351SPierre Pronchery eor x9,x9,x10 2505*4757b351SPierre Pronchery.endif 2506*4757b351SPierre Pronchery.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s 2507*4757b351SPierre Pronchery.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s 2508*4757b351SPierre Pronchery.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s 2509*4757b351SPierre Pronchery.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s 2510*4757b351SPierre Pronchery 2511*4757b351SPierre Pronchery.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s 2512*4757b351SPierre Pronchery.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s 2513*4757b351SPierre Pronchery.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s 2514*4757b351SPierre Pronchery.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s 2515*4757b351SPierre Pronchery 2516*4757b351SPierre Pronchery.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d 2517*4757b351SPierre Pronchery.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d 2518*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 2519*4757b351SPierre Pronchery.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d 2520*4757b351SPierre Pronchery 2521*4757b351SPierre Pronchery.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d 2522*4757b351SPierre Pronchery.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d 2523*4757b351SPierre Pronchery.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d 2524*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 2525*4757b351SPierre Pronchery.if mixin == 1 2526*4757b351SPierre Pronchery eor x11,x11,x12 2527*4757b351SPierre Pronchery.endif 2528*4757b351SPierre Pronchery.if mixin == 1 2529*4757b351SPierre Pronchery eor x13,x13,x14 2530*4757b351SPierre Pronchery.endif 2531*4757b351SPierre Pronchery.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s 2532*4757b351SPierre Pronchery.inst 
0x05a46412 //zip2 z18.s,z0.s,z4.s 2533*4757b351SPierre Pronchery.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s 2534*4757b351SPierre Pronchery.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s 2535*4757b351SPierre Pronchery 2536*4757b351SPierre Pronchery.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s 2537*4757b351SPierre Pronchery.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s 2538*4757b351SPierre Pronchery.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s 2539*4757b351SPierre Pronchery.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s 2540*4757b351SPierre Pronchery 2541*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 2542*4757b351SPierre Pronchery.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d 2543*4757b351SPierre Pronchery.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d 2544*4757b351SPierre Pronchery.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d 2545*4757b351SPierre Pronchery 2546*4757b351SPierre Pronchery.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d 2547*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 2548*4757b351SPierre Pronchery.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d 2549*4757b351SPierre Pronchery.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d 2550*4757b351SPierre Pronchery.if mixin == 1 2551*4757b351SPierre Pronchery eor x15,x15,x16 2552*4757b351SPierre Pronchery.endif 2553*4757b351SPierre Pronchery.if mixin == 1 2554*4757b351SPierre Pronchery eor x17,x17,x18 2555*4757b351SPierre Pronchery.endif 2556*4757b351SPierre Pronchery.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s 2557*4757b351SPierre Pronchery.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s 2558*4757b351SPierre Pronchery.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s 2559*4757b351SPierre Pronchery.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s 2560*4757b351SPierre Pronchery 2561*4757b351SPierre Pronchery.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s 2562*4757b351SPierre Pronchery.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s 2563*4757b351SPierre Pronchery.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s 2564*4757b351SPierre Pronchery.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s 2565*4757b351SPierre 
Pronchery 2566*4757b351SPierre Pronchery.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d 2567*4757b351SPierre Pronchery.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d 2568*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 2569*4757b351SPierre Pronchery.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d 2570*4757b351SPierre Pronchery 2571*4757b351SPierre Pronchery.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d 2572*4757b351SPierre Pronchery.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d 2573*4757b351SPierre Pronchery.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d 2574*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 2575*4757b351SPierre Pronchery.if mixin == 1 2576*4757b351SPierre Pronchery eor x19,x19,x20 2577*4757b351SPierre Pronchery.endif 2578*4757b351SPierre Pronchery.if mixin == 1 2579*4757b351SPierre Pronchery eor x21,x21,x22 2580*4757b351SPierre Pronchery.endif 2581*4757b351SPierre Pronchery.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] 2582*4757b351SPierre Pronchery.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] 2583*4757b351SPierre Pronchery.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] 2584*4757b351SPierre Pronchery.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] 2585*4757b351SPierre Pronchery.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] 2586*4757b351SPierre Pronchery.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] 2587*4757b351SPierre Pronchery.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] 2588*4757b351SPierre Pronchery.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] 2589*4757b351SPierre Pronchery.inst 0x04215101 //addvl x1,x1,8 2590*4757b351SPierre Pronchery.inst 0x04b13000 //eor z0.d,z0.d,z17.d 2591*4757b351SPierre Pronchery.inst 0x04b23084 //eor z4.d,z4.d,z18.d 2592*4757b351SPierre Pronchery.inst 0x04b33108 //eor z8.d,z8.d,z19.d 2593*4757b351SPierre Pronchery.inst 0x04b4318c //eor z12.d,z12.d,z20.d 2594*4757b351SPierre Pronchery.inst 0x04b53021 //eor z1.d,z1.d,z21.d 2595*4757b351SPierre Pronchery.inst 0x04b630a5 //eor 
z5.d,z5.d,z22.d 2596*4757b351SPierre Pronchery.inst 0x04b73129 //eor z9.d,z9.d,z23.d 2597*4757b351SPierre Pronchery.inst 0x04b831ad //eor z13.d,z13.d,z24.d 2598*4757b351SPierre Pronchery.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] 2599*4757b351SPierre Pronchery.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] 2600*4757b351SPierre Pronchery.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] 2601*4757b351SPierre Pronchery.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] 2602*4757b351SPierre Pronchery.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] 2603*4757b351SPierre Pronchery.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] 2604*4757b351SPierre Pronchery.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] 2605*4757b351SPierre Pronchery.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] 2606*4757b351SPierre Pronchery.inst 0x04215101 //addvl x1,x1,8 2607*4757b351SPierre Pronchery.if mixin == 1 2608*4757b351SPierre Pronchery stp x7,x9,[x0],#16 2609*4757b351SPierre Pronchery.endif 2610*4757b351SPierre Pronchery.inst 0x04b13042 //eor z2.d,z2.d,z17.d 2611*4757b351SPierre Pronchery.inst 0x04b230c6 //eor z6.d,z6.d,z18.d 2612*4757b351SPierre Pronchery.if mixin == 1 2613*4757b351SPierre Pronchery stp x11,x13,[x0],#16 2614*4757b351SPierre Pronchery.endif 2615*4757b351SPierre Pronchery.inst 0x04b3314a //eor z10.d,z10.d,z19.d 2616*4757b351SPierre Pronchery.inst 0x04b431ce //eor z14.d,z14.d,z20.d 2617*4757b351SPierre Pronchery.if mixin == 1 2618*4757b351SPierre Pronchery stp x15,x17,[x0],#16 2619*4757b351SPierre Pronchery.endif 2620*4757b351SPierre Pronchery.inst 0x04b53063 //eor z3.d,z3.d,z21.d 2621*4757b351SPierre Pronchery.inst 0x04b630e7 //eor z7.d,z7.d,z22.d 2622*4757b351SPierre Pronchery.if mixin == 1 2623*4757b351SPierre Pronchery stp x19,x21,[x0],#16 2624*4757b351SPierre Pronchery.endif 2625*4757b351SPierre Pronchery.inst 0x04b7316b //eor z11.d,z11.d,z23.d 2626*4757b351SPierre Pronchery.inst 0x04b831ef //eor z15.d,z15.d,z24.d 2627*4757b351SPierre 
Pronchery.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL] 2628*4757b351SPierre Pronchery.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL] 2629*4757b351SPierre Pronchery.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL] 2630*4757b351SPierre Pronchery.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL] 2631*4757b351SPierre Pronchery.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL] 2632*4757b351SPierre Pronchery.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL] 2633*4757b351SPierre Pronchery.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL] 2634*4757b351SPierre Pronchery.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL] 2635*4757b351SPierre Pronchery.inst 0x04205100 //addvl x0,x0,8 2636*4757b351SPierre Pronchery.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL] 2637*4757b351SPierre Pronchery.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL] 2638*4757b351SPierre Pronchery.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL] 2639*4757b351SPierre Pronchery.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL] 2640*4757b351SPierre Pronchery.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL] 2641*4757b351SPierre Pronchery.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL] 2642*4757b351SPierre Pronchery.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL] 2643*4757b351SPierre Pronchery.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL] 2644*4757b351SPierre Pronchery.inst 0x04205100 //addvl x0,x0,8 2645*4757b351SPierre Pronchery210: 2646*4757b351SPierre Pronchery.inst 0x04b0e3fd //incw x29, ALL, MUL #1 2647*4757b351SPierre Pronchery subs x2,x2,64 2648*4757b351SPierre Pronchery b.gt 100b 2649*4757b351SPierre Pronchery b 110f 2650*4757b351SPierre Pronchery101: 2651*4757b351SPierre Pronchery mixin=0 2652*4757b351SPierre Pronchery lsr x8,x23,#32 2653*4757b351SPierre Pronchery.inst 0x05a03ae0 //dup z0.s,w23 2654*4757b351SPierre Pronchery.inst 0x05a03af9 //dup z25.s,w23 2655*4757b351SPierre Pronchery.if mixin == 1 2656*4757b351SPierre Pronchery mov w7,w23 2657*4757b351SPierre Pronchery.endif 2658*4757b351SPierre 
Pronchery.inst 0x05a03904 //dup z4.s,w8 2659*4757b351SPierre Pronchery.inst 0x05a0391a //dup z26.s,w8 2660*4757b351SPierre Pronchery lsr x10,x24,#32 2661*4757b351SPierre Pronchery.inst 0x05a03b08 //dup z8.s,w24 2662*4757b351SPierre Pronchery.inst 0x05a03b1b //dup z27.s,w24 2663*4757b351SPierre Pronchery.if mixin == 1 2664*4757b351SPierre Pronchery mov w9,w24 2665*4757b351SPierre Pronchery.endif 2666*4757b351SPierre Pronchery.inst 0x05a0394c //dup z12.s,w10 2667*4757b351SPierre Pronchery.inst 0x05a0395c //dup z28.s,w10 2668*4757b351SPierre Pronchery lsr x12,x25,#32 2669*4757b351SPierre Pronchery.inst 0x05a03b21 //dup z1.s,w25 2670*4757b351SPierre Pronchery.inst 0x05a03b3d //dup z29.s,w25 2671*4757b351SPierre Pronchery.if mixin == 1 2672*4757b351SPierre Pronchery mov w11,w25 2673*4757b351SPierre Pronchery.endif 2674*4757b351SPierre Pronchery.inst 0x05a03985 //dup z5.s,w12 2675*4757b351SPierre Pronchery.inst 0x05a0399e //dup z30.s,w12 2676*4757b351SPierre Pronchery lsr x14,x26,#32 2677*4757b351SPierre Pronchery.inst 0x05a03b49 //dup z9.s,w26 2678*4757b351SPierre Pronchery.inst 0x05a03b55 //dup z21.s,w26 2679*4757b351SPierre Pronchery.if mixin == 1 2680*4757b351SPierre Pronchery mov w13,w26 2681*4757b351SPierre Pronchery.endif 2682*4757b351SPierre Pronchery.inst 0x05a039cd //dup z13.s,w14 2683*4757b351SPierre Pronchery.inst 0x05a039d6 //dup z22.s,w14 2684*4757b351SPierre Pronchery lsr x16,x27,#32 2685*4757b351SPierre Pronchery.inst 0x05a03b62 //dup z2.s,w27 2686*4757b351SPierre Pronchery.inst 0x05a03b77 //dup z23.s,w27 2687*4757b351SPierre Pronchery.if mixin == 1 2688*4757b351SPierre Pronchery mov w15,w27 2689*4757b351SPierre Pronchery.endif 2690*4757b351SPierre Pronchery.inst 0x05a03a06 //dup z6.s,w16 2691*4757b351SPierre Pronchery.inst 0x05a03a18 //dup z24.s,w16 2692*4757b351SPierre Pronchery lsr x18,x28,#32 2693*4757b351SPierre Pronchery.inst 0x05a03b8a //dup z10.s,w28 2694*4757b351SPierre Pronchery.if mixin == 1 2695*4757b351SPierre Pronchery mov w17,w28 
2696*4757b351SPierre Pronchery.endif 2697*4757b351SPierre Pronchery.inst 0x05a03a4e //dup z14.s,w18 2698*4757b351SPierre Pronchery lsr x22,x30,#32 2699*4757b351SPierre Pronchery.inst 0x05a03bcb //dup z11.s,w30 2700*4757b351SPierre Pronchery.if mixin == 1 2701*4757b351SPierre Pronchery mov w21,w30 2702*4757b351SPierre Pronchery.endif 2703*4757b351SPierre Pronchery.inst 0x05a03acf //dup z15.s,w22 2704*4757b351SPierre Pronchery.if mixin == 1 2705*4757b351SPierre Pronchery add w20,w29,#1 2706*4757b351SPierre Pronchery mov w19,w29 2707*4757b351SPierre Pronchery.inst 0x04a14690 //index z16.s,w20,1 2708*4757b351SPierre Pronchery.inst 0x04a14683 //index z3.s,w20,1 2709*4757b351SPierre Pronchery.else 2710*4757b351SPierre Pronchery.inst 0x04a147b0 //index z16.s,w29,1 2711*4757b351SPierre Pronchery.inst 0x04a147a3 //index z3.s,w29,1 2712*4757b351SPierre Pronchery.endif 2713*4757b351SPierre Pronchery lsr x20,x29,#32 2714*4757b351SPierre Pronchery.inst 0x05a03a87 //dup z7.s,w20 2715*4757b351SPierre Pronchery mov x6,#10 2716*4757b351SPierre Pronchery10: 2717*4757b351SPierre Pronchery.align 5 2718*4757b351SPierre Pronchery.inst 0x04a10000 //add z0.s,z0.s,z1.s 2719*4757b351SPierre Pronchery.if mixin == 1 2720*4757b351SPierre Pronchery add w7,w7,w11 2721*4757b351SPierre Pronchery.endif 2722*4757b351SPierre Pronchery.inst 0x04a50084 //add z4.s,z4.s,z5.s 2723*4757b351SPierre Pronchery.if mixin == 1 2724*4757b351SPierre Pronchery add w8,w8,w12 2725*4757b351SPierre Pronchery.endif 2726*4757b351SPierre Pronchery.inst 0x04a90108 //add z8.s,z8.s,z9.s 2727*4757b351SPierre Pronchery.if mixin == 1 2728*4757b351SPierre Pronchery add w9,w9,w13 2729*4757b351SPierre Pronchery.endif 2730*4757b351SPierre Pronchery.inst 0x04ad018c //add z12.s,z12.s,z13.s 2731*4757b351SPierre Pronchery.if mixin == 1 2732*4757b351SPierre Pronchery add w10,w10,w14 2733*4757b351SPierre Pronchery.endif 2734*4757b351SPierre Pronchery.inst 0x04a03063 //eor z3.d,z3.d,z0.d 2735*4757b351SPierre Pronchery.if mixin == 1 
2736*4757b351SPierre Pronchery eor w19,w19,w7 2737*4757b351SPierre Pronchery.endif 2738*4757b351SPierre Pronchery.inst 0x04a430e7 //eor z7.d,z7.d,z4.d 2739*4757b351SPierre Pronchery.if mixin == 1 2740*4757b351SPierre Pronchery eor w20,w20,w8 2741*4757b351SPierre Pronchery.endif 2742*4757b351SPierre Pronchery.inst 0x04a8316b //eor z11.d,z11.d,z8.d 2743*4757b351SPierre Pronchery.if mixin == 1 2744*4757b351SPierre Pronchery eor w21,w21,w9 2745*4757b351SPierre Pronchery.endif 2746*4757b351SPierre Pronchery.inst 0x04ac31ef //eor z15.d,z15.d,z12.d 2747*4757b351SPierre Pronchery.if mixin == 1 2748*4757b351SPierre Pronchery eor w22,w22,w10 2749*4757b351SPierre Pronchery.endif 2750*4757b351SPierre Pronchery.inst 0x05a58063 //revh z3.s,p0/m,z3.s 2751*4757b351SPierre Pronchery.if mixin == 1 2752*4757b351SPierre Pronchery ror w19,w19,#16 2753*4757b351SPierre Pronchery.endif 2754*4757b351SPierre Pronchery.inst 0x05a580e7 //revh z7.s,p0/m,z7.s 2755*4757b351SPierre Pronchery.if mixin == 1 2756*4757b351SPierre Pronchery ror w20,w20,#16 2757*4757b351SPierre Pronchery.endif 2758*4757b351SPierre Pronchery.inst 0x05a5816b //revh z11.s,p0/m,z11.s 2759*4757b351SPierre Pronchery.if mixin == 1 2760*4757b351SPierre Pronchery ror w21,w21,#16 2761*4757b351SPierre Pronchery.endif 2762*4757b351SPierre Pronchery.inst 0x05a581ef //revh z15.s,p0/m,z15.s 2763*4757b351SPierre Pronchery.if mixin == 1 2764*4757b351SPierre Pronchery ror w22,w22,#16 2765*4757b351SPierre Pronchery.endif 2766*4757b351SPierre Pronchery.inst 0x04a30042 //add z2.s,z2.s,z3.s 2767*4757b351SPierre Pronchery.if mixin == 1 2768*4757b351SPierre Pronchery add w15,w15,w19 2769*4757b351SPierre Pronchery.endif 2770*4757b351SPierre Pronchery.inst 0x04a700c6 //add z6.s,z6.s,z7.s 2771*4757b351SPierre Pronchery.if mixin == 1 2772*4757b351SPierre Pronchery add w16,w16,w20 2773*4757b351SPierre Pronchery.endif 2774*4757b351SPierre Pronchery.inst 0x04ab014a //add z10.s,z10.s,z11.s 2775*4757b351SPierre Pronchery.if mixin == 1 
2776*4757b351SPierre Pronchery add w17,w17,w21 2777*4757b351SPierre Pronchery.endif 2778*4757b351SPierre Pronchery.inst 0x04af01ce //add z14.s,z14.s,z15.s 2779*4757b351SPierre Pronchery.if mixin == 1 2780*4757b351SPierre Pronchery add w18,w18,w22 2781*4757b351SPierre Pronchery.endif 2782*4757b351SPierre Pronchery.inst 0x04a23021 //eor z1.d,z1.d,z2.d 2783*4757b351SPierre Pronchery.if mixin == 1 2784*4757b351SPierre Pronchery eor w11,w11,w15 2785*4757b351SPierre Pronchery.endif 2786*4757b351SPierre Pronchery.inst 0x04a630a5 //eor z5.d,z5.d,z6.d 2787*4757b351SPierre Pronchery.if mixin == 1 2788*4757b351SPierre Pronchery eor w12,w12,w16 2789*4757b351SPierre Pronchery.endif 2790*4757b351SPierre Pronchery.inst 0x04aa3129 //eor z9.d,z9.d,z10.d 2791*4757b351SPierre Pronchery.if mixin == 1 2792*4757b351SPierre Pronchery eor w13,w13,w17 2793*4757b351SPierre Pronchery.endif 2794*4757b351SPierre Pronchery.inst 0x04ae31ad //eor z13.d,z13.d,z14.d 2795*4757b351SPierre Pronchery.if mixin == 1 2796*4757b351SPierre Pronchery eor w14,w14,w18 2797*4757b351SPierre Pronchery.endif 2798*4757b351SPierre Pronchery.inst 0x046c9c31 //lsl z17.s,z1.s,12 2799*4757b351SPierre Pronchery.inst 0x046c9cb2 //lsl z18.s,z5.s,12 2800*4757b351SPierre Pronchery.inst 0x046c9d33 //lsl z19.s,z9.s,12 2801*4757b351SPierre Pronchery.inst 0x046c9db4 //lsl z20.s,z13.s,12 2802*4757b351SPierre Pronchery.inst 0x046c9421 //lsr z1.s,z1.s,20 2803*4757b351SPierre Pronchery.if mixin == 1 2804*4757b351SPierre Pronchery ror w11,w11,20 2805*4757b351SPierre Pronchery.endif 2806*4757b351SPierre Pronchery.inst 0x046c94a5 //lsr z5.s,z5.s,20 2807*4757b351SPierre Pronchery.if mixin == 1 2808*4757b351SPierre Pronchery ror w12,w12,20 2809*4757b351SPierre Pronchery.endif 2810*4757b351SPierre Pronchery.inst 0x046c9529 //lsr z9.s,z9.s,20 2811*4757b351SPierre Pronchery.if mixin == 1 2812*4757b351SPierre Pronchery ror w13,w13,20 2813*4757b351SPierre Pronchery.endif 2814*4757b351SPierre Pronchery.inst 0x046c95ad //lsr z13.s,z13.s,20 
2815*4757b351SPierre Pronchery.if mixin == 1 2816*4757b351SPierre Pronchery ror w14,w14,20 2817*4757b351SPierre Pronchery.endif 2818*4757b351SPierre Pronchery.inst 0x04713021 //orr z1.d,z1.d,z17.d 2819*4757b351SPierre Pronchery.inst 0x047230a5 //orr z5.d,z5.d,z18.d 2820*4757b351SPierre Pronchery.inst 0x04733129 //orr z9.d,z9.d,z19.d 2821*4757b351SPierre Pronchery.inst 0x047431ad //orr z13.d,z13.d,z20.d 2822*4757b351SPierre Pronchery.inst 0x04a10000 //add z0.s,z0.s,z1.s 2823*4757b351SPierre Pronchery.if mixin == 1 2824*4757b351SPierre Pronchery add w7,w7,w11 2825*4757b351SPierre Pronchery.endif 2826*4757b351SPierre Pronchery.inst 0x04a50084 //add z4.s,z4.s,z5.s 2827*4757b351SPierre Pronchery.if mixin == 1 2828*4757b351SPierre Pronchery add w8,w8,w12 2829*4757b351SPierre Pronchery.endif 2830*4757b351SPierre Pronchery.inst 0x04a90108 //add z8.s,z8.s,z9.s 2831*4757b351SPierre Pronchery.if mixin == 1 2832*4757b351SPierre Pronchery add w9,w9,w13 2833*4757b351SPierre Pronchery.endif 2834*4757b351SPierre Pronchery.inst 0x04ad018c //add z12.s,z12.s,z13.s 2835*4757b351SPierre Pronchery.if mixin == 1 2836*4757b351SPierre Pronchery add w10,w10,w14 2837*4757b351SPierre Pronchery.endif 2838*4757b351SPierre Pronchery.inst 0x04a03063 //eor z3.d,z3.d,z0.d 2839*4757b351SPierre Pronchery.if mixin == 1 2840*4757b351SPierre Pronchery eor w19,w19,w7 2841*4757b351SPierre Pronchery.endif 2842*4757b351SPierre Pronchery.inst 0x04a430e7 //eor z7.d,z7.d,z4.d 2843*4757b351SPierre Pronchery.if mixin == 1 2844*4757b351SPierre Pronchery eor w20,w20,w8 2845*4757b351SPierre Pronchery.endif 2846*4757b351SPierre Pronchery.inst 0x04a8316b //eor z11.d,z11.d,z8.d 2847*4757b351SPierre Pronchery.if mixin == 1 2848*4757b351SPierre Pronchery eor w21,w21,w9 2849*4757b351SPierre Pronchery.endif 2850*4757b351SPierre Pronchery.inst 0x04ac31ef //eor z15.d,z15.d,z12.d 2851*4757b351SPierre Pronchery.if mixin == 1 2852*4757b351SPierre Pronchery eor w22,w22,w10 2853*4757b351SPierre Pronchery.endif 
2854*4757b351SPierre Pronchery.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b 2855*4757b351SPierre Pronchery.if mixin == 1 2856*4757b351SPierre Pronchery ror w19,w19,#24 2857*4757b351SPierre Pronchery.endif 2858*4757b351SPierre Pronchery.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b 2859*4757b351SPierre Pronchery.if mixin == 1 2860*4757b351SPierre Pronchery ror w20,w20,#24 2861*4757b351SPierre Pronchery.endif 2862*4757b351SPierre Pronchery.inst 0x053f316b //tbl z11.b,{z11.b},z31.b 2863*4757b351SPierre Pronchery.if mixin == 1 2864*4757b351SPierre Pronchery ror w21,w21,#24 2865*4757b351SPierre Pronchery.endif 2866*4757b351SPierre Pronchery.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b 2867*4757b351SPierre Pronchery.if mixin == 1 2868*4757b351SPierre Pronchery ror w22,w22,#24 2869*4757b351SPierre Pronchery.endif 2870*4757b351SPierre Pronchery.inst 0x04a30042 //add z2.s,z2.s,z3.s 2871*4757b351SPierre Pronchery.if mixin == 1 2872*4757b351SPierre Pronchery add w15,w15,w19 2873*4757b351SPierre Pronchery.endif 2874*4757b351SPierre Pronchery.inst 0x04a700c6 //add z6.s,z6.s,z7.s 2875*4757b351SPierre Pronchery.if mixin == 1 2876*4757b351SPierre Pronchery add w16,w16,w20 2877*4757b351SPierre Pronchery.endif 2878*4757b351SPierre Pronchery.inst 0x04ab014a //add z10.s,z10.s,z11.s 2879*4757b351SPierre Pronchery.if mixin == 1 2880*4757b351SPierre Pronchery add w17,w17,w21 2881*4757b351SPierre Pronchery.endif 2882*4757b351SPierre Pronchery.inst 0x04af01ce //add z14.s,z14.s,z15.s 2883*4757b351SPierre Pronchery.if mixin == 1 2884*4757b351SPierre Pronchery add w18,w18,w22 2885*4757b351SPierre Pronchery.endif 2886*4757b351SPierre Pronchery.inst 0x04a23021 //eor z1.d,z1.d,z2.d 2887*4757b351SPierre Pronchery.if mixin == 1 2888*4757b351SPierre Pronchery eor w11,w11,w15 2889*4757b351SPierre Pronchery.endif 2890*4757b351SPierre Pronchery.inst 0x04a630a5 //eor z5.d,z5.d,z6.d 2891*4757b351SPierre Pronchery.if mixin == 1 2892*4757b351SPierre Pronchery eor w12,w12,w16 2893*4757b351SPierre Pronchery.endif 
2894*4757b351SPierre Pronchery.inst 0x04aa3129 //eor z9.d,z9.d,z10.d 2895*4757b351SPierre Pronchery.if mixin == 1 2896*4757b351SPierre Pronchery eor w13,w13,w17 2897*4757b351SPierre Pronchery.endif 2898*4757b351SPierre Pronchery.inst 0x04ae31ad //eor z13.d,z13.d,z14.d 2899*4757b351SPierre Pronchery.if mixin == 1 2900*4757b351SPierre Pronchery eor w14,w14,w18 2901*4757b351SPierre Pronchery.endif 2902*4757b351SPierre Pronchery.inst 0x04679c31 //lsl z17.s,z1.s,7 2903*4757b351SPierre Pronchery.inst 0x04679cb2 //lsl z18.s,z5.s,7 2904*4757b351SPierre Pronchery.inst 0x04679d33 //lsl z19.s,z9.s,7 2905*4757b351SPierre Pronchery.inst 0x04679db4 //lsl z20.s,z13.s,7 2906*4757b351SPierre Pronchery.inst 0x04679421 //lsr z1.s,z1.s,25 2907*4757b351SPierre Pronchery.if mixin == 1 2908*4757b351SPierre Pronchery ror w11,w11,25 2909*4757b351SPierre Pronchery.endif 2910*4757b351SPierre Pronchery.inst 0x046794a5 //lsr z5.s,z5.s,25 2911*4757b351SPierre Pronchery.if mixin == 1 2912*4757b351SPierre Pronchery ror w12,w12,25 2913*4757b351SPierre Pronchery.endif 2914*4757b351SPierre Pronchery.inst 0x04679529 //lsr z9.s,z9.s,25 2915*4757b351SPierre Pronchery.if mixin == 1 2916*4757b351SPierre Pronchery ror w13,w13,25 2917*4757b351SPierre Pronchery.endif 2918*4757b351SPierre Pronchery.inst 0x046795ad //lsr z13.s,z13.s,25 2919*4757b351SPierre Pronchery.if mixin == 1 2920*4757b351SPierre Pronchery ror w14,w14,25 2921*4757b351SPierre Pronchery.endif 2922*4757b351SPierre Pronchery.inst 0x04713021 //orr z1.d,z1.d,z17.d 2923*4757b351SPierre Pronchery.inst 0x047230a5 //orr z5.d,z5.d,z18.d 2924*4757b351SPierre Pronchery.inst 0x04733129 //orr z9.d,z9.d,z19.d 2925*4757b351SPierre Pronchery.inst 0x047431ad //orr z13.d,z13.d,z20.d 2926*4757b351SPierre Pronchery.inst 0x04a50000 //add z0.s,z0.s,z5.s 2927*4757b351SPierre Pronchery.if mixin == 1 2928*4757b351SPierre Pronchery add w7,w7,w12 2929*4757b351SPierre Pronchery.endif 2930*4757b351SPierre Pronchery.inst 0x04a90084 //add z4.s,z4.s,z9.s 
2931*4757b351SPierre Pronchery.if mixin == 1 2932*4757b351SPierre Pronchery add w8,w8,w13 2933*4757b351SPierre Pronchery.endif 2934*4757b351SPierre Pronchery.inst 0x04ad0108 //add z8.s,z8.s,z13.s 2935*4757b351SPierre Pronchery.if mixin == 1 2936*4757b351SPierre Pronchery add w9,w9,w14 2937*4757b351SPierre Pronchery.endif 2938*4757b351SPierre Pronchery.inst 0x04a1018c //add z12.s,z12.s,z1.s 2939*4757b351SPierre Pronchery.if mixin == 1 2940*4757b351SPierre Pronchery add w10,w10,w11 2941*4757b351SPierre Pronchery.endif 2942*4757b351SPierre Pronchery.inst 0x04a031ef //eor z15.d,z15.d,z0.d 2943*4757b351SPierre Pronchery.if mixin == 1 2944*4757b351SPierre Pronchery eor w22,w22,w7 2945*4757b351SPierre Pronchery.endif 2946*4757b351SPierre Pronchery.inst 0x04a43063 //eor z3.d,z3.d,z4.d 2947*4757b351SPierre Pronchery.if mixin == 1 2948*4757b351SPierre Pronchery eor w19,w19,w8 2949*4757b351SPierre Pronchery.endif 2950*4757b351SPierre Pronchery.inst 0x04a830e7 //eor z7.d,z7.d,z8.d 2951*4757b351SPierre Pronchery.if mixin == 1 2952*4757b351SPierre Pronchery eor w20,w20,w9 2953*4757b351SPierre Pronchery.endif 2954*4757b351SPierre Pronchery.inst 0x04ac316b //eor z11.d,z11.d,z12.d 2955*4757b351SPierre Pronchery.if mixin == 1 2956*4757b351SPierre Pronchery eor w21,w21,w10 2957*4757b351SPierre Pronchery.endif 2958*4757b351SPierre Pronchery.inst 0x05a581ef //revh z15.s,p0/m,z15.s 2959*4757b351SPierre Pronchery.if mixin == 1 2960*4757b351SPierre Pronchery ror w22,w22,#16 2961*4757b351SPierre Pronchery.endif 2962*4757b351SPierre Pronchery.inst 0x05a58063 //revh z3.s,p0/m,z3.s 2963*4757b351SPierre Pronchery.if mixin == 1 2964*4757b351SPierre Pronchery ror w19,w19,#16 2965*4757b351SPierre Pronchery.endif 2966*4757b351SPierre Pronchery.inst 0x05a580e7 //revh z7.s,p0/m,z7.s 2967*4757b351SPierre Pronchery.if mixin == 1 2968*4757b351SPierre Pronchery ror w20,w20,#16 2969*4757b351SPierre Pronchery.endif 2970*4757b351SPierre Pronchery.inst 0x05a5816b //revh z11.s,p0/m,z11.s 2971*4757b351SPierre 
Pronchery.if mixin == 1 2972*4757b351SPierre Pronchery ror w21,w21,#16 2973*4757b351SPierre Pronchery.endif 2974*4757b351SPierre Pronchery.inst 0x04af014a //add z10.s,z10.s,z15.s 2975*4757b351SPierre Pronchery.if mixin == 1 2976*4757b351SPierre Pronchery add w17,w17,w22 2977*4757b351SPierre Pronchery.endif 2978*4757b351SPierre Pronchery.inst 0x04a301ce //add z14.s,z14.s,z3.s 2979*4757b351SPierre Pronchery.if mixin == 1 2980*4757b351SPierre Pronchery add w18,w18,w19 2981*4757b351SPierre Pronchery.endif 2982*4757b351SPierre Pronchery.inst 0x04a70042 //add z2.s,z2.s,z7.s 2983*4757b351SPierre Pronchery.if mixin == 1 2984*4757b351SPierre Pronchery add w15,w15,w20 2985*4757b351SPierre Pronchery.endif 2986*4757b351SPierre Pronchery.inst 0x04ab00c6 //add z6.s,z6.s,z11.s 2987*4757b351SPierre Pronchery.if mixin == 1 2988*4757b351SPierre Pronchery add w16,w16,w21 2989*4757b351SPierre Pronchery.endif 2990*4757b351SPierre Pronchery.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d 2991*4757b351SPierre Pronchery.if mixin == 1 2992*4757b351SPierre Pronchery eor w12,w12,w17 2993*4757b351SPierre Pronchery.endif 2994*4757b351SPierre Pronchery.inst 0x04ae3129 //eor z9.d,z9.d,z14.d 2995*4757b351SPierre Pronchery.if mixin == 1 2996*4757b351SPierre Pronchery eor w13,w13,w18 2997*4757b351SPierre Pronchery.endif 2998*4757b351SPierre Pronchery.inst 0x04a231ad //eor z13.d,z13.d,z2.d 2999*4757b351SPierre Pronchery.if mixin == 1 3000*4757b351SPierre Pronchery eor w14,w14,w15 3001*4757b351SPierre Pronchery.endif 3002*4757b351SPierre Pronchery.inst 0x04a63021 //eor z1.d,z1.d,z6.d 3003*4757b351SPierre Pronchery.if mixin == 1 3004*4757b351SPierre Pronchery eor w11,w11,w16 3005*4757b351SPierre Pronchery.endif 3006*4757b351SPierre Pronchery.inst 0x046c9cb1 //lsl z17.s,z5.s,12 3007*4757b351SPierre Pronchery.inst 0x046c9d32 //lsl z18.s,z9.s,12 3008*4757b351SPierre Pronchery.inst 0x046c9db3 //lsl z19.s,z13.s,12 3009*4757b351SPierre Pronchery.inst 0x046c9c34 //lsl z20.s,z1.s,12 3010*4757b351SPierre Pronchery.inst 
0x046c94a5 //lsr z5.s,z5.s,20 3011*4757b351SPierre Pronchery.if mixin == 1 3012*4757b351SPierre Pronchery ror w12,w12,20 3013*4757b351SPierre Pronchery.endif 3014*4757b351SPierre Pronchery.inst 0x046c9529 //lsr z9.s,z9.s,20 3015*4757b351SPierre Pronchery.if mixin == 1 3016*4757b351SPierre Pronchery ror w13,w13,20 3017*4757b351SPierre Pronchery.endif 3018*4757b351SPierre Pronchery.inst 0x046c95ad //lsr z13.s,z13.s,20 3019*4757b351SPierre Pronchery.if mixin == 1 3020*4757b351SPierre Pronchery ror w14,w14,20 3021*4757b351SPierre Pronchery.endif 3022*4757b351SPierre Pronchery.inst 0x046c9421 //lsr z1.s,z1.s,20 3023*4757b351SPierre Pronchery.if mixin == 1 3024*4757b351SPierre Pronchery ror w11,w11,20 3025*4757b351SPierre Pronchery.endif 3026*4757b351SPierre Pronchery.inst 0x047130a5 //orr z5.d,z5.d,z17.d 3027*4757b351SPierre Pronchery.inst 0x04723129 //orr z9.d,z9.d,z18.d 3028*4757b351SPierre Pronchery.inst 0x047331ad //orr z13.d,z13.d,z19.d 3029*4757b351SPierre Pronchery.inst 0x04743021 //orr z1.d,z1.d,z20.d 3030*4757b351SPierre Pronchery.inst 0x04a50000 //add z0.s,z0.s,z5.s 3031*4757b351SPierre Pronchery.if mixin == 1 3032*4757b351SPierre Pronchery add w7,w7,w12 3033*4757b351SPierre Pronchery.endif 3034*4757b351SPierre Pronchery.inst 0x04a90084 //add z4.s,z4.s,z9.s 3035*4757b351SPierre Pronchery.if mixin == 1 3036*4757b351SPierre Pronchery add w8,w8,w13 3037*4757b351SPierre Pronchery.endif 3038*4757b351SPierre Pronchery.inst 0x04ad0108 //add z8.s,z8.s,z13.s 3039*4757b351SPierre Pronchery.if mixin == 1 3040*4757b351SPierre Pronchery add w9,w9,w14 3041*4757b351SPierre Pronchery.endif 3042*4757b351SPierre Pronchery.inst 0x04a1018c //add z12.s,z12.s,z1.s 3043*4757b351SPierre Pronchery.if mixin == 1 3044*4757b351SPierre Pronchery add w10,w10,w11 3045*4757b351SPierre Pronchery.endif 3046*4757b351SPierre Pronchery.inst 0x04a031ef //eor z15.d,z15.d,z0.d 3047*4757b351SPierre Pronchery.if mixin == 1 3048*4757b351SPierre Pronchery eor w22,w22,w7 3049*4757b351SPierre 
Pronchery.endif 3050*4757b351SPierre Pronchery.inst 0x04a43063 //eor z3.d,z3.d,z4.d 3051*4757b351SPierre Pronchery.if mixin == 1 3052*4757b351SPierre Pronchery eor w19,w19,w8 3053*4757b351SPierre Pronchery.endif 3054*4757b351SPierre Pronchery.inst 0x04a830e7 //eor z7.d,z7.d,z8.d 3055*4757b351SPierre Pronchery.if mixin == 1 3056*4757b351SPierre Pronchery eor w20,w20,w9 3057*4757b351SPierre Pronchery.endif 3058*4757b351SPierre Pronchery.inst 0x04ac316b //eor z11.d,z11.d,z12.d 3059*4757b351SPierre Pronchery.if mixin == 1 3060*4757b351SPierre Pronchery eor w21,w21,w10 3061*4757b351SPierre Pronchery.endif 3062*4757b351SPierre Pronchery.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b 3063*4757b351SPierre Pronchery.if mixin == 1 3064*4757b351SPierre Pronchery ror w22,w22,#24 3065*4757b351SPierre Pronchery.endif 3066*4757b351SPierre Pronchery.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b 3067*4757b351SPierre Pronchery.if mixin == 1 3068*4757b351SPierre Pronchery ror w19,w19,#24 3069*4757b351SPierre Pronchery.endif 3070*4757b351SPierre Pronchery.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b 3071*4757b351SPierre Pronchery.if mixin == 1 3072*4757b351SPierre Pronchery ror w20,w20,#24 3073*4757b351SPierre Pronchery.endif 3074*4757b351SPierre Pronchery.inst 0x053f316b //tbl z11.b,{z11.b},z31.b 3075*4757b351SPierre Pronchery.if mixin == 1 3076*4757b351SPierre Pronchery ror w21,w21,#24 3077*4757b351SPierre Pronchery.endif 3078*4757b351SPierre Pronchery.inst 0x04af014a //add z10.s,z10.s,z15.s 3079*4757b351SPierre Pronchery.if mixin == 1 3080*4757b351SPierre Pronchery add w17,w17,w22 3081*4757b351SPierre Pronchery.endif 3082*4757b351SPierre Pronchery.inst 0x04a301ce //add z14.s,z14.s,z3.s 3083*4757b351SPierre Pronchery.if mixin == 1 3084*4757b351SPierre Pronchery add w18,w18,w19 3085*4757b351SPierre Pronchery.endif 3086*4757b351SPierre Pronchery.inst 0x04a70042 //add z2.s,z2.s,z7.s 3087*4757b351SPierre Pronchery.if mixin == 1 3088*4757b351SPierre Pronchery add w15,w15,w20 3089*4757b351SPierre 
Pronchery.endif 3090*4757b351SPierre Pronchery.inst 0x04ab00c6 //add z6.s,z6.s,z11.s 3091*4757b351SPierre Pronchery.if mixin == 1 3092*4757b351SPierre Pronchery add w16,w16,w21 3093*4757b351SPierre Pronchery.endif 3094*4757b351SPierre Pronchery.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d 3095*4757b351SPierre Pronchery.if mixin == 1 3096*4757b351SPierre Pronchery eor w12,w12,w17 3097*4757b351SPierre Pronchery.endif 3098*4757b351SPierre Pronchery.inst 0x04ae3129 //eor z9.d,z9.d,z14.d 3099*4757b351SPierre Pronchery.if mixin == 1 3100*4757b351SPierre Pronchery eor w13,w13,w18 3101*4757b351SPierre Pronchery.endif 3102*4757b351SPierre Pronchery.inst 0x04a231ad //eor z13.d,z13.d,z2.d 3103*4757b351SPierre Pronchery.if mixin == 1 3104*4757b351SPierre Pronchery eor w14,w14,w15 3105*4757b351SPierre Pronchery.endif 3106*4757b351SPierre Pronchery.inst 0x04a63021 //eor z1.d,z1.d,z6.d 3107*4757b351SPierre Pronchery.if mixin == 1 3108*4757b351SPierre Pronchery eor w11,w11,w16 3109*4757b351SPierre Pronchery.endif 3110*4757b351SPierre Pronchery.inst 0x04679cb1 //lsl z17.s,z5.s,7 3111*4757b351SPierre Pronchery.inst 0x04679d32 //lsl z18.s,z9.s,7 3112*4757b351SPierre Pronchery.inst 0x04679db3 //lsl z19.s,z13.s,7 3113*4757b351SPierre Pronchery.inst 0x04679c34 //lsl z20.s,z1.s,7 3114*4757b351SPierre Pronchery.inst 0x046794a5 //lsr z5.s,z5.s,25 3115*4757b351SPierre Pronchery.if mixin == 1 3116*4757b351SPierre Pronchery ror w12,w12,25 3117*4757b351SPierre Pronchery.endif 3118*4757b351SPierre Pronchery.inst 0x04679529 //lsr z9.s,z9.s,25 3119*4757b351SPierre Pronchery.if mixin == 1 3120*4757b351SPierre Pronchery ror w13,w13,25 3121*4757b351SPierre Pronchery.endif 3122*4757b351SPierre Pronchery.inst 0x046795ad //lsr z13.s,z13.s,25 3123*4757b351SPierre Pronchery.if mixin == 1 3124*4757b351SPierre Pronchery ror w14,w14,25 3125*4757b351SPierre Pronchery.endif 3126*4757b351SPierre Pronchery.inst 0x04679421 //lsr z1.s,z1.s,25 3127*4757b351SPierre Pronchery.if mixin == 1 3128*4757b351SPierre Pronchery 
ror w11,w11,25 3129*4757b351SPierre Pronchery.endif 3130*4757b351SPierre Pronchery.inst 0x047130a5 //orr z5.d,z5.d,z17.d 3131*4757b351SPierre Pronchery.inst 0x04723129 //orr z9.d,z9.d,z18.d 3132*4757b351SPierre Pronchery.inst 0x047331ad //orr z13.d,z13.d,z19.d 3133*4757b351SPierre Pronchery.inst 0x04743021 //orr z1.d,z1.d,z20.d 3134*4757b351SPierre Pronchery sub x6,x6,1 3135*4757b351SPierre Pronchery cbnz x6,10b 3136*4757b351SPierre Pronchery lsr x6,x28,#32 3137*4757b351SPierre Pronchery.inst 0x05a03b91 //dup z17.s,w28 3138*4757b351SPierre Pronchery.inst 0x05a038d2 //dup z18.s,w6 3139*4757b351SPierre Pronchery lsr x6,x29,#32 3140*4757b351SPierre Pronchery.inst 0x05a038d3 //dup z19.s,w6 3141*4757b351SPierre Pronchery lsr x6,x30,#32 3142*4757b351SPierre Pronchery.if mixin == 1 3143*4757b351SPierre Pronchery add w7,w7,w23 3144*4757b351SPierre Pronchery.endif 3145*4757b351SPierre Pronchery.inst 0x04b90000 //add z0.s,z0.s,z25.s 3146*4757b351SPierre Pronchery.if mixin == 1 3147*4757b351SPierre Pronchery add x8,x8,x23,lsr #32 3148*4757b351SPierre Pronchery.endif 3149*4757b351SPierre Pronchery.inst 0x04ba0084 //add z4.s,z4.s,z26.s 3150*4757b351SPierre Pronchery.if mixin == 1 3151*4757b351SPierre Pronchery add x7,x7,x8,lsl #32 // pack 3152*4757b351SPierre Pronchery.endif 3153*4757b351SPierre Pronchery.if mixin == 1 3154*4757b351SPierre Pronchery add w9,w9,w24 3155*4757b351SPierre Pronchery.endif 3156*4757b351SPierre Pronchery.inst 0x04bb0108 //add z8.s,z8.s,z27.s 3157*4757b351SPierre Pronchery.if mixin == 1 3158*4757b351SPierre Pronchery add x10,x10,x24,lsr #32 3159*4757b351SPierre Pronchery.endif 3160*4757b351SPierre Pronchery.inst 0x04bc018c //add z12.s,z12.s,z28.s 3161*4757b351SPierre Pronchery.if mixin == 1 3162*4757b351SPierre Pronchery add x9,x9,x10,lsl #32 // pack 3163*4757b351SPierre Pronchery.endif 3164*4757b351SPierre Pronchery.if mixin == 1 3165*4757b351SPierre Pronchery ldp x8,x10,[x1],#16 3166*4757b351SPierre Pronchery.endif 3167*4757b351SPierre Pronchery.if 
mixin == 1 3168*4757b351SPierre Pronchery add w11,w11,w25 3169*4757b351SPierre Pronchery.endif 3170*4757b351SPierre Pronchery.inst 0x04bd0021 //add z1.s,z1.s,z29.s 3171*4757b351SPierre Pronchery.if mixin == 1 3172*4757b351SPierre Pronchery add x12,x12,x25,lsr #32 3173*4757b351SPierre Pronchery.endif 3174*4757b351SPierre Pronchery.inst 0x04be00a5 //add z5.s,z5.s,z30.s 3175*4757b351SPierre Pronchery.if mixin == 1 3176*4757b351SPierre Pronchery add x11,x11,x12,lsl #32 // pack 3177*4757b351SPierre Pronchery.endif 3178*4757b351SPierre Pronchery.if mixin == 1 3179*4757b351SPierre Pronchery add w13,w13,w26 3180*4757b351SPierre Pronchery.endif 3181*4757b351SPierre Pronchery.inst 0x04b50129 //add z9.s,z9.s,z21.s 3182*4757b351SPierre Pronchery.if mixin == 1 3183*4757b351SPierre Pronchery add x14,x14,x26,lsr #32 3184*4757b351SPierre Pronchery.endif 3185*4757b351SPierre Pronchery.inst 0x04b601ad //add z13.s,z13.s,z22.s 3186*4757b351SPierre Pronchery.if mixin == 1 3187*4757b351SPierre Pronchery add x13,x13,x14,lsl #32 // pack 3188*4757b351SPierre Pronchery.endif 3189*4757b351SPierre Pronchery.if mixin == 1 3190*4757b351SPierre Pronchery ldp x12,x14,[x1],#16 3191*4757b351SPierre Pronchery.endif 3192*4757b351SPierre Pronchery.if mixin == 1 3193*4757b351SPierre Pronchery add w15,w15,w27 3194*4757b351SPierre Pronchery.endif 3195*4757b351SPierre Pronchery.inst 0x04b70042 //add z2.s,z2.s,z23.s 3196*4757b351SPierre Pronchery.if mixin == 1 3197*4757b351SPierre Pronchery add x16,x16,x27,lsr #32 3198*4757b351SPierre Pronchery.endif 3199*4757b351SPierre Pronchery.inst 0x04b800c6 //add z6.s,z6.s,z24.s 3200*4757b351SPierre Pronchery.if mixin == 1 3201*4757b351SPierre Pronchery add x15,x15,x16,lsl #32 // pack 3202*4757b351SPierre Pronchery.endif 3203*4757b351SPierre Pronchery.if mixin == 1 3204*4757b351SPierre Pronchery add w17,w17,w28 3205*4757b351SPierre Pronchery.endif 3206*4757b351SPierre Pronchery.inst 0x04b1014a //add z10.s,z10.s,z17.s 3207*4757b351SPierre Pronchery.if mixin == 1 
3208*4757b351SPierre Pronchery add x18,x18,x28,lsr #32 3209*4757b351SPierre Pronchery.endif 3210*4757b351SPierre Pronchery.inst 0x04b201ce //add z14.s,z14.s,z18.s 3211*4757b351SPierre Pronchery.if mixin == 1 3212*4757b351SPierre Pronchery add x17,x17,x18,lsl #32 // pack 3213*4757b351SPierre Pronchery.endif 3214*4757b351SPierre Pronchery.if mixin == 1 3215*4757b351SPierre Pronchery ldp x16,x18,[x1],#16 3216*4757b351SPierre Pronchery.endif 3217*4757b351SPierre Pronchery.inst 0x05a03bd4 //dup z20.s,w30 3218*4757b351SPierre Pronchery.inst 0x05a038d9 //dup z25.s,w6 // bak[15] not available for SVE 3219*4757b351SPierre Pronchery.if mixin == 1 3220*4757b351SPierre Pronchery add w19,w19,w29 3221*4757b351SPierre Pronchery.endif 3222*4757b351SPierre Pronchery.inst 0x04b00063 //add z3.s,z3.s,z16.s 3223*4757b351SPierre Pronchery.if mixin == 1 3224*4757b351SPierre Pronchery add x20,x20,x29,lsr #32 3225*4757b351SPierre Pronchery.endif 3226*4757b351SPierre Pronchery.inst 0x04b300e7 //add z7.s,z7.s,z19.s 3227*4757b351SPierre Pronchery.if mixin == 1 3228*4757b351SPierre Pronchery add x19,x19,x20,lsl #32 // pack 3229*4757b351SPierre Pronchery.endif 3230*4757b351SPierre Pronchery.if mixin == 1 3231*4757b351SPierre Pronchery add w21,w21,w30 3232*4757b351SPierre Pronchery.endif 3233*4757b351SPierre Pronchery.inst 0x04b4016b //add z11.s,z11.s,z20.s 3234*4757b351SPierre Pronchery.if mixin == 1 3235*4757b351SPierre Pronchery add x22,x22,x30,lsr #32 3236*4757b351SPierre Pronchery.endif 3237*4757b351SPierre Pronchery.inst 0x04b901ef //add z15.s,z15.s,z25.s 3238*4757b351SPierre Pronchery.if mixin == 1 3239*4757b351SPierre Pronchery add x21,x21,x22,lsl #32 // pack 3240*4757b351SPierre Pronchery.endif 3241*4757b351SPierre Pronchery.if mixin == 1 3242*4757b351SPierre Pronchery ldp x20,x22,[x1],#16 3243*4757b351SPierre Pronchery.endif 3244*4757b351SPierre Pronchery#ifdef __AARCH64EB__ 3245*4757b351SPierre Pronchery rev x7,x7 3246*4757b351SPierre Pronchery.inst 0x05a48000 //revb z0.s,p0/m,z0.s 
3247*4757b351SPierre Pronchery.inst 0x05a48084 //revb z4.s,p0/m,z4.s 3248*4757b351SPierre Pronchery rev x9,x9 3249*4757b351SPierre Pronchery.inst 0x05a48108 //revb z8.s,p0/m,z8.s 3250*4757b351SPierre Pronchery.inst 0x05a4818c //revb z12.s,p0/m,z12.s 3251*4757b351SPierre Pronchery rev x11,x11 3252*4757b351SPierre Pronchery.inst 0x05a48021 //revb z1.s,p0/m,z1.s 3253*4757b351SPierre Pronchery.inst 0x05a480a5 //revb z5.s,p0/m,z5.s 3254*4757b351SPierre Pronchery rev x13,x13 3255*4757b351SPierre Pronchery.inst 0x05a48129 //revb z9.s,p0/m,z9.s 3256*4757b351SPierre Pronchery.inst 0x05a481ad //revb z13.s,p0/m,z13.s 3257*4757b351SPierre Pronchery rev x15,x15 3258*4757b351SPierre Pronchery.inst 0x05a48042 //revb z2.s,p0/m,z2.s 3259*4757b351SPierre Pronchery.inst 0x05a480c6 //revb z6.s,p0/m,z6.s 3260*4757b351SPierre Pronchery rev x17,x17 3261*4757b351SPierre Pronchery.inst 0x05a4814a //revb z10.s,p0/m,z10.s 3262*4757b351SPierre Pronchery.inst 0x05a481ce //revb z14.s,p0/m,z14.s 3263*4757b351SPierre Pronchery rev x19,x19 3264*4757b351SPierre Pronchery.inst 0x05a48063 //revb z3.s,p0/m,z3.s 3265*4757b351SPierre Pronchery.inst 0x05a480e7 //revb z7.s,p0/m,z7.s 3266*4757b351SPierre Pronchery rev x21,x21 3267*4757b351SPierre Pronchery.inst 0x05a4816b //revb z11.s,p0/m,z11.s 3268*4757b351SPierre Pronchery.inst 0x05a481ef //revb z15.s,p0/m,z15.s 3269*4757b351SPierre Pronchery#endif 3270*4757b351SPierre Pronchery.if mixin == 1 3271*4757b351SPierre Pronchery add x29,x29,#1 3272*4757b351SPierre Pronchery.endif 3273*4757b351SPierre Pronchery cmp x5,4 3274*4757b351SPierre Pronchery b.ne 200f 3275*4757b351SPierre Pronchery.if mixin == 1 3276*4757b351SPierre Pronchery eor x7,x7,x8 3277*4757b351SPierre Pronchery.endif 3278*4757b351SPierre Pronchery.if mixin == 1 3279*4757b351SPierre Pronchery eor x9,x9,x10 3280*4757b351SPierre Pronchery.endif 3281*4757b351SPierre Pronchery.if mixin == 1 3282*4757b351SPierre Pronchery eor x11,x11,x12 3283*4757b351SPierre Pronchery.endif 3284*4757b351SPierre 
Pronchery.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s 3285*4757b351SPierre Pronchery.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s 3286*4757b351SPierre Pronchery.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s 3287*4757b351SPierre Pronchery.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s 3288*4757b351SPierre Pronchery 3289*4757b351SPierre Pronchery.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s 3290*4757b351SPierre Pronchery.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s 3291*4757b351SPierre Pronchery.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s 3292*4757b351SPierre Pronchery.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s 3293*4757b351SPierre Pronchery 3294*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 3295*4757b351SPierre Pronchery.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d 3296*4757b351SPierre Pronchery.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d 3297*4757b351SPierre Pronchery.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d 3298*4757b351SPierre Pronchery 3299*4757b351SPierre Pronchery.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d 3300*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 3301*4757b351SPierre Pronchery.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d 3302*4757b351SPierre Pronchery.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d 3303*4757b351SPierre Pronchery.if mixin == 1 3304*4757b351SPierre Pronchery eor x13,x13,x14 3305*4757b351SPierre Pronchery.endif 3306*4757b351SPierre Pronchery.if mixin == 1 3307*4757b351SPierre Pronchery eor x15,x15,x16 3308*4757b351SPierre Pronchery.endif 3309*4757b351SPierre Pronchery.if mixin == 1 3310*4757b351SPierre Pronchery eor x17,x17,x18 3311*4757b351SPierre Pronchery.endif 3312*4757b351SPierre Pronchery.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s 3313*4757b351SPierre Pronchery.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s 3314*4757b351SPierre Pronchery.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s 3315*4757b351SPierre Pronchery.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s 3316*4757b351SPierre Pronchery 3317*4757b351SPierre Pronchery.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s 3318*4757b351SPierre 
Pronchery.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s 3319*4757b351SPierre Pronchery.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s 3320*4757b351SPierre Pronchery.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s 3321*4757b351SPierre Pronchery 3322*4757b351SPierre Pronchery.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d 3323*4757b351SPierre Pronchery.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d 3324*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 3325*4757b351SPierre Pronchery.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d 3326*4757b351SPierre Pronchery 3327*4757b351SPierre Pronchery.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d 3328*4757b351SPierre Pronchery.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d 3329*4757b351SPierre Pronchery.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d 3330*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 3331*4757b351SPierre Pronchery.if mixin == 1 3332*4757b351SPierre Pronchery eor x19,x19,x20 3333*4757b351SPierre Pronchery.endif 3334*4757b351SPierre Pronchery.if mixin == 1 3335*4757b351SPierre Pronchery eor x21,x21,x22 3336*4757b351SPierre Pronchery.endif 3337*4757b351SPierre Pronchery ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 3338*4757b351SPierre Pronchery ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 3339*4757b351SPierre Pronchery.inst 0x04b13000 //eor z0.d,z0.d,z17.d 3340*4757b351SPierre Pronchery.inst 0x04b23021 //eor z1.d,z1.d,z18.d 3341*4757b351SPierre Pronchery.inst 0x04b33042 //eor z2.d,z2.d,z19.d 3342*4757b351SPierre Pronchery.inst 0x04b43063 //eor z3.d,z3.d,z20.d 3343*4757b351SPierre Pronchery.inst 0x04b53084 //eor z4.d,z4.d,z21.d 3344*4757b351SPierre Pronchery.inst 0x04b630a5 //eor z5.d,z5.d,z22.d 3345*4757b351SPierre Pronchery.inst 0x04b730c6 //eor z6.d,z6.d,z23.d 3346*4757b351SPierre Pronchery.inst 0x04b830e7 //eor z7.d,z7.d,z24.d 3347*4757b351SPierre Pronchery ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64 3348*4757b351SPierre Pronchery ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64 3349*4757b351SPierre Pronchery.if mixin == 1 3350*4757b351SPierre 
Pronchery stp x7,x9,[x0],#16 3351*4757b351SPierre Pronchery.endif 3352*4757b351SPierre Pronchery.inst 0x04b13108 //eor z8.d,z8.d,z17.d 3353*4757b351SPierre Pronchery.inst 0x04b23129 //eor z9.d,z9.d,z18.d 3354*4757b351SPierre Pronchery.if mixin == 1 3355*4757b351SPierre Pronchery stp x11,x13,[x0],#16 3356*4757b351SPierre Pronchery.endif 3357*4757b351SPierre Pronchery.inst 0x04b3314a //eor z10.d,z10.d,z19.d 3358*4757b351SPierre Pronchery.inst 0x04b4316b //eor z11.d,z11.d,z20.d 3359*4757b351SPierre Pronchery.if mixin == 1 3360*4757b351SPierre Pronchery stp x15,x17,[x0],#16 3361*4757b351SPierre Pronchery.endif 3362*4757b351SPierre Pronchery.inst 0x04b5318c //eor z12.d,z12.d,z21.d 3363*4757b351SPierre Pronchery.inst 0x04b631ad //eor z13.d,z13.d,z22.d 3364*4757b351SPierre Pronchery.if mixin == 1 3365*4757b351SPierre Pronchery stp x19,x21,[x0],#16 3366*4757b351SPierre Pronchery.endif 3367*4757b351SPierre Pronchery.inst 0x04b731ce //eor z14.d,z14.d,z23.d 3368*4757b351SPierre Pronchery.inst 0x04b831ef //eor z15.d,z15.d,z24.d 3369*4757b351SPierre Pronchery st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64 3370*4757b351SPierre Pronchery st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64 3371*4757b351SPierre Pronchery st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64 3372*4757b351SPierre Pronchery st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64 3373*4757b351SPierre Pronchery b 210f 3374*4757b351SPierre Pronchery200: 3375*4757b351SPierre Pronchery.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s 3376*4757b351SPierre Pronchery.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s 3377*4757b351SPierre Pronchery.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s 3378*4757b351SPierre Pronchery.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s 3379*4757b351SPierre Pronchery 3380*4757b351SPierre Pronchery.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s 3381*4757b351SPierre Pronchery.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s 3382*4757b351SPierre Pronchery.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s 3383*4757b351SPierre Pronchery.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s 
3384*4757b351SPierre Pronchery 3385*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 3386*4757b351SPierre Pronchery.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d 3387*4757b351SPierre Pronchery.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d 3388*4757b351SPierre Pronchery.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d 3389*4757b351SPierre Pronchery 3390*4757b351SPierre Pronchery.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d 3391*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 3392*4757b351SPierre Pronchery.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d 3393*4757b351SPierre Pronchery.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d 3394*4757b351SPierre Pronchery.if mixin == 1 3395*4757b351SPierre Pronchery eor x7,x7,x8 3396*4757b351SPierre Pronchery.endif 3397*4757b351SPierre Pronchery.if mixin == 1 3398*4757b351SPierre Pronchery eor x9,x9,x10 3399*4757b351SPierre Pronchery.endif 3400*4757b351SPierre Pronchery.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s 3401*4757b351SPierre Pronchery.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s 3402*4757b351SPierre Pronchery.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s 3403*4757b351SPierre Pronchery.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s 3404*4757b351SPierre Pronchery 3405*4757b351SPierre Pronchery.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s 3406*4757b351SPierre Pronchery.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s 3407*4757b351SPierre Pronchery.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s 3408*4757b351SPierre Pronchery.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s 3409*4757b351SPierre Pronchery 3410*4757b351SPierre Pronchery.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d 3411*4757b351SPierre Pronchery.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d 3412*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 3413*4757b351SPierre Pronchery.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d 3414*4757b351SPierre Pronchery 3415*4757b351SPierre Pronchery.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d 3416*4757b351SPierre Pronchery.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d 3417*4757b351SPierre 
Pronchery.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d 3418*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 3419*4757b351SPierre Pronchery.if mixin == 1 3420*4757b351SPierre Pronchery eor x11,x11,x12 3421*4757b351SPierre Pronchery.endif 3422*4757b351SPierre Pronchery.if mixin == 1 3423*4757b351SPierre Pronchery eor x13,x13,x14 3424*4757b351SPierre Pronchery.endif 3425*4757b351SPierre Pronchery.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s 3426*4757b351SPierre Pronchery.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s 3427*4757b351SPierre Pronchery.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s 3428*4757b351SPierre Pronchery.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s 3429*4757b351SPierre Pronchery 3430*4757b351SPierre Pronchery.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s 3431*4757b351SPierre Pronchery.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s 3432*4757b351SPierre Pronchery.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s 3433*4757b351SPierre Pronchery.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s 3434*4757b351SPierre Pronchery 3435*4757b351SPierre Pronchery.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d 3436*4757b351SPierre Pronchery.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d 3437*4757b351SPierre Pronchery.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d 3438*4757b351SPierre Pronchery.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d 3439*4757b351SPierre Pronchery 3440*4757b351SPierre Pronchery.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d 3441*4757b351SPierre Pronchery.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d 3442*4757b351SPierre Pronchery.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d 3443*4757b351SPierre Pronchery.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d 3444*4757b351SPierre Pronchery.if mixin == 1 3445*4757b351SPierre Pronchery eor x15,x15,x16 3446*4757b351SPierre Pronchery.endif 3447*4757b351SPierre Pronchery.if mixin == 1 3448*4757b351SPierre Pronchery eor x17,x17,x18 3449*4757b351SPierre Pronchery.endif 3450*4757b351SPierre Pronchery.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s 3451*4757b351SPierre Pronchery.inst 0x05a66452 //zip2 
z18.s,z2.s,z6.s 3452*4757b351SPierre Pronchery.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s 3453*4757b351SPierre Pronchery.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s 3454*4757b351SPierre Pronchery 3455*4757b351SPierre Pronchery.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s 3456*4757b351SPierre Pronchery.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s 3457*4757b351SPierre Pronchery.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s 3458*4757b351SPierre Pronchery.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s 3459*4757b351SPierre Pronchery 3460*4757b351SPierre Pronchery.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d 3461*4757b351SPierre Pronchery.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d 3462*4757b351SPierre Pronchery.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d 3463*4757b351SPierre Pronchery.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d 3464*4757b351SPierre Pronchery 3465*4757b351SPierre Pronchery.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d 3466*4757b351SPierre Pronchery.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d 3467*4757b351SPierre Pronchery.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d 3468*4757b351SPierre Pronchery.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d 3469*4757b351SPierre Pronchery.if mixin == 1 3470*4757b351SPierre Pronchery eor x19,x19,x20 3471*4757b351SPierre Pronchery.endif 3472*4757b351SPierre Pronchery.if mixin == 1 3473*4757b351SPierre Pronchery eor x21,x21,x22 3474*4757b351SPierre Pronchery.endif 3475*4757b351SPierre Pronchery.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] 3476*4757b351SPierre Pronchery.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] 3477*4757b351SPierre Pronchery.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] 3478*4757b351SPierre Pronchery.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] 3479*4757b351SPierre Pronchery.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] 3480*4757b351SPierre Pronchery.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] 3481*4757b351SPierre Pronchery.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] 3482*4757b351SPierre Pronchery.inst 0xa547a038 //ld1w 
{z24.s},p0/z,[x1,#7,MUL VL] 3483*4757b351SPierre Pronchery.inst 0x04215101 //addvl x1,x1,8 3484*4757b351SPierre Pronchery.inst 0x04b13000 //eor z0.d,z0.d,z17.d 3485*4757b351SPierre Pronchery.inst 0x04b23084 //eor z4.d,z4.d,z18.d 3486*4757b351SPierre Pronchery.inst 0x04b33108 //eor z8.d,z8.d,z19.d 3487*4757b351SPierre Pronchery.inst 0x04b4318c //eor z12.d,z12.d,z20.d 3488*4757b351SPierre Pronchery.inst 0x04b53021 //eor z1.d,z1.d,z21.d 3489*4757b351SPierre Pronchery.inst 0x04b630a5 //eor z5.d,z5.d,z22.d 3490*4757b351SPierre Pronchery.inst 0x04b73129 //eor z9.d,z9.d,z23.d 3491*4757b351SPierre Pronchery.inst 0x04b831ad //eor z13.d,z13.d,z24.d 3492*4757b351SPierre Pronchery.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL] 3493*4757b351SPierre Pronchery.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL] 3494*4757b351SPierre Pronchery.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL] 3495*4757b351SPierre Pronchery.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL] 3496*4757b351SPierre Pronchery.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL] 3497*4757b351SPierre Pronchery.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL] 3498*4757b351SPierre Pronchery.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL] 3499*4757b351SPierre Pronchery.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL] 3500*4757b351SPierre Pronchery.inst 0x04215101 //addvl x1,x1,8 3501*4757b351SPierre Pronchery.if mixin == 1 3502*4757b351SPierre Pronchery stp x7,x9,[x0],#16 3503*4757b351SPierre Pronchery.endif 3504*4757b351SPierre Pronchery.inst 0x04b13042 //eor z2.d,z2.d,z17.d 3505*4757b351SPierre Pronchery.inst 0x04b230c6 //eor z6.d,z6.d,z18.d 3506*4757b351SPierre Pronchery.if mixin == 1 3507*4757b351SPierre Pronchery stp x11,x13,[x0],#16 3508*4757b351SPierre Pronchery.endif 3509*4757b351SPierre Pronchery.inst 0x04b3314a //eor z10.d,z10.d,z19.d 3510*4757b351SPierre Pronchery.inst 0x04b431ce //eor z14.d,z14.d,z20.d 3511*4757b351SPierre Pronchery.if mixin == 1 3512*4757b351SPierre Pronchery stp 
x15,x17,[x0],#16 3513*4757b351SPierre Pronchery.endif 3514*4757b351SPierre Pronchery.inst 0x04b53063 //eor z3.d,z3.d,z21.d 3515*4757b351SPierre Pronchery.inst 0x04b630e7 //eor z7.d,z7.d,z22.d 3516*4757b351SPierre Pronchery.if mixin == 1 3517*4757b351SPierre Pronchery stp x19,x21,[x0],#16 3518*4757b351SPierre Pronchery.endif 3519*4757b351SPierre Pronchery.inst 0x04b7316b //eor z11.d,z11.d,z23.d 3520*4757b351SPierre Pronchery.inst 0x04b831ef //eor z15.d,z15.d,z24.d 3521*4757b351SPierre Pronchery.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL] 3522*4757b351SPierre Pronchery.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL] 3523*4757b351SPierre Pronchery.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL] 3524*4757b351SPierre Pronchery.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL] 3525*4757b351SPierre Pronchery.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL] 3526*4757b351SPierre Pronchery.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL] 3527*4757b351SPierre Pronchery.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL] 3528*4757b351SPierre Pronchery.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL] 3529*4757b351SPierre Pronchery.inst 0x04205100 //addvl x0,x0,8 3530*4757b351SPierre Pronchery.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL] 3531*4757b351SPierre Pronchery.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL] 3532*4757b351SPierre Pronchery.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL] 3533*4757b351SPierre Pronchery.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL] 3534*4757b351SPierre Pronchery.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL] 3535*4757b351SPierre Pronchery.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL] 3536*4757b351SPierre Pronchery.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL] 3537*4757b351SPierre Pronchery.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL] 3538*4757b351SPierre Pronchery.inst 0x04205100 //addvl x0,x0,8 3539*4757b351SPierre Pronchery210: 3540*4757b351SPierre Pronchery.inst 0x04b0e3fd //incw x29, ALL, MUL #1 3541*4757b351SPierre 
Pronchery110: 3542*4757b351SPierre Pronchery2: 3543*4757b351SPierre Pronchery str w29,[x4] 3544*4757b351SPierre Pronchery ldp d10,d11,[sp,16] 3545*4757b351SPierre Pronchery ldp d12,d13,[sp,32] 3546*4757b351SPierre Pronchery ldp d14,d15,[sp,48] 3547*4757b351SPierre Pronchery ldp x16,x17,[sp,64] 3548*4757b351SPierre Pronchery ldp x18,x19,[sp,80] 3549*4757b351SPierre Pronchery ldp x20,x21,[sp,96] 3550*4757b351SPierre Pronchery ldp x22,x23,[sp,112] 3551*4757b351SPierre Pronchery ldp x24,x25,[sp,128] 3552*4757b351SPierre Pronchery ldp x26,x27,[sp,144] 3553*4757b351SPierre Pronchery ldp x28,x29,[sp,160] 3554*4757b351SPierre Pronchery ldr x30,[sp,176] 3555*4757b351SPierre Pronchery ldp d8,d9,[sp],192 3556*4757b351SPierre Pronchery AARCH64_VALIDATE_LINK_REGISTER 3557*4757b351SPierre Pronchery.Lreturn: 3558*4757b351SPierre Pronchery ret 3559*4757b351SPierre Pronchery.size ChaCha20_ctr32_sve,.-ChaCha20_ctr32_sve 3560