// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021-3 ARM Limited.
//
// Assembly portion of the FP ptrace test

//
// Load values from memory into registers, break on a breakpoint, then
// save the registers back to memory and break on a further breakpoint
//

#include "fp-ptrace.h"
#include "sme-inst.h"

.arch_extension sve

// Load and save register values with pauses for ptrace
//
// Inputs:
//   x0 - SVE in use
//   x1 - SME in use
//   x2 - SME2 in use
//   x3 - FA64 supported
//
// Reads:   v_in, svcr_in, za_in, zt_in, z_in, ffr_in, p_in
// Writes:  v_out, sme_vl_out, svcr_out, za_out, zt_out, sve_vl_out,
//          z_out, p_out, ffr_out
//
// Scratch registers:
//   x4       - non-zero when FFR should be loaded/saved
//   x6, x7   - addresses and the current SVCR value
//   x11, x12 - SME vector length in bytes and ZA row index; these two
//              are saved on entry and restored before returning
//
// Clobbers: x4, x6, x7, the condition flags, and whichever of
//           V0-V31 / Z0-Z31 / P0-P15 / FFR / ZA / ZT0 are loaded below.
//           SVCR is written to zero on exit when SME is in use.
//
// rdsvl, _ldr_za, _str_za, _ldr_zt and _str_zt take bare register
// numbers; presumably they are the encoding macros from sme-inst.h.

.globl load_and_save
load_and_save:
	// Pre-indexed push: SP drops by 16, the pair lives at the new SP
	stp	x11, x12, [sp, #-0x10]!

	// This should be redundant in the SVE case
	ldr	x7, =v_in
	ldp	q0, q1, [x7]
	ldp	q2, q3, [x7, #16 * 2]
	ldp	q4, q5, [x7, #16 * 4]
	ldp	q6, q7, [x7, #16 * 6]
	ldp	q8, q9, [x7, #16 * 8]
	ldp	q10, q11, [x7, #16 * 10]
	ldp	q12, q13, [x7, #16 * 12]
	ldp	q14, q15, [x7, #16 * 14]
	ldp	q16, q17, [x7, #16 * 16]
	ldp	q18, q19, [x7, #16 * 18]
	ldp	q20, q21, [x7, #16 * 20]
	ldp	q22, q23, [x7, #16 * 22]
	ldp	q24, q25, [x7, #16 * 24]
	ldp	q26, q27, [x7, #16 * 26]
	ldp	q28, q29, [x7, #16 * 28]
	ldp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_in

	adrp	x7, svcr_in
	ldr	x7, [x7, :lo12:svcr_in]
	// SVCR is 0 by default, avoid triggering SME if not in use
	cbz	x7, check_sve_in
	msr	S3_3_C4_C2_2, x7

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_in
	rdsvl	11, 1			// x11 = SME vector length in bytes
	mov	w12, #0			// w12 = ZA row index
	ldr	x6, =za_in
1:	_ldr_za 12, 6
	add	x6, x6, x11		// advance the buffer by x11 bytes
	add	x12, x12, #1
	cmp	x11, x12		// x11 iterations in total
	bne	1b

	// ZT?
	cbz	x2, check_sm_in
	adrp	x6, zt_in
	add	x6, x6, :lo12:zt_in
	_ldr_zt 6

	// In streaming mode?  x7 still holds the SVCR value written above
check_sm_in:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_in
	mov	x4, x3		// Load FFR if we have FA64
	b	load_sve

	// SVE?
check_sve_in:
	cbz	x0, wait_for_writes
	mov	x4, #1		// FFR is always loaded on this path

load_sve:
	ldr	x7, =z_in
	ldr	z0, [x7, #0, MUL VL]
	ldr	z1, [x7, #1, MUL VL]
	ldr	z2, [x7, #2, MUL VL]
	ldr	z3, [x7, #3, MUL VL]
	ldr	z4, [x7, #4, MUL VL]
	ldr	z5, [x7, #5, MUL VL]
	ldr	z6, [x7, #6, MUL VL]
	ldr	z7, [x7, #7, MUL VL]
	ldr	z8, [x7, #8, MUL VL]
	ldr	z9, [x7, #9, MUL VL]
	ldr	z10, [x7, #10, MUL VL]
	ldr	z11, [x7, #11, MUL VL]
	ldr	z12, [x7, #12, MUL VL]
	ldr	z13, [x7, #13, MUL VL]
	ldr	z14, [x7, #14, MUL VL]
	ldr	z15, [x7, #15, MUL VL]
	ldr	z16, [x7, #16, MUL VL]
	ldr	z17, [x7, #17, MUL VL]
	ldr	z18, [x7, #18, MUL VL]
	ldr	z19, [x7, #19, MUL VL]
	ldr	z20, [x7, #20, MUL VL]
	ldr	z21, [x7, #21, MUL VL]
	ldr	z22, [x7, #22, MUL VL]
	ldr	z23, [x7, #23, MUL VL]
	ldr	z24, [x7, #24, MUL VL]
	ldr	z25, [x7, #25, MUL VL]
	ldr	z26, [x7, #26, MUL VL]
	ldr	z27, [x7, #27, MUL VL]
	ldr	z28, [x7, #28, MUL VL]
	ldr	z29, [x7, #29, MUL VL]
	ldr	z30, [x7, #30, MUL VL]
	ldr	z31, [x7, #31, MUL VL]

	// FFR is not present in base SME
	cbz	x4, 1f
	ldr	x7, =ffr_in
	// P0 is only a staging register here, it is reloaded from p_in below
	ldr	p0, [x7]
	// Skip the FFR write when the first 64 bits of ffr_in are zero
	ldr	x7, [x7, #0]
	cbz	x7, 1f
	wrffr	p0.b
1:

	ldr	x7, =p_in
	ldr	p0, [x7, #0, MUL VL]
	ldr	p1, [x7, #1, MUL VL]
	ldr	p2, [x7, #2, MUL VL]
	ldr	p3, [x7, #3, MUL VL]
	ldr	p4, [x7, #4, MUL VL]
	ldr	p5, [x7, #5, MUL VL]
	ldr	p6, [x7, #6, MUL VL]
	ldr	p7, [x7, #7, MUL VL]
	ldr	p8, [x7, #8, MUL VL]
	ldr	p9, [x7, #9, MUL VL]
	ldr	p10, [x7, #10, MUL VL]
	ldr	p11, [x7, #11, MUL VL]
	ldr	p12, [x7, #12, MUL VL]
	ldr	p13, [x7, #13, MUL VL]
	ldr	p14, [x7, #14, MUL VL]
	ldr	p15, [x7, #15, MUL VL]

wait_for_writes:
	// Wait for the parent
	brk	#0

	// Save values
	ldr	x7, =v_out
	stp	q0, q1, [x7]
	stp	q2, q3, [x7, #16 * 2]
	stp	q4, q5, [x7, #16 * 4]
	stp	q6, q7, [x7, #16 * 6]
	stp	q8, q9, [x7, #16 * 8]
	stp	q10, q11, [x7, #16 * 10]
	stp	q12, q13, [x7, #16 * 12]
	stp	q14, q15, [x7, #16 * 14]
	stp	q16, q17, [x7, #16 * 16]
	stp	q18, q19, [x7, #16 * 18]
	stp	q20, q21, [x7, #16 * 20]
	stp	q22, q23, [x7, #16 * 22]
	stp	q24, q25, [x7, #16 * 24]
	stp	q26, q27, [x7, #16 * 26]
	stp	q28, q29, [x7, #16 * 28]
	stp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_out

	rdsvl	11, 1			// x11 = SME vector length in bytes
	adrp	x6, sme_vl_out
	str	x11, [x6, :lo12:sme_vl_out]

	// Record SVCR as it is now; x7 keeps the value for the tests below
	mrs	x7, S3_3_C4_C2_2
	adrp	x6, svcr_out
	str	x7, [x6, :lo12:svcr_out]

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_out
	mov	w12, #0			// w12 = ZA row index
	ldr	x6, =za_out
1:	_str_za 12, 6
	add	x6, x6, x11		// advance the buffer by x11 bytes
	add	x12, x12, #1
	cmp	x11, x12		// x11 iterations in total
	bne	1b

	// ZT?
	cbz	x2, check_sm_out
	adrp	x6, zt_out
	add	x6, x6, :lo12:zt_out
	_str_zt 6

	// In streaming mode?
check_sm_out:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_out
	mov	x4, x3		// FFR?
	b	read_sve

	// SVE?
check_sve_out:
	cbz	x0, wait_for_reads
	mov	x4, #1		// FFR is always saved on this path

	// sve_vl_out is only written here, not on the streaming mode path
	rdvl	x7, #1
	adrp	x6, sve_vl_out
	str	x7, [x6, :lo12:sve_vl_out]

read_sve:
	ldr	x7, =z_out
	str	z0, [x7, #0, MUL VL]
	str	z1, [x7, #1, MUL VL]
	str	z2, [x7, #2, MUL VL]
	str	z3, [x7, #3, MUL VL]
	str	z4, [x7, #4, MUL VL]
	str	z5, [x7, #5, MUL VL]
	str	z6, [x7, #6, MUL VL]
	str	z7, [x7, #7, MUL VL]
	str	z8, [x7, #8, MUL VL]
	str	z9, [x7, #9, MUL VL]
	str	z10, [x7, #10, MUL VL]
	str	z11, [x7, #11, MUL VL]
	str	z12, [x7, #12, MUL VL]
	str	z13, [x7, #13, MUL VL]
	str	z14, [x7, #14, MUL VL]
	str	z15, [x7, #15, MUL VL]
	str	z16, [x7, #16, MUL VL]
	str	z17, [x7, #17, MUL VL]
	str	z18, [x7, #18, MUL VL]
	str	z19, [x7, #19, MUL VL]
	str	z20, [x7, #20, MUL VL]
	str	z21, [x7, #21, MUL VL]
	str	z22, [x7, #22, MUL VL]
	str	z23, [x7, #23, MUL VL]
	str	z24, [x7, #24, MUL VL]
	str	z25, [x7, #25, MUL VL]
	str	z26, [x7, #26, MUL VL]
	str	z27, [x7, #27, MUL VL]
	str	z28, [x7, #28, MUL VL]
	str	z29, [x7, #29, MUL VL]
	str	z30, [x7, #30, MUL VL]
	str	z31, [x7, #31, MUL VL]

	ldr	x7, =p_out
	str	p0, [x7, #0, MUL VL]
	str	p1, [x7, #1, MUL VL]
	str	p2, [x7, #2, MUL VL]
	str	p3, [x7, #3, MUL VL]
	str	p4, [x7, #4, MUL VL]
	str	p5, [x7, #5, MUL VL]
	str	p6, [x7, #6, MUL VL]
	str	p7, [x7, #7, MUL VL]
	str	p8, [x7, #8, MUL VL]
	str	p9, [x7, #9, MUL VL]
	str	p10, [x7, #10, MUL VL]
	str	p11, [x7, #11, MUL VL]
	str	p12, [x7, #12, MUL VL]
	str	p13, [x7, #13, MUL VL]
	str	p14, [x7, #14, MUL VL]
	str	p15, [x7, #15, MUL VL]

	// Only save FFR if it exists
	cbz	x4, wait_for_reads
	ldr	x7, =ffr_out
	// P0 has already been stored to p_out, so it can stage FFR here
	rdffr	p0.b
	str	p0, [x7]

wait_for_reads:
	// Wait for the parent
	brk	#0

	// Ensure we don't leave ourselves in streaming mode
	cbz	x1, out
	msr	S3_3_C4_C2_2, xzr

out:
	// Post-indexed pop matching the push on entry: reload the pair
	// from the current SP, then move SP back up by 16.
	ldp	x11, x12, [sp], #0x10
	ret