/*
 * strcpy/stpcpy - copy a string returning pointer to start/end.
 *
 * Copyright (c) 2018-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "asmdefs.h"

.arch armv8-a+sve

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * SVE Available.
 */

/* To build as stpcpy, define BUILD_STPCPY before compiling this file.  */
#ifdef BUILD_STPCPY
#define FUNC	__stpcpy_aarch64_sve
#else
#define FUNC	__strcpy_aarch64_sve
#endif

/*
 * FUNC (dst, src)
 *
 * In:	x0 = destination base (stores go to [x0, x2])
 *	x1 = source base (loads come from [x1, x2])
 * Out:	strcpy build: x0 is left untouched.
 *	stpcpy build: x0 = address of the terminating zero byte written
 *	to the destination.
 *
 * Register roles:
 *	x2  = byte offset shared by the source and destination accesses
 *	z0  = bytes most recently loaded from the source
 *	p0  = lanes that loaded without faulting; later the store mask
 *	      cropped at the terminator
 *	p1  = lanes of z0 that compare equal to zero
 *	p2  = all-true byte predicate, never modified inside the loop
 *
 * Also writes FFR and the condition flags.  Leaf routine: no stack use.
 */
ENTRY (FUNC)
	setffr				/* FFR starts out all-true */
	ptrue	p2.b, all		/* p2 = every byte lane active */
	mov	x2, 0			/* offset = 0 */

	.p2align 4
	/* Main loop: first-fault load of one vector from src + offset,
	   then read back FFR to learn which lanes actually loaded.
	   rdffrs sets the flags tested by b.nlast.  */
.Lloop:
	ldff1b	z0.b, p2/z, [x1, x2]
	rdffrs	p0.b, p2/z
	b.nlast	.Lpartial		/* last lane missing: partial load */

	/* Every lane loaded.  Use the constant p2 as the governing
	   predicate so that nothing past the branch depends on the
	   contents of FFR.  */
	cmpeq	p1.b, p2/z, z0.b, 0	/* p1 = lanes holding a zero byte */
	b.any	.Lfinish

	/* No terminator in this vector: copy all of it and advance by
	   one full vector length.  */
	st1b	z0.b, p2, [x0, x2]
	incb	x2, all
	b	.Lloop

	/* Only the lanes flagged in p0 were loaded; restrict the zero
	   search to those lanes.  */
.Lpartial:
	cmpeq	p1.b, p0/z, z0.b, 0	/* p1 = loaded lanes holding zero */
	b.any	.Lfinish

	/* No terminator among the loaded lanes: copy just those lanes,
	   advance the offset by their count, and go round again.  */
	setffr				/* FFR back to all-true for next load */
	st1b	z0.b, p0, [x0, x2]
	incp	x2, p0.b
	b	.Lloop

	/* Terminator found.  brka yields the lanes up to and including
	   the first zero byte; store exactly those lanes.  */
.Lfinish:
	brka	p0.b, p2/z, p1.b
	st1b	z0.b, p0, [x0, x2]
#ifdef BUILD_STPCPY
	/* x0 = dst + offset + (lanes stored) - 1, i.e. the address of
	   the zero byte that was just written.  */
	add	x0, x0, x2
	sub	x0, x0, 1
	incp	x0, p0.b
#endif
	ret

END (FUNC)