/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

/* Syntax: GNU as for AArch64, run through the C preprocessor.
 *
 * Contract, as established by the code in this file:
 *
 *   In:     x0 = destination pointer
 *           w1 = fill value (only the low byte is replicated)
 *           x2 = number of bytes to fill
 *   Out:    x0 is never written by the instructions below, so it still
 *           holds the destination pointer on return.
 *   Writes: x1, x2, x3, x4, x5, v0 and the condition flags.
 *   Stack:  not used.
 *
 * ENTRY, END, L, PTR_ARG and SIZE_ARG come from asmdefs.h and are not
 * visible here.  NOTE(review): PTR_ARG/SIZE_ARG presumably normalise the
 * pointer and size arguments for ILP32 builds - confirm in asmdefs.h.
 *
 * Strategy: every size class is filled with a fixed pattern of stores
 * anchored at the start (dstin) and at the end (dstend) of the buffer.
 * The two groups are allowed to overlap, so no per-byte tail loop is
 * needed.
 */

#include "asmdefs.h"

#define dstin	x0	/* Destination as passed in; never modified.  */
#define val	x1	/* Fill byte replicated to 64 bits (small sizes).  */
#define valw	w1	/* 32-bit view of val.  */
#define count	x2	/* Byte count; reused as the biased loop counter.  */
#define dst	x3	/* Aligned running store pointer (long sizes).  */
#define dstend	x4	/* One past the last byte: dstin + count.  */
#define zva_val	x5	/* Scratch for the DCZID_EL0 read.  */

ENTRY (__memset_aarch64)
	PTR_ARG (0)
	SIZE_ARG (2)

	/* v0 = fill byte in all 16 lanes; dstend = end of the buffer.  */
	dup	v0.16B, valw
	add	dstend, dstin, count

	/* Dispatch on size: > 96 long, 16..96 medium, 0..15 small.  */
	cmp	count, 96
	b.hi	L(set_long)
	cmp	count, 16
	b.hs	L(set_medium)
	/* val = fill byte replicated 8 times (low 64 bits of v0).  */
	mov	val, v0.D[0]

	/* Set 0..15 bytes.  */
	/* Bit 3 set: 8..15 bytes, two overlapping 8-byte stores.  */
	tbz	count, 3, 1f
	str	val, [dstin]
	str	val, [dstend, -8]
	ret
	.p2align 4
	/* Bit 2 set: 4..7 bytes, two overlapping 4-byte stores.  */
1:	tbz	count, 2, 2f
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
	/* 0..3 bytes: nothing for 0, one byte for 1, and for 2..3 an
	   additional halfword ending at dstend.  */
2:	cbz	count, 3f
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes.  */
L(set_medium):
	str	q0, [dstin]
	/* Bit 6 set means 64..96 here, because count <= 96.  */
	tbnz	count, 6, L(set96)
	/* 16..63 bytes: 16 from the start and 16 from the end.  */
	str	q0, [dstend, -16]
	/* Bit 5 set: 32..63 bytes, add a second 16 at each end.  */
	tbz	count, 5, 1f
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  The first 16 bytes were already
	   stored in L(set_medium).  */
L(set96):
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
	/* Set more than 96 bytes.  */
L(set_long):
	and	valw, valw, 255		/* Isolate the fill byte for the zero test.  */
	bic	dst, dstin, 15		/* dst = dstin rounded down to 16 bytes.  */
	str	q0, [dstin]		/* First 16 bytes, possibly unaligned.  */
	/* Use DC ZVA only if count >= 160 and the fill byte is zero.
	   When count < 160 the ccmp forces NZCV to 0, which reads as
	   "ne" and takes the branch.  */
	cmp	count, 160
	ccmp	valw, 0, 0, hs
	b.ne	L(no_zva)

#ifndef SKIP_ZVA_CHECK
	/* Keep bits [4:0] of DCZID_EL0 and require the value 4, i.e.
	   bits [3:0] == 4 with bit 4 clear.  Per the Arm ARM, bits [3:0]
	   are log2 of the block size in words and bit 4 is the "DC ZVA
	   prohibited" flag - confirm against the register description.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	/* Together with the store at dstin above, these fill every byte
	   from dstin up to (16-byte aligned dst) + 64, which reaches at
	   least the first 64-byte boundary the loop starts from.  */
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63		/* dst = rounded down to 64 bytes.  */
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 128	/* Adjust count and bias for loop.  */

	.p2align 4
	/* Zero one 64-byte block per iteration.  The bias makes the loop
	   stop while at most 64 bytes remain; those are covered by the
	   four stores anchored at dstend below.  */
L(zva_loop):
	add	dst, dst, 64
	dc	zva, dst
	subs	count, count, 64
	b.hi	L(zva_loop)
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	/* Plain store loop: 64 bytes per iteration with 16-byte aligned
	   addresses, then the last 64 bytes anchored at dstend.  */
L(no_zva):
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
L(no_zva_loop):
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!	/* Pre-index: dst += 64, then store.  */
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (__memset_aarch64)