/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

#include "asmdefs.h"

/* Register aliases.  Note x3 and x5 are deliberately double-aliased:
   dst/off and zva_val/dstend2 are never live at the same time.  */
#define dstin	x0
#define val	x1
#define valw	w1
#define count	x2
#define dst	x3
#define dstend	x4
#define zva_val	x5
#define off	x3
#define dstend2	x5

/*-----------------------------------------------------------------------
 * void *__memset_aarch64 (void *dstin, int val, size_t count)
 * ABI:   AAPCS64
 * In:    x0 = dstin (destination), w1 = valw (fill byte, low 8 bits used),
 *        x2 = count (bytes)
 * Out:   x0 = dstin (never written, so the original pointer is returned)
 * Clobb: x2-x5, v0, flags
 *
 * Strategy: small sizes are handled with a few possibly-overlapping
 * stores chosen so every size in a range is covered branchlessly;
 * large zero-fills use the DC ZVA cache-line-zeroing instruction when
 * the ZVA block size is 64 bytes.
 *-----------------------------------------------------------------------*/
ENTRY (__memset_aarch64)
	dup	v0.16B, valw		/* Replicate fill byte into all 16 lanes of v0.  */
	cmp	count, 16
	b.lo	L(set_small)

	add	dstend, dstin, count
	cmp	count, 64
	b.hs	L(set_128)

	/* Set 16..63 bytes with 4 overlapping 16-byte stores.
	   off = 16 & (count >> 1): 16 when count >= 32, else 0, so the
	   middle two stores only add coverage when they are needed.  */
	mov	off, 16
	and	off, off, count, lsr 1
	sub	dstend2, dstend, off
	str	q0, [dstin]
	str	q0, [dstin, off]
	str	q0, [dstend2, -16]
	str	q0, [dstend, -16]
	ret

	.p2align 4
	/* Set 0..15 bytes.  */
L(set_small):
	add	dstend, dstin, count
	cmp	count, 4
	b.lo	2f
	/* Set 4..15 bytes with 4 overlapping 4-byte stores.
	   off = count >> 3 (0 or 1); off << 2 offsets the middle pair
	   so sizes 8..15 are fully covered.  */
	lsr	off, count, 3
	sub	dstend2, dstend, off, lsl 2
	str	s0, [dstin]
	str	s0, [dstin, off, lsl 2]
	str	s0, [dstend2, -4]
	str	s0, [dstend, -4]
	ret

	/* Set 0..3 bytes with up to 3 byte stores:
	   first byte, middle byte (dstin + count/2), last byte.  */
2:	cbz	count, 3f
	lsr	off, count, 1
	strb	valw, [dstin]
	strb	valw, [dstin, off]
	strb	valw, [dstend, -1]
3:	ret

	.p2align 4
L(set_128):
	bic	dst, dstin, 15		/* dst = dstin rounded down to 16; used by set_long.  */
	cmp	count, 128
	b.hi	L(set_long)
	/* Set 64..128 bytes with 4 overlapping 32-byte store pairs.  */
	stp	q0, q0, [dstin]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
L(set_long):
	/* More than 128 bytes.  Store the first 32 bytes (unaligned head),
	   then pick the ZVA zeroing path if filling with zero.  */
	str	q0, [dstin]
	str	q0, [dst, 16]
	tst	valw, 255		/* DC ZVA only writes zeros.  */
	b.ne	L(no_zva)
#ifndef SKIP_ZVA_CHECK
	/* DCZID_EL0 bits 0-3 = log2(ZVA words), bit 4 = DZP (ZVA
	   prohibited).  Value 4 means ZVA enabled with 64-byte blocks;
	   anything else falls back to plain stores.  */
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31
	cmp	zva_val, 4	/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	stp	q0, q0, [dst, 32]	/* Cover bytes up to the first 64-aligned block.  */
	bic	dst, dstin, 63		/* Align dst down to the ZVA block size.  */
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 64 + 64	/* Adjust count and bias for loop.  */

	/* Write last bytes before ZVA loop.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]

	.p2align 4
	/* Zero one 64-byte cache-line-sized block per iteration.
	   dst is pre-incremented, so the first DC ZVA hits dst + 64,
	   whose contents were already covered by the stores above.  */
L(zva64_loop):
	add	dst, dst, 64
	dc	zva, dst
	subs	count, count, 64
	b.hi	L(zva64_loop)
	ret

	.p2align 3
L(no_zva):
	/* Fallback: 64 bytes of stp per iteration; head (32 bytes) was
	   already stored, tail (64 bytes) is stored after the loop.  */
	sub	count, dstend, dst	/* Count is 32 too large.  */
	sub	count, count, 64 + 32	/* Adjust count and bias for loop.  */
L(no_zva_loop):
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]
	add	dst, dst, 64
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (__memset_aarch64)