/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
 */

#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
#include <asm/regdef.h>
#include <asm/unwind_hints.h>

/* Broadcast the low byte of \r0 into all 8 bytes: 0xXY -> 0xXYXYXYXYXYXYXYXY */
.macro fill_to_64 r0
	bstrins.d	\r0, \r0, 15, 8
	bstrins.d	\r0, \r0, 31, 16
	bstrins.d	\r0, \r0, 63, 32
.endm

.section .noinstr.text, "ax"

SYM_FUNC_START(memset)
	/*
	 * Some CPUs support hardware unaligned access
	 */
	ALTERNATIVE	"b __memset_generic", \
			"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
SYM_FUNC_ALIAS(__memset, memset)

EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

_ASM_NOKPROBE(memset)
_ASM_NOKPROBE(__memset)

/*
 * void *__memset_generic(void *s, int c, size_t n)
 *
 * a0: s
 * a1: c
 * a2: n
 */
SYM_FUNC_START(__memset_generic)
	move	a3, a0			/* keep s for the return value */
	beqz	a2, 2f

	/* fallback: set one byte at a time */
1:	st.b	a1, a0, 0
	addi.d	a0, a0, 1
	addi.d	a2, a2, -1
	bgt	a2, zero, 1b

2:	move	a0, a3
	jr	ra
SYM_FUNC_END(__memset_generic)
_ASM_NOKPROBE(__memset_generic)

/*
 * void *__memset_fast(void *s, int c, size_t n)
 *
 * a0: s
 * a1: c
 * a2: n
 */
SYM_FUNC_START(__memset_fast)
	/* fill a1 to 64 bits */
	fill_to_64 a1

	/* less than 9 bytes: use the jump table at .Lsmall */
	sltui	t0, a2, 9
	bnez	t0, .Lsmall

	/* a2 = s + n (end address); store the first, possibly unaligned, 8 bytes */
	add.d	a2, a0, a2
	st.d	a1, a0, 0

	/* align up address: a3 = (s + 8) & ~7 */
	addi.d	a3, a0, 8
	bstrins.d	a3, zero, 2, 0

	addi.d	a4, a2, -64
	bgeu	a3, a4, .Llt64

	/* set 64 bytes at a time */
.Lloop64:
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	st.d	a1, a3, 32
	st.d	a1, a3, 40
	st.d	a1, a3, 48
	st.d	a1, a3, 56
	addi.d	a3, a3, 64
	bltu	a3, a4, .Lloop64

	/* set the remaining bytes */
.Llt64:
	addi.d	a4, a2, -32
	bgeu	a3, a4, .Llt32
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	st.d	a1, a3, 16
	st.d	a1, a3, 24
	addi.d	a3, a3, 32

.Llt32:
	addi.d	a4, a2, -16
	bgeu	a3, a4, .Llt16
	st.d	a1, a3, 0
	st.d	a1, a3, 8
	addi.d	a3, a3, 16

.Llt16:
	addi.d	a4, a2, -8
	bgeu	a3, a4, .Llt8
	st.d	a1, a3, 0

.Llt8:
	/*
	 * Store the last 8 bytes ending exactly at s + n; this may
	 * overlap bytes already set above, which is harmless since
	 * every store writes the same fill pattern.
	 */
	st.d	a1, a2, -8

	/* return */
	jr	ra

	/*
	 * Jump table for 0 <= n <= 8: every entry below is padded to
	 * 16 bytes (.align 4), so entry n sits at .Lsmall + 16 + n * 16.
	 * pcaddi t0, 4 yields the address 4 instructions (16 bytes)
	 * ahead, i.e. entry 0.
	 */
	.align	4
.Lsmall:
	pcaddi	t0, 4
	slli.d	a2, a2, 4
	add.d	t0, t0, a2
	jr	t0

	.align	4
0:	jr	ra

	.align	4
1:	st.b	a1, a0, 0
	jr	ra

	.align	4
2:	st.h	a1, a0, 0
	jr	ra

	.align	4
3:	st.h	a1, a0, 0
	st.b	a1, a0, 2
	jr	ra

	.align	4
4:	st.w	a1, a0, 0
	jr	ra

	.align	4
5:	st.w	a1, a0, 0
	st.b	a1, a0, 4
	jr	ra

	.align	4
6:	st.w	a1, a0, 0
	st.h	a1, a0, 4
	jr	ra

	.align	4
7:	st.w	a1, a0, 0
	st.w	a1, a0, 3	/* words at 0 and 3 overlap to cover bytes 0-6 */
	jr	ra

	.align	4
8:	st.d	a1, a0, 0
	jr	ra
SYM_FUNC_END(__memset_fast)
_ASM_NOKPROBE(__memset_fast)

STACK_FRAME_NON_STANDARD __memset_fast
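
/*
 * Editorial note: an illustrative C-level sketch of the __memset_fast
 * strategy above, for reference only (not part of the build).
 * memset_fast_sketch() is a hypothetical name; the sketch assumes
 * hardware unaligned access (CPU_FEATURE_UAL), and the overlapping
 * head/tail/body stores are harmless because every store writes the
 * same fill pattern.
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void *memset_fast_sketch(void *s, int c, size_t n)
 *	{
 *		uint64_t v = 0x0101010101010101ULL * (uint8_t)c;
 *		unsigned char *p = s, *end = p + n, *q;
 *		int i;
 *
 *		if (n < 9) {			// .Lsmall jump table
 *			while (n--)
 *				p[n] = (unsigned char)c;
 *			return s;
 *		}
 *
 *		memcpy(p, &v, 8);		// unaligned head store
 *		q = (unsigned char *)(((uintptr_t)p + 8) & ~(uintptr_t)7);
 *
 *		while (end - q > 64) {		// .Lloop64: 64 bytes a time
 *			for (i = 0; i < 8; i++)
 *				memcpy(q + 8 * i, &v, 8);
 *			q += 64;
 *		}
 *		if (end - q > 32) {		// .Llt64
 *			for (i = 0; i < 4; i++)
 *				memcpy(q + 8 * i, &v, 8);
 *			q += 32;
 *		}
 *		if (end - q > 16) {		// .Llt32
 *			memcpy(q, &v, 8);
 *			memcpy(q + 8, &v, 8);
 *			q += 16;
 *		}
 *		if (end - q > 8)		// .Llt16
 *			memcpy(q, &v, 8);
 *		memcpy(end - 8, &v, 8);		// .Llt8: unaligned tail
 *		return s;
 *	}
 */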