/*
 * arch/xtensa/lib/memset.S
 *
 * ANSI C standard library function memset
 * (Well, almost. .fixup code might return zero.)
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License. See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <variant/core.h>
#include <asm/asmmacro.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions.
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If the destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to the aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 */

.text
ENTRY(memset)

	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned	# if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	retw

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
EX(10f) s32i	a3, a5,  8
EX(10f) s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f) s32i	a3, a5,  0
EX(10f) s32i	a3, a5,  4
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f) s32i	a3, a5,  0
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f) s16i	a3, a5,  0
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f) s8i	a3, a5,  0
.L5:
.Lret1:
	retw

/*
 * Destination is unaligned
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# set short lengths byte by byte
	bbci.l	a5, 0, .L20		# branch if dst is half-word aligned
	# dst is only byte aligned
	# set 1 byte
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
	# now retest if dst is aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f) s16i	a3, a5, 0
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	retw

ENDPROC(memset)

	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

10:
	movi	a2, 0
	retw
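
/*
 * For reference, a rough C-level sketch of the algorithm above. This is
 * only an illustrative reading of the assembly, not kernel code: the
 * function name is invented, and the EX()/.fixup fault handling (which
 * lets the real routine return zero if a store faults) has no C
 * equivalent here.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void *memset_sketch(void *dst, int c, size_t n)
 *	{
 *		unsigned char *p = dst;
 *		uint32_t w = (unsigned char)c;
 *
 *		w |= w << 8;			// c in all four bytes
 *		w |= w << 16;			// of a word
 *
 *		if ((uintptr_t)p & 3) {		// dst unaligned
 *			if (n < 8)
 *				goto byteset;	// short: byte by byte
 *			if ((uintptr_t)p & 1) {	// align to 2 bytes
 *				*p++ = (unsigned char)c;
 *				n--;
 *			}
 *			if ((uintptr_t)p & 2) {	// align to 4 bytes
 *				*(uint16_t *)p = (uint16_t)w;
 *				p += 2;
 *				n -= 2;
 *			}
 *		}
 *		while (n >= 16) {		// 16B per iteration
 *			((uint32_t *)p)[0] = w;
 *			((uint32_t *)p)[1] = w;
 *			((uint32_t *)p)[2] = w;
 *			((uint32_t *)p)[3] = w;
 *			p += 16;
 *			n -= 16;
 *		}
 *		if (n & 8) {			// then 8B, 4B, 2B, 1B
 *			((uint32_t *)p)[0] = w;	// tails, keyed off the
 *			((uint32_t *)p)[1] = w;	// low bits of the length
 *			p += 8;
 *		}
 *		if (n & 4) {
 *			*(uint32_t *)p = w;
 *			p += 4;
 *		}
 *		if (n & 2) {
 *			*(uint16_t *)p = (uint16_t)w;
 *			p += 2;
 *		}
 *		if (n & 1)
 *			*p = (unsigned char)c;
 *		return dst;
 *
 *	byteset:
 *		while (n--)
 *			*p++ = (unsigned char)c;
 *		return dst;
 *	}
 */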