/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm/lib/memset.S
 *
 * Copyright (C) 1995-2000 Russell King
 *
 * ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

	.text
	.align	5

ENTRY(mmioset)
ENTRY(memset)
UNWIND( .fnstart )
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8	@ replicate the low byte of r1
	orr	r1, r1, r1, lsl #16	@ across the whole word
	mov	r3, r1
7:	cmp	r2, #16			@ __memset64 enters here with the
	blt	4f			@ pattern already in r1 and r3

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r8, lr} )
	mov	r8, r1
	mov	lr, r3

2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}
UNWIND( .fnend )

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
UNWIND( .fnend )
UNWIND( .fnstart )
UNWIND( .save {r4-r8, lr} )
	mov	r4, r1
	mov	r5, r3
	mov	r6, r1
	mov	r7, r3
	mov	r8, r1
	mov	lr, r3

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32		@ bytes needed to reach a 32-byte
	sub	r2, r2, r8		@ boundary (ip is already 4-aligned)
	movs	r8, r8, lsl #(32 - 4)	@ bit 4 -> C, bit 3 -> N
	stmiacs	ip!, {r4, r5, r6, r7}	@ 16 bytes if bit 4 was set
	stmiami	ip!, {r4, r5}		@ 8 bytes if bit 3 was set
	tst	r8, #(1 << 30)		@ bit 2 of the original count
	mov	r8, r1
	strne	r1, [ip], #4		@ 4 bytes if bit 2 was set

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}
UNWIND( .fnend )

#endif

UNWIND( .fnstart )
4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to set. We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1 store 4 - r3 bytes to reach
	strblt	r1, [ip], #1		@ 1 a word-aligned pointer
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
UNWIND( .fnend )
ENDPROC(memset)
ENDPROC(mmioset)

ENTRY(__memset32)
UNWIND( .fnstart )
	mov	r3, r1			@ copy r1 to r3 and fall into memset64
UNWIND( .fnend )
ENDPROC(__memset32)
ENTRY(__memset64)
UNWIND( .fnstart )
	mov	ip, r0			@ preserve r0 as return value
	b	7b			@ jump into the middle of memset
UNWIND( .fnend )
ENDPROC(__memset64)
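
/*
 * For reference, the simple (non-CALGN) path above behaves roughly like
 * the C sketch below: replicate the fill byte across a word, set any
 * leading bytes up to a word boundary, store words in bulk (the asm
 * stores 64 bytes per stm loop iteration), then finish the tail by
 * hand. This is an illustrative sketch only, not the code this file
 * builds.
 *
 *	void *memset(void *p, int c, size_t n)
 *	{
 *		unsigned char *d = p;
 *		unsigned long v = (unsigned char)c;
 *
 *		while (n && ((unsigned long)d & 3)) {	// align pointer
 *			*d++ = c;
 *			n--;
 *		}
 *		v |= v << 8;		// replicate byte into all
 *		v |= v << 16;		// four bytes of the word
 *		while (n >= 4) {	// bulk word stores
 *			*(unsigned long *)d = v;
 *			d += 4;
 *			n -= 4;
 *		}
 *		while (n--)		// 0..3 trailing bytes
 *			*d++ = c;
 *		return p;
 *	}
 */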