/*
 * Copyright (C) 2013 Regents of the University of California
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */


#include <linux/linkage.h>
#include <asm/asm.h>

/* void *memset(void *, int, size_t) */
ENTRY(memset)
	move t0, a0			/* Preserve return value */

	/* Defer to byte-oriented fill for small sizes */
	sltiu a3, a2, 16
	bnez a3, 4f

	/*
	 * Round to nearest XLEN-aligned address
	 * greater than or equal to start address
	 */
	addi a3, t0, SZREG-1
	andi a3, a3, ~(SZREG-1)
	beq a3, t0, 2f			/* Skip if already aligned */
	/* Handle initial misalignment */
	sub a4, a3, t0
1:
	sb a1, 0(t0)
	addi t0, t0, 1
	bltu t0, a3, 1b
	sub a2, a2, a4			/* Update count */

2: /* Duff's device with 32 XLEN stores per iteration */
	/* Broadcast value into all bytes */
	andi a1, a1, 0xff
	slli a3, a1, 8
	or a1, a3, a1
	slli a3, a1, 16
	or a1, a3, a1
#ifdef CONFIG_64BIT
	slli a3, a1, 32
	or a1, a3, a1
#endif

	/* Calculate end address */
	andi a4, a2, ~(SZREG-1)
	add a3, t0, a4

	andi a4, a4, 31*SZREG		/* Calculate remainder */
	beqz a4, 3f			/* Shortcut if no remainder */
	neg a4, a4
	addi a4, a4, 32*SZREG		/* Calculate initial offset */

	/* Adjust start address with offset */
	sub t0, t0, a4

	/* Jump into loop body */
	/* Assumes 32-bit instruction lengths */
	la a5, 3f
#ifdef CONFIG_64BIT
	srli a4, a4, 1
#endif
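	/*
	 * a4 is the number of bytes skipped in the first, partial pass
	 * through the unrolled loop.  Each REG_S below assembles to one
	 * 4-byte instruction, so the jump offset is (a4 / SZREG) * 4
	 * bytes: a4 itself on RV32 (SZREG == 4), and a4 / 2 on RV64
	 * (SZREG == 8), which is what the shift above computes.
	 */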
	add a5, a5, a4
	jr a5
3:
	REG_S a1, 0(t0)
	REG_S a1, SZREG(t0)
	REG_S a1, 2*SZREG(t0)
	REG_S a1, 3*SZREG(t0)
	REG_S a1, 4*SZREG(t0)
	REG_S a1, 5*SZREG(t0)
	REG_S a1, 6*SZREG(t0)
	REG_S a1, 7*SZREG(t0)
	REG_S a1, 8*SZREG(t0)
	REG_S a1, 9*SZREG(t0)
	REG_S a1, 10*SZREG(t0)
	REG_S a1, 11*SZREG(t0)
	REG_S a1, 12*SZREG(t0)
	REG_S a1, 13*SZREG(t0)
	REG_S a1, 14*SZREG(t0)
	REG_S a1, 15*SZREG(t0)
	REG_S a1, 16*SZREG(t0)
	REG_S a1, 17*SZREG(t0)
	REG_S a1, 18*SZREG(t0)
	REG_S a1, 19*SZREG(t0)
	REG_S a1, 20*SZREG(t0)
	REG_S a1, 21*SZREG(t0)
	REG_S a1, 22*SZREG(t0)
	REG_S a1, 23*SZREG(t0)
	REG_S a1, 24*SZREG(t0)
	REG_S a1, 25*SZREG(t0)
	REG_S a1, 26*SZREG(t0)
	REG_S a1, 27*SZREG(t0)
	REG_S a1, 28*SZREG(t0)
	REG_S a1, 29*SZREG(t0)
	REG_S a1, 30*SZREG(t0)
	REG_S a1, 31*SZREG(t0)
	addi t0, t0, 32*SZREG
	bltu t0, a3, 3b
	andi a2, a2, SZREG-1		/* Update count */

4:
	/* Handle trailing misalignment */
	beqz a2, 6f
	add a3, t0, a2
5:
	sb a1, 0(t0)
	addi t0, t0, 1
	bltu t0, a3, 5b
6:
	ret
END(memset)