/*
 * arch/xtensa/lib/usercopy.S
 *
 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 * It needs to remain separate and distinct.  The hal files are part
 * of the Xtensa link-time HAL, and those files may differ per
 * processor configuration.  Patching the kernel for another
 * processor configuration includes replacing the hal files, and we
 * could lose the special functionality for accessing user-space
 * memory during such a patch.  We sacrifice a little code space here
 * in favor of simpler code maintenance.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */


/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The returned value is the number of bytes not copied, so a return
 * value of zero means success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *      a0/ return address
 *      a1/ stack pointer
 *      a2/ return value
 *      a3/ src
 *      a4/ length
 *      a5/ dst
 *      a6/ tmp
 *      a7/ tmp
 *      a8/ tmp
 *      a9/ tmp
 *      a10/ tmp
 *      a11/ original length
 */
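/*
 * Illustrative use of the return convention (a sketch only, not part of
 * this file's build): a hypothetical C-level wrapper in the style of
 * copy_from_user() would treat a non-zero result as a partial copy,
 * where 'left' counts the bytes that were not copied:
 *
 *      size_t left = __xtensa_copy_user(to, from, n);
 *      if (left != 0)
 *              return -EFAULT;
 */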
#include <xtensa/coreasm.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src    R, W0, W1
#define SSA8(R) ssa8b R
#else
#define ALIGN(R, W0, W1) src    R, W1, W0
#define SSA8(R) ssa8l R
#endif

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)       \
9:      insn    reg1, reg2, offset;             \
        .section __ex_table, "a";               \
        .word   9b, handler;                    \
        .previous
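/*
 * Each EX-wrapped access is tagged with a local label, and the macro
 * records the pair (instruction address, fixup handler) in the kernel's
 * __ex_table section.  When one of these loads or stores faults on a bad
 * user address, the fault handler looks up the faulting PC in __ex_table
 * and resumes execution at the registered handler (l_fixup or s_fixup
 * below) instead of treating the fault as fatal.
 */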
        .text
        .align  4
        .global __xtensa_copy_user
        .type   __xtensa_copy_user,@function
__xtensa_copy_user:
        entry   sp, 16          # minimal stack frame
        # a2/ dst, a3/ src, a4/ len
        mov     a5, a2          # copy dst so that a2 is return value
        mov     a11, a4         # preserve original len for error case
.Lcommon:
        bbsi.l  a2, 0, .Ldst1mod2       # if dst is 1 mod 2
        bbsi.l  a2, 1, .Ldst2mod4       # if dst is 2 mod 4
.Ldstaligned:   # return here from .Ldst1mod2/.Ldst2mod4 when dst is aligned
        srli    a7, a4, 4       # number of loop iterations with 16B
                                # per iteration
        movi    a8, 3           # if source is also aligned,
        bnone   a3, a8, .Laligned # then use word copy
        SSA8(   a3)             # set shift amount from byte offset
        bnez    a4, .Lsrcunaligned
        movi    a2, 0           # return success for len==0
        retw

/*
 * Destination is unaligned
 */

.Ldst1mod2:     # dst is only byte aligned
        bltui   a4, 7, .Lbytecopy       # do short copies byte by byte

        # copy 1 byte
        EX(l8ui, a6, a3, 0, l_fixup)
        addi    a3, a3, 1
        EX(s8i, a6, a5, 0, s_fixup)
        addi    a5, a5, 1
        addi    a4, a4, -1
        bbci.l  a5, 1, .Ldstaligned     # if dst is now aligned, then
                                        # return to main algorithm
.Ldst2mod4:     # dst 16-bit aligned
        # copy 2 bytes
        bltui   a4, 6, .Lbytecopy       # do short copies byte by byte
        EX(l8ui, a6, a3, 0, l_fixup)
        EX(l8ui, a7, a3, 1, l_fixup)
        addi    a3, a3, 2
        EX(s8i, a6, a5, 0, s_fixup)
        EX(s8i, a7, a5, 1, s_fixup)
        addi    a5, a5, 2
        addi    a4, a4, -2
        j       .Ldstaligned    # dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
        .align  4
        .byte   0               # 1 mod 4 alignment for LOOPNEZ
                                # (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
        beqz    a4, .Lbytecopydone
        add     a7, a3, a4      # a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
        EX(l8ui, a6, a3, 0, l_fixup)
        addi    a3, a3, 1
        EX(s8i, a6, a5, 0, s_fixup)
        addi    a5, a5, 1
#if !XCHAL_HAVE_LOOPS
        blt     a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
        movi    a2, 0           # return success for len bytes copied
        retw

/*
 * Destination and source are word-aligned.
 */
        # copy 16 bytes per iteration for word-aligned dst and word-aligned src
        .align  4               # 1 mod 4 alignment for LOOPNEZ
        .byte   0               # (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
        loopnez a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
        beqz    a7, .Loop1done
        slli    a8, a7, 4
        add     a8, a8, a3      # a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
        EX(l32i, a6, a3, 0, l_fixup)
        EX(l32i, a7, a3, 4, l_fixup)
        EX(s32i, a6, a5, 0, s_fixup)
        EX(l32i, a6, a3, 8, l_fixup)
        EX(s32i, a7, a5, 4, s_fixup)
        EX(l32i, a7, a3, 12, l_fixup)
        EX(s32i, a6, a5, 8, s_fixup)
        addi    a3, a3, 16
        EX(s32i, a7, a5, 12, s_fixup)
        addi    a5, a5, 16
#if !XCHAL_HAVE_LOOPS
        blt     a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
        bbci.l  a4, 3, .L2
        # copy 8 bytes
        EX(l32i, a6, a3, 0, l_fixup)
        EX(l32i, a7, a3, 4, l_fixup)
        addi    a3, a3, 8
        EX(s32i, a6, a5, 0, s_fixup)
        EX(s32i, a7, a5, 4, s_fixup)
        addi    a5, a5, 8
.L2:
        bbci.l  a4, 2, .L3
        # copy 4 bytes
        EX(l32i, a6, a3, 0, l_fixup)
        addi    a3, a3, 4
        EX(s32i, a6, a5, 0, s_fixup)
        addi    a5, a5, 4
.L3:
        bbci.l  a4, 1, .L4
        # copy 2 bytes
        EX(l16ui, a6, a3, 0, l_fixup)
        addi    a3, a3, 2
        EX(s16i, a6, a5, 0, s_fixup)
        addi    a5, a5, 2
.L4:
        bbci.l  a4, 0, .L5
        # copy 1 byte
        EX(l8ui, a6, a3, 0, l_fixup)
        EX(s8i, a6, a5, 0, s_fixup)
.L5:
        movi    a2, 0           # return success for len bytes copied
        retw
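/*
 * Note on the unaligned-source path below: SSA8 has already latched the
 * source's byte offset into the SAR register (ssa8l on little-endian,
 * ssa8b on big-endian cores), and the ALIGN macro (the SRC funnel-shift
 * instruction) extracts an aligned 32-bit window from two adjacent source
 * words using that shift amount.  The loop can therefore keep issuing
 * word-sized loads and stores even though the source is misaligned.
 */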
/*
 * Destination is aligned, Source is unaligned
 */

        .align  4
        .byte   0               # 1 mod 4 alignment for LOOPNEZ
                                # (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
        # copy 16 bytes per iteration for word-aligned dst and unaligned src
        and     a10, a3, a8     # save unalignment offset for below
        sub     a3, a3, a10     # align a3 (to avoid sim warnings only; not needed for hardware)
        EX(l32i, a6, a3, 0, l_fixup)    # load first word
#if XCHAL_HAVE_LOOPS
        loopnez a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
        beqz    a7, .Loop2done
        slli    a10, a7, 4
        add     a10, a10, a3    # a10 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
        EX(l32i, a7, a3, 4, l_fixup)
        EX(l32i, a8, a3, 8, l_fixup)
        ALIGN(  a6, a6, a7)
        EX(s32i, a6, a5, 0, s_fixup)
        EX(l32i, a9, a3, 12, l_fixup)
        ALIGN(  a7, a7, a8)
        EX(s32i, a7, a5, 4, s_fixup)
        EX(l32i, a6, a3, 16, l_fixup)
        ALIGN(  a8, a8, a9)
        EX(s32i, a8, a5, 8, s_fixup)
        addi    a3, a3, 16
        ALIGN(  a9, a9, a6)
        EX(s32i, a9, a5, 12, s_fixup)
        addi    a5, a5, 16
#if !XCHAL_HAVE_LOOPS
        blt     a3, a10, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
        bbci.l  a4, 3, .L12
        # copy 8 bytes
        EX(l32i, a7, a3, 4, l_fixup)
        EX(l32i, a8, a3, 8, l_fixup)
        ALIGN(  a6, a6, a7)
        EX(s32i, a6, a5, 0, s_fixup)
        addi    a3, a3, 8
        ALIGN(  a7, a7, a8)
        EX(s32i, a7, a5, 4, s_fixup)
        addi    a5, a5, 8
        mov     a6, a8
.L12:
        bbci.l  a4, 2, .L13
        # copy 4 bytes
        EX(l32i, a7, a3, 4, l_fixup)
        addi    a3, a3, 4
        ALIGN(  a6, a6, a7)
        EX(s32i, a6, a5, 0, s_fixup)
        addi    a5, a5, 4
        mov     a6, a7
.L13:
        add     a3, a3, a10     # readjust a3 with correct misalignment
        bbci.l  a4, 1, .L14
        # copy 2 bytes
        EX(l8ui, a6, a3, 0, l_fixup)
        EX(l8ui, a7, a3, 1, l_fixup)
        addi    a3, a3, 2
        EX(s8i, a6, a5, 0, s_fixup)
        EX(s8i, a7, a5, 1, s_fixup)
        addi    a5, a5, 2
.L14:
        bbci.l  a4, 0, .L15
        # copy 1 byte
        EX(l8ui, a6, a3, 0, l_fixup)
        EX(s8i, a6, a5, 0, s_fixup)
.L15:
        movi    a2, 0           # return success for len bytes copied
        retw
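/*
 * What the .fixup handlers below amount to, sketched in C (illustrative
 * only; the variable names are not from this file).  Here 'load_faulted'
 * stands for "the faulting access was a load from user space":
 *
 *      size_t copied = current_dst - original_dst;             a5 - a2
 *      if (load_faulted)                                       l_fixup only
 *              memset(original_dst + copied, 0, original_len - copied);
 *      return original_len - copied;                           bytes not copied
 */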
        .section .fixup, "ax"
        .align  4

/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 *
 * Clearing the remaining pieces of kernel memory plugs security
 * holes.  This functionality is the equivalent of the *_zeroing
 * functions that some architectures provide.
 */

.Lmemset:
        .word   memset

s_fixup:
        sub     a2, a5, a2      /* a2 <-- bytes copied */
        sub     a2, a11, a2     /* a2 <-- bytes not copied */
        retw

l_fixup:
        sub     a2, a5, a2      /* a2 <-- bytes copied */
        sub     a2, a11, a2     /* a2 <-- bytes not copied == return value */

        /* void *memset(void *s, int c, size_t n); */
        mov     a6, a5          /* s */
        movi    a7, 0           /* c */
        mov     a8, a2          /* n */
        l32r    a4, .Lmemset
        callx4  a4
        /* Ignore memset return value in a6. */
        /* a2 still contains bytes not copied. */
        retw
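/*
 * Note on the callx4 above: the Xtensa windowed ABI rotates the register
 * window by four on a call4/callx4, so the caller's a6, a7 and a8 become
 * the callee's a2, a3 and a4 (the first three C arguments), and the
 * callee's return value comes back in the caller's a6.  That is why the
 * memset() arguments are staged in a6/a7/a8, its result is ignored in a6,
 * and a2 keeps holding the bytes-not-copied count across the call.
 */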