/*
 * arch/xtensa/lib/usercopy.S
 *
 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 * It needs to remain separate and distinct.  The hal files are part
 * of the Xtensa link-time HAL, and those files may differ per
 * processor configuration.  Patching the kernel for another
 * processor configuration includes replacing the hal files, and we
 * could lose the special functionality for accessing user-space
 * memory during such a patch.  We sacrifice a little code space here
 * in favor of simpler code maintenance.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */


/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The returned value is the number of bytes not copied.  Implies zero
 * is success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
33249ac17eSChris Zankel * If destination is aligned and source unaligned, 34249ac17eSChris Zankel * do the same, but use SRC to align the source data. 35249ac17eSChris Zankel * If destination is unaligned, align it by conditionally 36249ac17eSChris Zankel * copying 1B and 2B and then retest. 37249ac17eSChris Zankel * This code tries to use fall-through braches for the common 38249ac17eSChris Zankel * case of aligned destinations (except for the branches to 39249ac17eSChris Zankel * the alignment label). 40249ac17eSChris Zankel * 41249ac17eSChris Zankel * Register use: 42249ac17eSChris Zankel * a0/ return address 43249ac17eSChris Zankel * a1/ stack pointer 44249ac17eSChris Zankel * a2/ return value 45249ac17eSChris Zankel * a3/ src 46249ac17eSChris Zankel * a4/ length 47249ac17eSChris Zankel * a5/ dst 48249ac17eSChris Zankel * a6/ tmp 49249ac17eSChris Zankel * a7/ tmp 50249ac17eSChris Zankel * a8/ tmp 51249ac17eSChris Zankel * a9/ tmp 52249ac17eSChris Zankel * a10/ tmp 53249ac17eSChris Zankel * a11/ original length 54249ac17eSChris Zankel */ 55249ac17eSChris Zankel 565cf97ebdSMax Filippov#include <linux/linkage.h> 570013acebSMax Filippov#include <asm/asmmacro.h> 588f8d5745SMax Filippov#include <asm/core.h> 59249ac17eSChris Zankel 60249ac17eSChris Zankel .text 615cf97ebdSMax FilippovENTRY(__xtensa_copy_user) 625cf97ebdSMax Filippov 6361a6b912SMax Filippov#if !XCHAL_HAVE_LOOPS && defined(__XTENSA_CALL0_ABI__) 6461a6b912SMax Filippov#define STACK_SIZE 4 6561a6b912SMax Filippov#else 6661a6b912SMax Filippov#define STACK_SIZE 0 6761a6b912SMax Filippov#endif 6861a6b912SMax Filippov abi_entry(STACK_SIZE) 69249ac17eSChris Zankel # a2/ dst, a3/ src, a4/ len 70249ac17eSChris Zankel mov a5, a2 # copy dst so that a2 is return value 71249ac17eSChris Zankel mov a11, a4 # preserve original len for error case 72249ac17eSChris Zankel.Lcommon: 73249ac17eSChris Zankel bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2 74249ac17eSChris Zankel bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4 
75249ac17eSChris Zankel.Ldstaligned: # return here from .Ldstunaligned when dst is aligned 76249ac17eSChris Zankel srli a7, a4, 4 # number of loop iterations with 16B 77249ac17eSChris Zankel # per iteration 78249ac17eSChris Zankel movi a8, 3 # if source is also aligned, 79249ac17eSChris Zankel bnone a3, a8, .Laligned # then use word copy 80fbb871e2SMax Filippov __ssa8 a3 # set shift amount from byte offset 81249ac17eSChris Zankel bnez a4, .Lsrcunaligned 82249ac17eSChris Zankel movi a2, 0 # return success for len==0 8361a6b912SMax Filippov abi_ret(STACK_SIZE) 84249ac17eSChris Zankel 85249ac17eSChris Zankel/* 86249ac17eSChris Zankel * Destination is unaligned 87249ac17eSChris Zankel */ 88249ac17eSChris Zankel 89249ac17eSChris Zankel.Ldst1mod2: # dst is only byte aligned 90249ac17eSChris Zankel bltui a4, 7, .Lbytecopy # do short copies byte by byte 91249ac17eSChris Zankel 92249ac17eSChris Zankel # copy 1 byte 930013acebSMax FilippovEX(10f) l8ui a6, a3, 0 94249ac17eSChris Zankel addi a3, a3, 1 950013acebSMax FilippovEX(10f) s8i a6, a5, 0 96249ac17eSChris Zankel addi a5, a5, 1 97249ac17eSChris Zankel addi a4, a4, -1 98249ac17eSChris Zankel bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then 99249ac17eSChris Zankel # return to main algorithm 100249ac17eSChris Zankel.Ldst2mod4: # dst 16-bit aligned 101249ac17eSChris Zankel # copy 2 bytes 102249ac17eSChris Zankel bltui a4, 6, .Lbytecopy # do short copies byte by byte 1030013acebSMax FilippovEX(10f) l8ui a6, a3, 0 1040013acebSMax FilippovEX(10f) l8ui a7, a3, 1 105249ac17eSChris Zankel addi a3, a3, 2 1060013acebSMax FilippovEX(10f) s8i a6, a5, 0 1070013acebSMax FilippovEX(10f) s8i a7, a5, 1 108249ac17eSChris Zankel addi a5, a5, 2 109249ac17eSChris Zankel addi a4, a4, -2 110249ac17eSChris Zankel j .Ldstaligned # dst is now aligned, return to main algorithm 111249ac17eSChris Zankel 112249ac17eSChris Zankel/* 113249ac17eSChris Zankel * Byte by byte copy 114249ac17eSChris Zankel */ 115249ac17eSChris Zankel .align 4 
116249ac17eSChris Zankel .byte 0 # 1 mod 4 alignment for LOOPNEZ 117249ac17eSChris Zankel # (0 mod 4 alignment for LBEG) 118249ac17eSChris Zankel.Lbytecopy: 119249ac17eSChris Zankel#if XCHAL_HAVE_LOOPS 120249ac17eSChris Zankel loopnez a4, .Lbytecopydone 121249ac17eSChris Zankel#else /* !XCHAL_HAVE_LOOPS */ 122249ac17eSChris Zankel beqz a4, .Lbytecopydone 123249ac17eSChris Zankel add a7, a3, a4 # a7 = end address for source 124249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */ 125249ac17eSChris Zankel.Lnextbyte: 1260013acebSMax FilippovEX(10f) l8ui a6, a3, 0 127249ac17eSChris Zankel addi a3, a3, 1 1280013acebSMax FilippovEX(10f) s8i a6, a5, 0 129249ac17eSChris Zankel addi a5, a5, 1 130249ac17eSChris Zankel#if !XCHAL_HAVE_LOOPS 131249ac17eSChris Zankel blt a3, a7, .Lnextbyte 132249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */ 133249ac17eSChris Zankel.Lbytecopydone: 134249ac17eSChris Zankel movi a2, 0 # return success for len bytes copied 13561a6b912SMax Filippov abi_ret(STACK_SIZE) 136249ac17eSChris Zankel 137249ac17eSChris Zankel/* 138249ac17eSChris Zankel * Destination and source are word-aligned. 
139249ac17eSChris Zankel */ 140249ac17eSChris Zankel # copy 16 bytes per iteration for word-aligned dst and word-aligned src 141249ac17eSChris Zankel .align 4 # 1 mod 4 alignment for LOOPNEZ 142249ac17eSChris Zankel .byte 0 # (0 mod 4 alignment for LBEG) 143249ac17eSChris Zankel.Laligned: 144249ac17eSChris Zankel#if XCHAL_HAVE_LOOPS 145249ac17eSChris Zankel loopnez a7, .Loop1done 146249ac17eSChris Zankel#else /* !XCHAL_HAVE_LOOPS */ 147249ac17eSChris Zankel beqz a7, .Loop1done 148249ac17eSChris Zankel slli a8, a7, 4 149249ac17eSChris Zankel add a8, a8, a3 # a8 = end of last 16B source chunk 150249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */ 151249ac17eSChris Zankel.Loop1: 1520013acebSMax FilippovEX(10f) l32i a6, a3, 0 1530013acebSMax FilippovEX(10f) l32i a7, a3, 4 1540013acebSMax FilippovEX(10f) s32i a6, a5, 0 1550013acebSMax FilippovEX(10f) l32i a6, a3, 8 1560013acebSMax FilippovEX(10f) s32i a7, a5, 4 1570013acebSMax FilippovEX(10f) l32i a7, a3, 12 1580013acebSMax FilippovEX(10f) s32i a6, a5, 8 159249ac17eSChris Zankel addi a3, a3, 16 1600013acebSMax FilippovEX(10f) s32i a7, a5, 12 161249ac17eSChris Zankel addi a5, a5, 16 162249ac17eSChris Zankel#if !XCHAL_HAVE_LOOPS 163249ac17eSChris Zankel blt a3, a8, .Loop1 164249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */ 165249ac17eSChris Zankel.Loop1done: 166249ac17eSChris Zankel bbci.l a4, 3, .L2 167249ac17eSChris Zankel # copy 8 bytes 1680013acebSMax FilippovEX(10f) l32i a6, a3, 0 1690013acebSMax FilippovEX(10f) l32i a7, a3, 4 170249ac17eSChris Zankel addi a3, a3, 8 1710013acebSMax FilippovEX(10f) s32i a6, a5, 0 1720013acebSMax FilippovEX(10f) s32i a7, a5, 4 173249ac17eSChris Zankel addi a5, a5, 8 174249ac17eSChris Zankel.L2: 175249ac17eSChris Zankel bbci.l a4, 2, .L3 176249ac17eSChris Zankel # copy 4 bytes 1770013acebSMax FilippovEX(10f) l32i a6, a3, 0 178249ac17eSChris Zankel addi a3, a3, 4 1790013acebSMax FilippovEX(10f) s32i a6, a5, 0 180249ac17eSChris Zankel addi a5, a5, 4 181249ac17eSChris Zankel.L3: 
182249ac17eSChris Zankel bbci.l a4, 1, .L4 183249ac17eSChris Zankel # copy 2 bytes 1840013acebSMax FilippovEX(10f) l16ui a6, a3, 0 185249ac17eSChris Zankel addi a3, a3, 2 1860013acebSMax FilippovEX(10f) s16i a6, a5, 0 187249ac17eSChris Zankel addi a5, a5, 2 188249ac17eSChris Zankel.L4: 189249ac17eSChris Zankel bbci.l a4, 0, .L5 190249ac17eSChris Zankel # copy 1 byte 1910013acebSMax FilippovEX(10f) l8ui a6, a3, 0 1920013acebSMax FilippovEX(10f) s8i a6, a5, 0 193249ac17eSChris Zankel.L5: 194249ac17eSChris Zankel movi a2, 0 # return success for len bytes copied 19561a6b912SMax Filippov abi_ret(STACK_SIZE) 196249ac17eSChris Zankel 197249ac17eSChris Zankel/* 198249ac17eSChris Zankel * Destination is aligned, Source is unaligned 199249ac17eSChris Zankel */ 200249ac17eSChris Zankel 201249ac17eSChris Zankel .align 4 202249ac17eSChris Zankel .byte 0 # 1 mod 4 alignement for LOOPNEZ 203249ac17eSChris Zankel # (0 mod 4 alignment for LBEG) 204249ac17eSChris Zankel.Lsrcunaligned: 205249ac17eSChris Zankel # copy 16 bytes per iteration for word-aligned dst and unaligned src 206249ac17eSChris Zankel and a10, a3, a8 # save unalignment offset for below 207249ac17eSChris Zankel sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware) 2080013acebSMax FilippovEX(10f) l32i a6, a3, 0 # load first word 209249ac17eSChris Zankel#if XCHAL_HAVE_LOOPS 210249ac17eSChris Zankel loopnez a7, .Loop2done 211249ac17eSChris Zankel#else /* !XCHAL_HAVE_LOOPS */ 212249ac17eSChris Zankel beqz a7, .Loop2done 21361a6b912SMax Filippov#if defined(__XTENSA_CALL0_ABI__) 21461a6b912SMax Filippov s32i a10, a1, 0 21561a6b912SMax Filippov slli a10, a7, 4 21661a6b912SMax Filippov add a10, a10, a3 # a10 = end of last 16B source chunk 21761a6b912SMax Filippov#else 2185029615eSMax Filippov slli a12, a7, 4 2195029615eSMax Filippov add a12, a12, a3 # a12 = end of last 16B source chunk 22061a6b912SMax Filippov#endif 221249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */ 222249ac17eSChris 
Zankel.Loop2: 2230013acebSMax FilippovEX(10f) l32i a7, a3, 4 2240013acebSMax FilippovEX(10f) l32i a8, a3, 8 225fbb871e2SMax Filippov __src_b a6, a6, a7 2260013acebSMax FilippovEX(10f) s32i a6, a5, 0 2270013acebSMax FilippovEX(10f) l32i a9, a3, 12 228fbb871e2SMax Filippov __src_b a7, a7, a8 2290013acebSMax FilippovEX(10f) s32i a7, a5, 4 2300013acebSMax FilippovEX(10f) l32i a6, a3, 16 231fbb871e2SMax Filippov __src_b a8, a8, a9 2320013acebSMax FilippovEX(10f) s32i a8, a5, 8 233249ac17eSChris Zankel addi a3, a3, 16 234fbb871e2SMax Filippov __src_b a9, a9, a6 2350013acebSMax FilippovEX(10f) s32i a9, a5, 12 236249ac17eSChris Zankel addi a5, a5, 16 237249ac17eSChris Zankel#if !XCHAL_HAVE_LOOPS 23861a6b912SMax Filippov#if defined(__XTENSA_CALL0_ABI__) 23961a6b912SMax Filippov blt a3, a10, .Loop2 24061a6b912SMax Filippov l32i a10, a1, 0 24161a6b912SMax Filippov#else 2425029615eSMax Filippov blt a3, a12, .Loop2 24361a6b912SMax Filippov#endif 244249ac17eSChris Zankel#endif /* !XCHAL_HAVE_LOOPS */ 245249ac17eSChris Zankel.Loop2done: 246249ac17eSChris Zankel bbci.l a4, 3, .L12 247249ac17eSChris Zankel # copy 8 bytes 2480013acebSMax FilippovEX(10f) l32i a7, a3, 4 2490013acebSMax FilippovEX(10f) l32i a8, a3, 8 250fbb871e2SMax Filippov __src_b a6, a6, a7 2510013acebSMax FilippovEX(10f) s32i a6, a5, 0 252249ac17eSChris Zankel addi a3, a3, 8 253fbb871e2SMax Filippov __src_b a7, a7, a8 2540013acebSMax FilippovEX(10f) s32i a7, a5, 4 255249ac17eSChris Zankel addi a5, a5, 8 256249ac17eSChris Zankel mov a6, a8 257249ac17eSChris Zankel.L12: 258249ac17eSChris Zankel bbci.l a4, 2, .L13 259249ac17eSChris Zankel # copy 4 bytes 2600013acebSMax FilippovEX(10f) l32i a7, a3, 4 261249ac17eSChris Zankel addi a3, a3, 4 262fbb871e2SMax Filippov __src_b a6, a6, a7 2630013acebSMax FilippovEX(10f) s32i a6, a5, 0 264249ac17eSChris Zankel addi a5, a5, 4 265249ac17eSChris Zankel mov a6, a7 266249ac17eSChris Zankel.L13: 267249ac17eSChris Zankel add a3, a3, a10 # readjust a3 with correct misalignment 
268249ac17eSChris Zankel bbci.l a4, 1, .L14 269249ac17eSChris Zankel # copy 2 bytes 2700013acebSMax FilippovEX(10f) l8ui a6, a3, 0 2710013acebSMax FilippovEX(10f) l8ui a7, a3, 1 272249ac17eSChris Zankel addi a3, a3, 2 2730013acebSMax FilippovEX(10f) s8i a6, a5, 0 2740013acebSMax FilippovEX(10f) s8i a7, a5, 1 275249ac17eSChris Zankel addi a5, a5, 2 276249ac17eSChris Zankel.L14: 277249ac17eSChris Zankel bbci.l a4, 0, .L15 278249ac17eSChris Zankel # copy 1 byte 2790013acebSMax FilippovEX(10f) l8ui a6, a3, 0 2800013acebSMax FilippovEX(10f) s8i a6, a5, 0 281249ac17eSChris Zankel.L15: 282249ac17eSChris Zankel movi a2, 0 # return success for len bytes copied 28361a6b912SMax Filippov abi_ret(STACK_SIZE) 284249ac17eSChris Zankel 2855cf97ebdSMax FilippovENDPROC(__xtensa_copy_user) 286*338d9150SMax FilippovEXPORT_SYMBOL(__xtensa_copy_user) 287249ac17eSChris Zankel 288249ac17eSChris Zankel .section .fixup, "ax" 289249ac17eSChris Zankel .align 4 290249ac17eSChris Zankel 291249ac17eSChris Zankel/* a2 = original dst; a5 = current dst; a11= original len 292249ac17eSChris Zankel * bytes_copied = a5 - a2 293249ac17eSChris Zankel * retval = bytes_not_copied = original len - bytes_copied 294249ac17eSChris Zankel * retval = a11 - (a5 - a2) 295249ac17eSChris Zankel */ 296249ac17eSChris Zankel 297249ac17eSChris Zankel 2980013acebSMax Filippov10: 299249ac17eSChris Zankel sub a2, a5, a2 /* a2 <-- bytes copied */ 300249ac17eSChris Zankel sub a2, a11, a2 /* a2 <-- bytes not copied */ 30161a6b912SMax Filippov abi_ret(STACK_SIZE) 302