/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 */
#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm

/*
 * __copy_tofrom_user
 *
 * In:	r3 = destination pointer
 *	r4 = source pointer
 *	r5 = number of bytes to copy
 * Out:	r3 = 0 if everything was copied, otherwise the number of
 *	     bytes that were not copied.
 *
 * The three arguments are saved at -24(r1), -16(r1) and -8(r1) without
 * creating a stack frame; the exception handlers reload them from there
 * to work out how far the copy got.
 */
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
/*
 * NOTE(review): presumably CPUs with CPU_FTR_VMX_COPY set get the branch
 * to the POWER7 variant patched in and all others fall through the nop;
 * confirm against asm/feature-fixups.h.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)	/* save original args for the exception handlers */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq = both 4kB aligned && len == 4096 */
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)	/* cr7 is tested below to pick 8/4/2/1-byte tail pieces */
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5			/* r0 = number of 32-byte chunks */
	cmpdi	cr1,r0,0
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10		/* odd 16-byte piece to copy? */
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	/* copy the remaining 0-15 bytes, one cr7 bit per power-of-two piece */
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr

/*
 * Destination is 8-byte aligned but the source is not (r0 = source & 7).
 * The source pointer is rounded down, aligned doublewords are loaded and
 * each output doubleword is assembled from two neighbouring inputs using
 * the shift counts r10 = 8 * r0 and r11 = 64 - r10.
 */
.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

/*
 * Destination is not 8-byte aligned: copy r6 (1-7) bytes so that it is,
 * then rejoin the aligned path.  r7 counts the bytes copied so far and
 * is what the _r7 exception handlers add to r3.
 */
.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/* Fewer than 16 bytes in total: copy 8/4/2/1-byte pieces as selected by cr7 */
.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we retry byte-by-byte from the first
 * unmodified destination byte and return the count still outstanding
 * (the rest of the destination is not zeroed here)
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */

.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
.Lld_exc:
	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3	/* r6 = #bytes already copied */
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5	/* r7 = end of destination buffer */
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
.Lcopy_page_4K:
	/*
	 * r20-r31 are used as extra copy registers below, so save them
	 * under the stack pointer (no frame is created); both the normal
	 * exit at 9: and .Labort restore them.
	 */
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	li	r5,4096		/* this path is only entered for a 4096-byte copy */
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)