/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifndef SELFTEST_CASE
/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif
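
/*
 * The names refer to the direction bytes move within a register:
 * the lowest-addressed byte of a doubleword sits at the most-significant
 * end on big-endian and at the least-significant end on little-endian,
 * so "towards the low-numbered address" is a left shift on BE and a
 * right shift on LE.
 */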

/*
 * These macros are used to generate exception table entries.
 * The exception handlers below use the original arguments
 * (stored on the stack) and the point where we're up to in
 * the destination buffer, i.e. the address of the first
 * unmodified byte.  Generally r3 points into the destination
 * buffer, but the first unmodified byte is at a variable
 * offset from r3.  In the code below, the symbol r3_offset
 * is set to indicate the current offset at each point in
 * the code.  This offset is then used as a negative offset
 * from the exception handler code, and those instructions
 * before the exception handlers are addi instructions that
 * adjust r3 to point to the correct place.
 */
	.macro	lex		/* exception handler for load */
100:	EX_TABLE(100b, .Lld_exc - r3_offset)
	.endm

	.macro	stex		/* exception handler for store */
100:	EX_TABLE(100b, .Lst_exc - r3_offset)
	.endm
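
/*
 * Worked example of the trick: a "lex" placed where r3_offset = 16
 * records a fixup address of .Lld_exc - 16.  The 16 bytes of code
 * immediately before .Lld_exc are two addi r3,r3,8 instructions
 * (plus nops), so on a fault execution resumes there, adds 16 to r3
 * so that it points at the first unmodified destination byte, and
 * falls through into the common handler at .Lld_exc.
 */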

	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a 4kB copy on a 4kB boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
r3_offset = 16
test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
lex;	ld	r7,0(r4)
lex;	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
r3_offset = 0
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:
lex;	ld	r7,16(r4)
lex;	ld	r6,24(r4)
	addi	r4,r4,32
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
22:
lex;	ld	r9,0(r4)
lex;	ld	r8,8(r4)
stex;	std	r7,16(r3)
r3_offset = 24
stex;	std	r6,24(r3)
	addi	r3,r3,32
r3_offset = 0
	bdnz	21b
72:
stex;	std	r9,0(r3)
r3_offset = 8
stex;	std	r8,8(r3)
r3_offset = 16
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	addi	r3,r3,16
r3_offset = 0
	bf	cr7*4+0,246f
lex;	ld	r9,0(r4)
	addi	r4,r4,8
stex;	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
lex;	lwz	r9,0(r4)
	addi	r4,r4,4
stex;	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
lex;	lhz	r9,0(r4)
	addi	r4,r4,2
stex;	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
lex;	lbz	r9,0(r4)
stex;	stb	r9,0(r3)
3:	li	r3,0
	blr
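
/*
 * Source not 8-byte aligned: round r4 down to a doubleword boundary
 * and build each destination doubleword by merging two adjacent
 * aligned loads, shifted by 8*(source misalignment) bits (r10) and
 * by 64 minus that (r11) respectively.
 */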
.Lsrc_unaligned:
r3_offset = 16
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

lex;	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
lex;	ld	r0,8(r4)
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
lex;	ld	r0,8(r4)
	b	2f

28:
lex;	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
lex;	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
r3_offset = 24
	blt	cr6,5f
lex;	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
r3_offset = 8
	beq	cr6,78f

1:	or	r7,r7,r6
lex;	ld	r0,8(r4)
stex;	std	r12,8(r3)
r3_offset = 16
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
lex;	ldu	r9,16(r4)
	or	r12,r8,r12
stex;	stdu	r7,16(r3)
r3_offset = 8
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:
stex;	std	r12,8(r3)
r3_offset = 16
	or	r7,r7,r6
79:
stex;	std	r7,16(r3)
r3_offset = 24
5:	sHd	r12,r9,r11
	or	r12,r8,r12
stex;	std	r12,24(r3)
r3_offset = 32
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
r3_offset = 0
	sLd	r9,r9,r10
	ble	cr1,7f
lex;	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
stex;	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
stex;	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
stex;	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr
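
/*
 * Destination not 8-byte aligned: copy 1, 2 and/or 4 bytes, as selected
 * by the low bits of r6 (the distance to the next doubleword boundary)
 * in cr7, then rejoin the aligned path.  r7 tracks how many bytes have
 * been done, for the _r7 variants of the exception handlers.
 */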
.Ldst_unaligned:
r3_offset = 0
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lbz	r0,0(r4)
100:	EX_TABLE(100b, .Lst_exc_r7)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lhzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
100:	EX_TABLE(100b, .Lld_exc_r7)
	lwzx	r0,r7,r4
100:	EX_TABLE(100b, .Lst_exc_r7)
	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned
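
/*
 * Fewer than 16 bytes to copy: cr7 holds the low four bits of the
 * length, so copy 8, 4, 2 and/or 1 bytes as those bits dictate.
 */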
.Lshort_copy:
r3_offset = 0
	bf	cr7*4+0,1f
lex;	lwz	r0,0(r4)
lex;	lwz	r9,4(r4)
	addi	r4,r4,8
stex;	stw	r0,0(r3)
stex;	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
lex;	lwz	r0,0(r4)
	addi	r4,r4,4
stex;	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
lex;	lhz	r0,0(r4)
	addi	r4,r4,2
stex;	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
lex;	lbz	r0,0(r4)
stex;	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lld_exc - r3_offset as the handler address.
 */

.Lld_exc_r7:
	add	r3,r3,r7
	b	.Lld_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,8
	nop

/*
 * Here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination.  We use the original arguments
 * and r3 to work out how much wasn't copied.  Since we load some
 * distance ahead of the stores, we continue copying byte-by-byte until
 * we hit the load fault again in order to copy as much as possible.
 */
.Lld_exc:
	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
r3_offset = 0
100:	EX_TABLE(100b, .Ldone)
43:	lbz	r0,0(r4)
	addi	r4,r4,1
stex;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
.Ldone:
	mfctr	r3
	blr

/*
 * exception handlers for stores: we need to work out how many bytes
 * weren't copied, and we may need to copy some more.
 * Note that the number of bytes of instructions for adjusting r3 needs
 * to equal the amount of the adjustment, due to the trick of using
 * .Lst_exc - r3_offset as the handler address.
 */
.Lst_exc_r7:
	add	r3,r3,r7
	b	.Lst_exc

	/* adjust by 24 */
	addi	r3,r3,8
	nop
	/* adjust by 16 */
	addi	r3,r3,8
	nop
	/* adjust by 8 */
	addi	r3,r3,4
	/* adjust by 4 */
	addi	r3,r3,4
.Lst_exc:
	ld	r6,-24(r1)	/* original destination pointer */
	ld	r4,-16(r1)	/* original source pointer */
	ld	r5,-8(r1)	/* original number of bytes */
	add	r7,r6,r5
	/*
	 * If the destination pointer isn't 8-byte aligned,
	 * we may have got the exception as a result of a
	 * store that overlapped a page boundary, so we may be
	 * able to copy a few more bytes.
	 */
17:	andi.	r0,r3,7
	beq	19f
	subf	r8,r6,r3	/* #bytes copied */
100:	EX_TABLE(100b,19f)
	lbzx	r0,r8,r4
100:	EX_TABLE(100b,19f)
	stb	r0,0(r3)
	addi	r3,r3,1
	cmpld	r3,r7
	blt	17b
19:	subf	r3,r3,r7	/* #bytes not copied in r3 */
	blr

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
	.macro	exc
100:	EX_TABLE(100b, .Labort)
	.endm
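/*
 * The page is copied by a software-pipelined loop that reads from six
 * streams spaced 128 bytes apart (offsets 0, 128, ..., 640 from r4),
 * keeping the loads well ahead of the corresponding stores; r20-r31
 * are used as copy buffers and saved below the stack pointer first.
 * Spreading the accesses over several streams suits POWER4's ability
 * to prefetch multiple streams in hardware.
 */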
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
exc;	ld	r22,640(4)
exc;	ld	r21,512(4)
exc;	ld	r20,384(4)
exc;	ld	r11,256(4)
exc;	ld	r9,128(4)
exc;	ld	r7,0(4)
exc;	ld	r25,648(4)
exc;	ld	r24,520(4)
exc;	ld	r23,392(4)
exc;	ld	r10,264(4)
exc;	ld	r8,136(4)
exc;	ldu	r6,8(4)
	cmpwi	r5,24
1:
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
exc;	ld	r28,648(4)
exc;	ld	r27,520(4)
exc;	ld	r26,392(4)
exc;	ld	r31,264(4)
exc;	ld	r30,136(4)
exc;	ld	r29,8(4)
exc;	std	r25,656(3)
exc;	std	r24,528(3)
exc;	std	r23,400(3)
exc;	std	r10,272(3)
exc;	std	r8,144(3)
exc;	std	r6,16(3)
exc;	ld	r22,656(4)
exc;	ld	r21,528(4)
exc;	ld	r20,400(4)
exc;	ld	r11,272(4)
exc;	ld	r9,144(4)
exc;	ld	r7,16(4)
exc;	std	r28,664(3)
exc;	std	r27,536(3)
exc;	std	r26,408(3)
exc;	std	r31,280(3)
exc;	std	r30,152(3)
exc;	stdu	r29,24(3)
exc;	ld	r25,664(4)
exc;	ld	r24,536(4)
exc;	ld	r23,408(4)
exc;	ld	r10,280(4)
exc;	ld	r8,152(4)
exc;	ldu	r6,24(4)
	bdnz	1b
exc;	std	r22,648(3)
exc;	std	r21,520(3)
exc;	std	r20,392(3)
exc;	std	r11,264(3)
exc;	std	r9,136(3)
exc;	std	r7,8(3)
	addi	r4,r4,640
	addi	r3,r3,648
	bge	0b
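	/*
	 * Main streamed loop done: copy the rest of the page 32 bytes
	 * at a time, with the loads again running ahead of the stores.
	 */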
	mtctr	r5
exc;	ld	r7,0(4)
exc;	ld	r8,8(4)
exc;	ldu	r9,16(4)
3:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	ld	r7,16(4)
exc;	std	r8,16(3)
exc;	ld	r8,24(4)
exc;	std	r9,24(3)
exc;	ldu	r9,32(4)
exc;	stdu	r10,32(3)
	bdnz	3b
4:
exc;	ld	r10,8(4)
exc;	std	r7,8(3)
exc;	std	r8,16(3)
exc;	std	r9,24(3)
exc;	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
.Labort:
	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)