/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

/*
 * __copy_tofrom_user / __copy_tofrom_user_base
 *
 * In:	r3 = destination address
 *	r4 = source address
 *	r5 = number of bytes to copy
 * Out:	r3 = 0 if everything was copied, otherwise the number of
 *	     bytes that were NOT copied.
 *
 * The incoming r3/r4/r5 are saved at -24(r1)/-16(r1)/-8(r1) so that the
 * fault fixup code can work out how far the copy got.  The page-copy
 * path additionally saves r20-r31 at -32(r1)..-120(r1).  All of this is
 * stored below the stack pointer; r1 itself is never adjusted.
 *
 * Every load and store that may fault carries a numeric label NN and is
 * paired through EX_TABLE() with a fixup label (1NN or 3NN for the main
 * routine, 100 for the page-copy path).  Numeric labels are reused
 * between the main routine and the page-copy path, so each EX_TABLE()
 * list must stay directly after the code it describes.
 */
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
#ifdef CONFIG_PPC_BOOK3S_64
	/* Alternative section keyed on CPU_FTR_VMX_COPY: nop or branch to the POWER7 variant. */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
#endif
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16		/* cr1: length compared with 16 */
	cmpdi	cr6,r5,4096		/* cr6.eq: length is exactly 4096 */
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095		/* cr0.eq: src and dest both 4K aligned */
	std	r3,-24(r1)		/* save original dest for the fixups */
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* cr0.eq &= cr6.eq */
	std	r4,-16(r1)		/* save original src */
	std	r5,-8(r1)		/* save original length */
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7			/* cr0.eq: dest already 8-byte aligned */
	PPC_MTOCRF(0x01,r5)		/* low bits of length into cr7 */
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	/*
	 * r3 is biased by -16 from here on; the fixup labels add the
	 * matching offset back to find the first unmodified dest byte.
	 */
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7			/* r0 = source misalignment */
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5			/* r0 = number of 32-byte chunks */
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10		/* odd 16-byte chunk present? */
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f			/* no 32-byte chunks at all */
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf		/* anything left below 16 bytes? */
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	/* copy the final 8/4/2/1 bytes as selected by the cr7 bits */
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

/*
 * Source is not 8-byte aligned (r0 = src & 7) but the destination is.
 * Read aligned doublewords from the rounded-down source and merge
 * neighbouring pairs with sLd/sHd:
 *   r10 = 8 * r0	(bit shift towards the low address)
 *   r11 = 64 - r10	(bit shift towards the high address)
 */
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4		/* round source down to 8 bytes */
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sLd	r6,r9,r10
26:	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	sHd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f			/* cr0 still from "andi. r5,r5,7" above */
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sLd	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:
	/*
	 * r9 holds the trailing bytes; store 4/2/1 of them as selected
	 * by cr7, rotating r9 so the next piece is in position.
	 */
	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
94:	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
95:	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
96:	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

/*
 * Destination is not 8-byte aligned: copy r6 (1..7) leading bytes so
 * that it becomes aligned, then rejoin the aligned path.  r7 counts the
 * bytes copied so far and is what fixups 136/137/182/183 add to r3.
 */
.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)		/* cr7 = low bits of remaining length */
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

/*
 * Fewer than 16 bytes in total: copy 8 (as two words), 4, 2 and 1
 * bytes as selected by the cr7 bits.
 */
.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we retry the rest of the copy one byte
 * at a time so that as much as possible is copied before giving up;
 * the destination is not zeroed here
 *
 * The fixup labels below fall through a chain of addi instructions:
 * the earlier a label sits in the chain, the more is added to r3.
 * On reaching the local label 1, r3 addresses the first destination
 * byte that has not been written.
 */

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)		/* original dest */
	ld	r4,-16(r1)		/* original src */
	ld	r5,-8(r1)		/* original length */
	subf	r6,r6,r3		/* r6 = bytes already copied */
	add	r4,r4,r6		/* r4 = matching source address */
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, amount remaining is in ctr.
 */
143:	mfctr	r3
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 *
 * Same fall-through scheme as for the loads: each label is placed so
 * that the addi chain adds the displacement used by the faulting store
 * (for example "79: std r7,16(r3)" enters at 179 and gets 8+4+4 = 16).
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
179:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
	ld	r6,-24(r1)		/* original dest */
	ld	r5,-8(r1)		/* original length */
	add	r6,r6,r5		/* r6 = one past the end of dest */
	subf	r3,r3,r6	/* #bytes not copied */
	blr

	/* faulting instruction -> fixup label, for the code above */
	EX_TABLE(20b,120b)
	EX_TABLE(220b,320b)
	EX_TABLE(21b,121b)
	EX_TABLE(221b,321b)
	EX_TABLE(70b,170b)
	EX_TABLE(270b,370b)
	EX_TABLE(22b,122b)
	EX_TABLE(222b,322b)
	EX_TABLE(71b,171b)
	EX_TABLE(271b,371b)
	EX_TABLE(72b,172b)
	EX_TABLE(272b,372b)
	EX_TABLE(244b,344b)
	EX_TABLE(245b,345b)
	EX_TABLE(23b,123b)
	EX_TABLE(73b,173b)
	EX_TABLE(44b,144b)
	EX_TABLE(74b,174b)
	EX_TABLE(45b,145b)
	EX_TABLE(75b,175b)
	EX_TABLE(24b,124b)
	EX_TABLE(25b,125b)
	EX_TABLE(26b,126b)
	EX_TABLE(27b,127b)
	EX_TABLE(28b,128b)
	EX_TABLE(29b,129b)
	EX_TABLE(30b,130b)
	EX_TABLE(31b,131b)
	EX_TABLE(32b,132b)
	EX_TABLE(76b,176b)
	EX_TABLE(33b,133b)
	EX_TABLE(77b,177b)
	EX_TABLE(78b,178b)
	EX_TABLE(79b,179b)
	EX_TABLE(80b,180b)
	EX_TABLE(34b,134b)
	EX_TABLE(94b,194b)
	EX_TABLE(95b,195b)
	EX_TABLE(96b,196b)
	EX_TABLE(35b,135b)
	EX_TABLE(81b,181b)
	EX_TABLE(36b,136b)
	EX_TABLE(82b,182b)
	EX_TABLE(37b,137b)
	EX_TABLE(83b,183b)
	EX_TABLE(38b,138b)
	EX_TABLE(39b,139b)
	EX_TABLE(84b,184b)
	EX_TABLE(85b,185b)
	EX_TABLE(40b,140b)
	EX_TABLE(86b,186b)
	EX_TABLE(41b,141b)
	EX_TABLE(87b,187b)
	EX_TABLE(42b,142b)
	EX_TABLE(88b,188b)
	EX_TABLE(43b,143b)
	EX_TABLE(89b,189b)

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 *
 * Reached only when source and destination are both 4K aligned and the
 * length is exactly 4096.  r20-r31 are saved below the stack pointer
 * and restored on both the normal exit (label 9) and the fault path
 * (label 100).  The main loop works on six streams that are 128 bytes
 * apart (offsets 0, 128, 256, 384, 512 and 640).
 */
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24			/* cr0 is consumed by "bge 0b" below */
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	/* copy what is left with a simple 32-bytes-per-iteration loop */
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 *
 * r20-r31 and the original r3/r4 are reloaded and r5 is set back to
 * 4096, so the copy is redone from the start by the generic path,
 * whose fixups work out the return value.  cr1 still holds the entry
 * comparison of the length with 16, because the page-copy code only
 * writes cr0.
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	/* every labelled instruction of the page-copy path shares fixup 100 */
	EX_TABLE(20b,100b)
	EX_TABLE(21b,100b)
	EX_TABLE(22b,100b)
	EX_TABLE(23b,100b)
	EX_TABLE(24b,100b)
	EX_TABLE(25b,100b)
	EX_TABLE(26b,100b)
	EX_TABLE(27b,100b)
	EX_TABLE(28b,100b)
	EX_TABLE(29b,100b)
	EX_TABLE(30b,100b)
	EX_TABLE(31b,100b)
	EX_TABLE(32b,100b)
	EX_TABLE(33b,100b)
	EX_TABLE(34b,100b)
	EX_TABLE(35b,100b)
	EX_TABLE(36b,100b)
	EX_TABLE(37b,100b)
	EX_TABLE(38b,100b)
	EX_TABLE(39b,100b)
	EX_TABLE(40b,100b)
	EX_TABLE(41b,100b)
	EX_TABLE(42b,100b)
	EX_TABLE(43b,100b)
	EX_TABLE(44b,100b)
	EX_TABLE(45b,100b)
	EX_TABLE(46b,100b)
	EX_TABLE(47b,100b)
	EX_TABLE(48b,100b)
	EX_TABLE(49b,100b)
	EX_TABLE(50b,100b)
	EX_TABLE(51b,100b)
	EX_TABLE(52b,100b)
	EX_TABLE(53b,100b)
	EX_TABLE(54b,100b)
	EX_TABLE(55b,100b)
	EX_TABLE(56b,100b)
	EX_TABLE(57b,100b)
	EX_TABLE(58b,100b)
	EX_TABLE(59b,100b)
	EX_TABLE(60b,100b)
	EX_TABLE(61b,100b)
	EX_TABLE(62b,100b)
	EX_TABLE(63b,100b)
	EX_TABLE(64b,100b)
	EX_TABLE(65b,100b)
	EX_TABLE(66b,100b)
	EX_TABLE(67b,100b)
	EX_TABLE(68b,100b)
	EX_TABLE(69b,100b)
	EX_TABLE(70b,100b)
	EX_TABLE(71b,100b)
	EX_TABLE(72b,100b)
	EX_TABLE(73b,100b)
	EX_TABLE(74b,100b)
	EX_TABLE(75b,100b)
	EX_TABLE(76b,100b)
	EX_TABLE(77b,100b)
	EX_TABLE(78b,100b)
	EX_TABLE(79b,100b)
	EX_TABLE(80b,100b)
	EX_TABLE(81b,100b)
	EX_TABLE(82b,100b)
	EX_TABLE(83b,100b)
	EX_TABLE(84b,100b)
	EX_TABLE(85b,100b)
	EX_TABLE(86b,100b)
	EX_TABLE(87b,100b)
	EX_TABLE(88b,100b)
	EX_TABLE(89b,100b)
	EX_TABLE(90b,100b)
	EX_TABLE(91b,100b)

EXPORT_SYMBOL(__copy_tofrom_user)