/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

	.align	7
_GLOBAL(__copy_tofrom_user)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF	0x01,r5
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	srdi	r7,r5,4
20:	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,22f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,72f
21:	ld	r9,8(r4)
70:	std	r8,8(r3)
22:	ldu	r8,16(r4)
71:	stdu	r9,16(r3)
	bdnz	21b
72:	std	r8,8(r3)
	beq+	3f
	addi	r3,r3,16
23:	ld	r9,8(r4)
.Ldo_tail:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
75:	stb	r9,0(r3)
3:	li	r3,0
	blr

.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f
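/*
 * The two unrolled loops below realign an unaligned source by merging
 * adjacent source doublewords.  With the low 3 bits of the source
 * address in r0, r10 = 8*r0 and r11 = 64 - 8*r0 are the shift counts;
 * each aligned destination doubleword is built, in effect (big-endian,
 * with doubleword "a" preceding "b" in memory), as in this illustrative
 * C sketch:
 *
 *	dst = (a << r10) | (b >> r11);
 */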
24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sld	r6,r9,r10
26:	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,.Ldo_tail
34:	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
	b	.Ldo_tail

.Ldst_unaligned:
	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */

136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
122:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
121:
132:
	addi	r3,r3,8
123:
134:
135:
138:
139:
140:
141:
142:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr
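/*
 * Taken together, these load-fault paths implement the usercopy
 * contract stated above: salvage whatever bytes can still be read,
 * zero the remainder of the destination, and return the number of
 * bytes that could not be copied, with 0 meaning complete success.
 */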
/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
180:
	addi	r3,r3,8
171:
177:
	addi	r3,r3,8
170:
172:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
173:
174:
175:
179:
181:
184:
186:
187:
188:
189:
1:
	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	.section __ex_table,"a"
	.align	3
	.llong	20b,120b
	.llong	21b,121b
	.llong	70b,170b
	.llong	22b,122b
	.llong	71b,171b
	.llong	72b,172b
	.llong	23b,123b
	.llong	73b,173b
	.llong	74b,174b
	.llong	75b,175b
	.llong	24b,124b
	.llong	25b,125b
	.llong	26b,126b
	.llong	27b,127b
	.llong	28b,128b
	.llong	29b,129b
	.llong	30b,130b
	.llong	31b,131b
	.llong	32b,132b
	.llong	76b,176b
	.llong	33b,133b
	.llong	77b,177b
	.llong	78b,178b
	.llong	79b,179b
	.llong	80b,180b
	.llong	34b,134b
	.llong	35b,135b
	.llong	81b,181b
	.llong	36b,136b
	.llong	82b,182b
	.llong	37b,137b
	.llong	83b,183b
	.llong	38b,138b
	.llong	39b,139b
	.llong	84b,184b
	.llong	85b,185b
	.llong	40b,140b
	.llong	86b,186b
	.llong	41b,141b
	.llong	87b,187b
	.llong	42b,142b
	.llong	88b,188b
	.llong	43b,143b
	.llong	89b,189b
	.llong	90b,190b
	.llong	91b,191b
	.llong	92b,192b

	.text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label) but it runs slightly
 * slower on POWER3.
 */
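/*
 * The main loop below appears to be software-pipelined across six
 * load/store streams spaced 128 bytes (one POWER4 L1 cache line)
 * apart, moving 18 doublewords (144 bytes) per iteration so that
 * several cache lines are being filled and drained concurrently.
 */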
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned
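/*
 * Each __ex_table entry pairs the address of an instruction that may
 * fault with the address of its fixup code; the page fault handler
 * searches this table and resumes execution at the fixup.  Every fault
 * in the page-copy loop funnels into 100: above, which restores the
 * caller's original arguments and retries through the standard path so
 * its handlers can compute the exact return value.
 */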
	.section __ex_table,"a"
	.align	3
	.llong	20b,100b
	.llong	21b,100b
	.llong	22b,100b
	.llong	23b,100b
	.llong	24b,100b
	.llong	25b,100b
	.llong	26b,100b
	.llong	27b,100b
	.llong	28b,100b
	.llong	29b,100b
	.llong	30b,100b
	.llong	31b,100b
	.llong	32b,100b
	.llong	33b,100b
	.llong	34b,100b
	.llong	35b,100b
	.llong	36b,100b
	.llong	37b,100b
	.llong	38b,100b
	.llong	39b,100b
	.llong	40b,100b
	.llong	41b,100b
	.llong	42b,100b
	.llong	43b,100b
	.llong	44b,100b
	.llong	45b,100b
	.llong	46b,100b
	.llong	47b,100b
	.llong	48b,100b
	.llong	49b,100b
	.llong	50b,100b
	.llong	51b,100b
	.llong	52b,100b
	.llong	53b,100b
	.llong	54b,100b
	.llong	55b,100b
	.llong	56b,100b
	.llong	57b,100b
	.llong	58b,100b
	.llong	59b,100b
	.llong	60b,100b
	.llong	61b,100b
	.llong	62b,100b
	.llong	63b,100b
	.llong	64b,100b
	.llong	65b,100b
	.llong	66b,100b
	.llong	67b,100b
	.llong	68b,100b
	.llong	69b,100b
	.llong	70b,100b
	.llong	71b,100b
	.llong	72b,100b
	.llong	73b,100b
	.llong	74b,100b
	.llong	75b,100b
	.llong	76b,100b
	.llong	77b,100b
	.llong	78b,100b
	.llong	79b,100b
	.llong	80b,100b
	.llong	81b,100b
	.llong	82b,100b
	.llong	83b,100b
	.llong	84b,100b
	.llong	85b,100b
	.llong	86b,100b
	.llong	87b,100b
	.llong	88b,100b
	.llong	89b,100b
	.llong	90b,100b
	.llong	91b,100b