/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1996 David S. Miller
 *  Copyright(C) 1996 Eddie C. Dost
 *  Copyright(C) 1996,1998 Jakub Jelinek
 *
 * derived from:
 *	e-mail between David and Eddie.
 *
 * Returns 0 if successful, otherwise count of bytes not copied yet
 */

#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>

/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
#define EX(x,y,a,b)				\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EX2(x,y,c,d,e,a,b)			\
98:	x,y;					\
	.section .fixup,ALLOC,EXECINSTR;	\
	.align	4;				\
99:	c, d, e;				\
	ba fixupretl;				\
	 a, b, %g3;				\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 99b;			\
	.text;					\
	.align	4

#define EXO2(x,y)				\
98:	x, y;					\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	98b, 97f;			\
	.text;					\
	.align	4

#define EXT(start,end,handler)			\
	.section __ex_table,ALLOC;		\
	.align	4;				\
	.word	start, 0, end, handler;		\
	.text;					\
	.align	4

/* Please do not change the following macros unless you also change the
 * logic used in .fixup at the end of this file.
 */

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	st	%t0, [%dst + (offset) + 0x00]; \
	st	%t1, [%dst + (offset) + 0x04]; \
	st	%t2, [%dst + (offset) + 0x08]; \
	st	%t3, [%dst + (offset) + 0x0c]; \
	st	%t4, [%dst + (offset) + 0x10]; \
	st	%t5, [%dst + (offset) + 0x14]; \
	st	%t6, [%dst + (offset) + 0x18]; \
	st	%t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + (offset) + 0x00], %t0; \
	ldd	[%src + (offset) + 0x08], %t2; \
	ldd	[%src + (offset) + 0x10], %t4; \
	ldd	[%src + (offset) + 0x18], %t6; \
	std	%t0, [%dst + (offset) + 0x00]; \
	std	%t2, [%dst + (offset) + 0x08]; \
	std	%t4, [%dst + (offset) + 0x10]; \
	std	%t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - (offset) - 0x10], %t0; \
	ldd	[%src - (offset) - 0x08], %t2; \
	st	%t0, [%dst - (offset) - 0x10]; \
	st	%t1, [%dst - (offset) - 0x0c]; \
	st	%t2, [%dst - (offset) - 0x08]; \
	st	%t3, [%dst - (offset) - 0x04];

#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
	lduh	[%src + (offset) + 0x00], %t0; \
	lduh	[%src + (offset) + 0x02], %t1; \
	lduh	[%src + (offset) + 0x04], %t2; \
	lduh	[%src + (offset) + 0x06], %t3; \
	sth	%t0, [%dst + (offset) + 0x00]; \
	sth	%t1, [%dst + (offset) + 0x02]; \
	sth	%t2, [%dst + (offset) + 0x04]; \
	sth	%t3, [%dst + (offset) + 0x06];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - (offset) - 0x02], %t0; \
	ldub	[%src - (offset) - 0x01], %t1; \
	stb	%t0, [%dst - (offset) - 0x02]; \
	stb	%t1, [%dst - (offset) - 0x01];
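
/* For illustration only, one MOVE_BIGCHUNK expansion behaves roughly like
 * the C sketch below (a sketch, not generated code).  ldd needs a
 * doubleword aligned source, but the results are written back with 32-bit
 * st, so the destination only needs word alignment:
 *
 *	u32 *s = (u32 *)(src + offset), *d = (u32 *)(dst + offset);
 *	int i;
 *	for (i = 0; i < 8; i++)
 *		d[i] = s[i];			(32 bytes per expansion)
 *
 * MOVE_BIGALIGNCHUNK is the same transfer done with std, so it also
 * requires a doubleword aligned destination.
 */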

	.text
	.align	4

	.globl  __copy_user_begin
__copy_user_begin:

	.globl  __copy_user
dword_align:
	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0

	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0
4:
	EXO2(lduh [%o1], %g2)
	add	%o1, 2, %o1
	EXO2(sth %g2, [%o0])
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0

__copy_user:	/* %o0=dst %o1=src %o2=len */
	xor	%o0, %o1, %o4
1:
	andcc	%o4, 3, %o5
2:
	bne	cannot_optimize
	 cmp	%o2, 15

	bleu	short_aligned_end
	 andcc	%o1, 3, %g0

	bne	dword_align
3:
	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	EXO2(ld [%o1], %o4)
	sub	%g1, 4, %g1
	EXO2(st %o4, [%o0])
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:
	andcc	%g1, 0xffffff80, %g7
	be	3f
	 andcc	%o0, 4, %g0

	be	ldd_std + 4
5:
	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
80:
	EXT(5b, 80b, 50f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:
	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	/* Copy the 16..112 byte tail by jumping into copy_user_table so
	 * that exactly %g7/16 MOVE_LASTCHUNK expansions are left to run.
	 * Each expansion is 6 insns (24 bytes of code) per 16 data bytes,
	 * hence the entry point %g7 + %g7/2 code bytes before table end.
	 */
	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

copy_user_table:
	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
	EXT(copy_user_table, copy_user_table_end, 51f)
	be	copy_user_last7
	 andcc	%g1, 4, %g0

	EX(ldd [%o1], %g2, and %g1, 0xf)
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	EX(st %g2, [%o0 - 0x08], and %g1, 0xf)
	EX2(st %g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
copy_user_last7:
	be	1f
	 andcc	%g1, 2, %g0

	EX(ld [%o1], %g2, and %g1, 7)
	add	%o1, 4, %o1
	EX(st %g2, [%o0], and %g1, 7)
	add	%o0, 4, %o0
1:
	be	1f
	 andcc	%g1, 1, %g0

	EX(lduh [%o1], %g2, and %g1, 3)
	add	%o1, 2, %o1
	EX(sth %g2, [%o0], and %g1, 3)
	add	%o0, 2, %o0
1:
	be	1f
	 nop

	EX(ldub [%o1], %g2, add %g0, 1)
	EX(stb %g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0
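
/* Fully aligned case: source and destination are both doubleword aligned,
 * so the stores can use std too.  Entered only from above via
 * "be ldd_std + 4": the branch's delay slot at 5: has already issued the
 * first ldd of the chunk, which is why MOVE_BIGCHUNK and
 * MOVE_BIGALIGNCHUNK must start with exactly the same instruction.
 */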
ldd_std:
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
81:
	EXT(ldd_std, 81b, 52f)
	subcc	%g7, 128, %g7
	add	%o1, 128, %o1
	bne	ldd_std
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g7
	be	copy_user_table_end
	 andcc	%g1, 8, %g0

	sethi	%hi(copy_user_table_end), %o5
	srl	%g7, 1, %o4
	add	%g7, %o4, %o4
	add	%o1, %g7, %o1
	sub	%o5, %o4, %o5
	jmpl	%o5 + %lo(copy_user_table_end), %g0
	 add	%o0, %g7, %o0

cannot_optimize:	/* src and dst disagree on word alignment */
	bleu	short_end
	 cmp	%o5, 2

	bne	byte_chunk
	 and	%o2, 0xfffffff0, %o3

	/* (dst ^ src) & 3 == 2: halfwords work once %o1 is even */
	andcc	%o1, 1, %g0
	be	10f
	 nop

	EXO2(ldub [%o1], %g2)
	add	%o1, 1, %o1
	EXO2(stb %g2, [%o0])
	sub	%o2, 1, %o2
	andcc	%o2, 0xfffffff0, %o3
	be	short_end
	 add	%o0, 1, %o0
10:
	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
82:
	EXT(10b, 82b, 53f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	10b
	 add	%o0, 0x10, %o0
	b	2f
	 and	%o2, 0xe, %o3

byte_chunk:
	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
83:
	EXT(byte_chunk, 83b, 54f)
	subcc	%o3, 0x10, %o3
	add	%o1, 0x10, %o1
	bne	byte_chunk
	 add	%o0, 0x10, %o0

short_end:
	and	%o2, 0xe, %o3
2:
	sethi	%hi(short_table_end), %o5
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
	sub	%o5, %o4, %o5
	add	%o1, %o3, %o1
	jmpl	%o5 + %lo(short_table_end), %g0
	 andcc	%o2, 1, %g0
84:
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
	EXT(84b, short_table_end, 55f)
	be	1f
	 nop
	EX(ldub [%o1], %g2, add %g0, 1)
	EX(stb %g2, [%o0], add %g0, 1)
1:
	retl
	 clr	%o0

short_aligned_end:
	bne	short_end
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	EXO2(ld [%o1 + 0x00], %g2)
	EXO2(ld [%o1 + 0x04], %g3)
	add	%o1, 8, %o1
	EXO2(st %g2, [%o0 + 0x00])
	EX(st %g3, [%o0 + 0x04], sub %o2, 4)
	add	%o0, 8, %o0
1:
	b	copy_user_last7
	 mov	%o2, %g1

	.section .fixup,#alloc,#execinstr
	.align	4
97:
	/* EXO2 faults land here: %o2 still holds the full remaining count */
	mov	%o2, %g3
fixupretl:
	sethi	%hi(PAGE_OFFSET), %g1
	cmp	%o0, %g1
	blu	1f
	 cmp	%o1, %g1
	bgeu	1f
	 nop
	save	%sp, -64, %sp
	mov	%i0, %o0
	call	__bzero
	 mov	%g3, %o1
	restore
1:	retl
	 mov	%g3, %o0
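
/* All the numbered fixup handlers below funnel into fixupretl with
 * %g3 = number of bytes not yet copied and %o0 = first destination byte
 * not yet written (which is all that matters for the bzero case).
 * Roughly, in C (an illustrative sketch only):
 *
 *	if (dst < PAGE_OFFSET || src >= PAGE_OFFSET)
 *		return g3;	faulting copy_to_user: just report
 *	__bzero(o0, g3);	faulting copy_from_user: clear the
 *	return g3;		uncopied tail of the kernel buffer
 */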
/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
50:
/* This magic counts how many bytes are left when a crash in MOVE_BIGCHUNK
 * happens. This is derived from the amount ldd reads, st stores, etc.
 * x = g2 % 12;
 * g3 = (g1 & 0x7f) + g7 - (g2 / 12) * 32 - ((x < 4) ? 0 : (x - 4) * 4);
 * o0 += (g2 / 12) * 32;
 * E.g. a fault at insn 17 (g2 = 17, the second store of the second chunk)
 * gives x = 5, so 32 + 4 bytes are already accounted as copied.
 */
	cmp	%g2, 12
	add	%o0, %g7, %o0
	bcs	1f
	 cmp	%g2, 24
	bcs	2f
	 cmp	%g2, 36
	bcs	3f
	 nop
	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
3:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
2:	sub	%g2, 12, %g2
	sub	%g7, 32, %g7
1:	cmp	%g2, 4
	bcs,a	60f
	 clr	%g2
	sub	%g2, 4, %g2
	sll	%g2, 2, %g2
60:	and	%g1, 0x7f, %g3
	sub	%o0, %g7, %o0
	add	%g3, %g7, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
51:
/* i = 41 - g2; j = i % 6;
 * g3 = (g1 & 15) + (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 * o0 -= (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
 */
	neg	%g2
	and	%g1, 0xf, %g1
	add	%g2, 41, %g2
	add	%o0, %g1, %o0
1:	cmp	%g2, 6
	bcs,a	2f
	 cmp	%g2, 4
	add	%g1, 16, %g1
	b	1b
	 sub	%g2, 6, %g2
2:	bcc,a	2f
	 mov	16, %g2
	inc	%g2
	sll	%g2, 2, %g2
2:	add	%g1, %g2, %g3
	ba	fixupretl
	 sub	%o0, %g3, %o0
52:
/* g3 = (g1 & 0x7f) + g7 - (g2 / 8) * 32 - ((g2 & 4) ? (g2 & 3) * 8 : 0);
   o0 += (g2 / 8) * 32 */
	andn	%g2, 7, %g4
	add	%o0, %g7, %o0
	andcc	%g2, 4, %g0
	and	%g2, 3, %g2
	sll	%g4, 2, %g4
	sll	%g2, 3, %g2
	bne	60b
	 sub	%g7, %g4, %g7
	ba	60b
	 clr	%g2
53:
/* g3 = o3 + (o2 & 15) - (g2 & 8) - ((g2 & 4) ? (g2 & 3) * 2 : 0);
   o0 += (g2 & 8) */
	and	%g2, 3, %g4
	andcc	%g2, 4, %g0
	and	%g2, 8, %g2
	sll	%g4, 1, %g4
	be	1f
	 add	%o0, %g2, %o0
	add	%g2, %g4, %g2
1:	and	%o2, 0xf, %g3
	add	%g3, %o3, %g3
	ba	fixupretl
	 sub	%g3, %g2, %g3
54:
/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - ((g2 & 2) ? (g2 & 1) : 0);
   o0 += (g2 / 4) * 2 */
	srl	%g2, 2, %o4
	and	%g2, 1, %o5
	srl	%g2, 1, %g2
	add	%o4, %o4, %o4
	and	%o5, %g2, %o5
	and	%o2, 0xf, %o2
	add	%o0, %o4, %o0
	sub	%o3, %o5, %o3
	sub	%o2, %o4, %o2
	ba	fixupretl
	 add	%o2, %o3, %g3
55:
/* i = 27 - g2;
   g3 = (o2 & 1) + (i / 4) * 2 + ((i & 3) ? 1 : 0);
   o0 -= (i / 4) * 2 + ((i & 3) ? 1 : 0) */
	neg	%g2
	and	%o2, 1, %o2
	add	%g2, 27, %g2
	srl	%g2, 2, %o5
	andcc	%g2, 3, %g0
	mov	1, %g2
	add	%o5, %o5, %o5
	be,a	1f
	 clr	%g2
1:	add	%g2, %o5, %g3
	sub	%o0, %g3, %o0
	ba	fixupretl
	 add	%g3, %o2, %g3

	.globl __copy_user_end
__copy_user_end: