/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * User Space Access Routines
 *
 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


        .text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

        /*
         * get_sr gets the appropriate space value into
         * sr1 for kernel/user space access, depending
         * on the flag stored in the task structure.
         */

        .macro  get_sr
        mfctl   %cr30,%r1
        ldw     TI_SEGMENT(%r1),%r22
        mfsp    %sr3,%r1
        or,<>   %r22,%r0,%r0
        copy    %r0,%r1
        mtsp    %r1,%sr1
        .endm

        /*
         * unsigned long lclear_user(void *to, unsigned long n)
         *
         * Returns 0 for success;
         * otherwise, returns the number of bytes not transferred.
         */

ENTRY_CFI(lclear_user)
        comib,=,n   0,%r25,$lclu_done
        get_sr
$lclu_loop:
        addib,<>    -1,%r25,$lclu_loop
1:      stbs,ma     %r0,1(%sr1,%r26)

$lclu_done:
        bv          %r0(%r2)
        copy        %r25,%r28

2:      b           $lclu_done
        ldo         1(%r25),%r25

        ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)


        /*
         * long lstrnlen_user(char *s, long n)
         *
         * Returns 0 if an exception occurs before a zero byte or N is reached,
         *         N+1 if N would be exceeded,
         *         else strlen + 1 (i.e. includes the zero byte).
         */

ENTRY_CFI(lstrnlen_user)
        comib,=     0,%r25,$lslen_nzero
        copy        %r26,%r24
        get_sr
1:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_loop:
        comib,=,n   0,%r1,$lslen_done
        addib,<>    -1,%r25,$lslen_loop
2:      ldbs,ma     1(%sr1,%r26),%r1
$lslen_done:
        bv          %r0(%r2)
        sub         %r26,%r24,%r28

$lslen_nzero:
        b           $lslen_done
        ldo         1(%r26),%r26    /* special case for N == 0 */

3:      b           $lslen_done
        copy        %r24,%r26       /* reset r26 so 0 is returned on fault */

        ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
        ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from the glibc.
 *
 * Several strategies are tried in order to get the best performance for
 * various conditions. In the optimal case, we copy with loops that move 32
 * or 16 bytes at a time using general registers.  Unaligned copies are
 * handled either by aligning the destination and then using a
 * shift-and-write method, or in a few cases by falling back to a
 * byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of
 * memcpy (written in C) is actually quite fast already. This routine is able
 * to beat it by 30-40% for aligned copies because of the loop unrolling, but
 * in some cases the glibc version is still slightly faster. This lends
 * credibility to the idea that gcc can generate very good code as long as we
 * are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that these were only efficient
 *   on older machines (pre-PA8000 processors).
 */
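
/*
 * Illustrative only: a rough C model of the strategy described above. The
 * names here are made up for illustration; the real routine below copies 16
 * or 32 bytes per iteration, reads and writes through separate space
 * registers, and recovers from faults via the exception table entries. The
 * unaligned case, which the real code mostly handles with a shift-and-merge
 * loop (sketched separately before .Lunaligned_copy), is reduced to a plain
 * byte copy here.
 *
 *	unsigned long sketch_copy(void *dstp, const void *srcp,
 *				  unsigned long len)
 *	{
 *		unsigned char *d = dstp;
 *		const unsigned char *s = srcp;
 *
 *		if (len >= 16 && !(((unsigned long)s ^ (unsigned long)d) & 3)) {
 *			// same word alignment: align dst, then copy words
 *			while ((unsigned long)d & 3) {
 *				*d++ = *s++;
 *				len--;
 *			}
 *			while (len >= 4) {
 *				*(unsigned int *)d = *(const unsigned int *)s;
 *				d += 4;
 *				s += 4;
 *				len -= 4;
 *			}
 *		}
 *		// short, unaligned or leftover bytes go one at a time
 *		while (len--)
 *			*d++ = *s++;
 *		return 0;	// bytes not copied; zero on success
 *	}
 */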

        dst = arg0
        src = arg1
        len = arg2
        end = arg3
        t1  = r19
        t2  = r20
        t3  = r21
        t4  = r22
        srcspc = sr1
        dstspc = sr2

        t0 = r1
        a1 = t1
        a2 = t2
        a3 = t3
        a0 = t4

        save_src = ret0
        save_dst = ret1
        save_len = r31

ENTRY_CFI(pa_memcpy)
        /* Last destination address */
        add     dst,len,end

        /* short copy with less than 16 bytes? */
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

        /* same alignment? */
        xor     src,dst,t0
        extru   t0,31,2,t1
        cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
        /* only do 64-bit copies if we can get aligned. */
        extru   t0,31,3,t1
        cmpib,<>,n  0,t1,.Lalign_loop32

        /* loop until we are 64-bit aligned */
.Lalign_loop64:
        extru   dst,31,3,t1
        cmpib,=,n       0,t1,.Lcopy_loop_16_start
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop64
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
        ldi     31,t0
.Lcopy_loop_16:
        cmpb,COND(>>=),n t0,len,.Lword_loop

10:     ldd     0(srcspc,src),t1
11:     ldd     8(srcspc,src),t2
        ldo     16(src),src
12:     std,ma  t1,8(dstspc,dst)
13:     std,ma  t2,8(dstspc,dst)
14:     ldd     0(srcspc,src),t1
15:     ldd     8(srcspc,src),t2
        ldo     16(src),src
16:     std,ma  t1,8(dstspc,dst)
17:     std,ma  t2,8(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_16
        ldo     -32(len),len

.Lword_loop:
        cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:     ldw,ma  4(srcspc,src),t1
21:     stw,ma  t1,4(dstspc,dst)
        b       .Lword_loop
        ldo     -4(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

        /* loop until we are 32-bit aligned */
.Lalign_loop32:
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_loop_8
20:     ldb,ma  1(srcspc,src),t1
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lalign_loop32
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
        cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:     ldw     0(srcspc,src),t1
11:     ldw     4(srcspc,src),t2
12:     stw,ma  t1,4(dstspc,dst)
13:     stw,ma  t2,4(dstspc,dst)
14:     ldw     8(srcspc,src),t1
15:     ldw     12(srcspc,src),t2
        ldo     16(src),src
16:     stw,ma  t1,4(dstspc,dst)
17:     stw,ma  t2,4(dstspc,dst)

        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
        ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

        b       .Lcopy_loop_8
        ldo     -16(len),len

.Lbyte_loop:
        cmpclr,COND(<>) len,%r0,%r0
        b,n     .Lcopy_done
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lbyte_loop
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
        bv      %r0(%r2)
        sub     end,dst,ret0
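
        /*
         * Illustrative only: a rough C model of the shift-and-merge scheme
         * used by the unaligned path below, assuming 32-bit big-endian words
         * as on PA-RISC.  The names are made up; the code below performs the
         * merge with shrpw and a shift amount preloaded into %sar, handling
         * four words per loop iteration, and any tail bytes are finished by
         * .Lbyte_loop.
         *
         *	// dst is word aligned, src is misaligned by off bytes (1..3),
         *	// and "words" full words are still to be produced.
         *	void merge_copy(unsigned int *dst, const unsigned char *src,
         *			unsigned long words)
         *	{
         *		unsigned int off = (unsigned long)src & 3;
         *		const unsigned int *s =
         *			(const unsigned int *)((unsigned long)src - off);
         *		unsigned int prev = *s++, next;
         *
         *		while (words--) {
         *			next = *s++;
         *			// low bytes of prev joined with high bytes of next
         *			*dst++ = (prev << (8 * off)) |
         *				 (next >> (32 - 8 * off));
         *			prev = next;
         *		}
         *	}
         */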

        /* src and dst are not aligned the same way. */
        /* need to go the hard way */
.Lunaligned_copy:
        /* align until dst is 32-bit word aligned */
        extru   dst,31,2,t1
        cmpib,=,n       0,t1,.Lcopy_dstaligned
20:     ldb     0(srcspc,src),t1
        ldo     1(src),src
21:     stb,ma  t1,1(dstspc,dst)
        b       .Lunaligned_copy
        ldo     -1(len),len

        ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
        ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

        /* store src, dst and len in a safe place */
        copy    src,save_src
        copy    dst,save_dst
        copy    len,save_len

        /* len now needs to give the number of words to copy */
        SHRREG  len,2,len

        /*
         * Copy from a not-aligned src to an aligned dst using shifts.
         * Handles 4 words per loop.
         */

        depw,z src,28,2,t0
        subi 32,t0,t0
        mtsar t0
        extru len,31,2,t0
        cmpib,= 2,t0,.Lcase2
        /* Make src aligned by rounding it down. */
        depi 0,31,2,src

        cmpiclr,<> 3,t0,%r0
        b,n .Lcase3
        cmpiclr,<> 1,t0,%r0
        b,n .Lcase1
.Lcase0:
        cmpb,COND(=) %r0,len,.Lcda_finish
        nop

1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b,n .Ldo3
.Lcase1:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        ldo -1(len),len
        cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a3, a0, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a0, a1, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:      ldw,ma 4(srcspc,src), a3
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        shrpw a1, a2, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
        ldo -4(len),len
        cmpb,COND(<>) %r0,len,.Ldo4
        nop
.Ldo0:
        shrpw a2, a3, %sar, t0
1:      stw,ma t0, 4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
        /* calculate new src, dst and len and jump to byte-copy loop */
        sub     dst,save_dst,t0
        add     save_src,t0,src
        b       .Lbyte_loop
        sub     save_len,t0,len

.Lcase3:
1:      ldw,ma 4(srcspc,src), a0
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo2
        ldo 1(len),len
.Lcase2:
1:      ldw,ma 4(srcspc,src), a1
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:      ldw,ma 4(srcspc,src), a2
        ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
        b .Ldo1
        ldo 2(len),len


        /* fault exception fixup handlers: */
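        /*
         * .Lcopy16_fault and .Lcopy8_fault are entered when the second load
         * of a doubleword/word pair faults: the first, already-loaded value
         * is still written out (in the branch delay slot) before returning
         * through .Lcopy_done, so the reported number of uncopied bytes
         * stays accurate.
         */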
#ifdef CONFIG_64BIT
.Lcopy16_fault:
        b       .Lcopy_done
10:     std,ma  t1,8(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
        b       .Lcopy_done
10:     stw,ma  t1,4(dstspc,dst)
        ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

        .end