/*
 * User Space Access Routines
 *
 * Copyright (C) 2000-2002 Hewlett-Packard (John Marvin)
 * Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
 * Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
 * Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
 * Copyright (C) 2017 Helge Deller <deller@gmx.de>
 * Copyright (C) 2017 John David Anglin <dave.anglin@bell.net>
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * These routines still have plenty of room for optimization
 * (word & doubleword load/store, dual issue, store hints, etc.).
 */

/*
 * The following routines assume that space register 3 (sr3) contains
 * the space id associated with the current user's address space.
 */


	.text

#include <asm/assembly.h>
#include <asm/errno.h>
#include <linux/linkage.h>

	/*
	 * get_sr gets the appropriate space value into
	 * sr1 for kernel/user space access, depending
	 * on the flag stored in the task structure.
	 */

	.macro	get_sr
	mfctl	%cr30,%r1
	ldw	TI_SEGMENT(%r1),%r22
	mfsp	%sr3,%r1
	or,<>	%r22,%r0,%r0
	copy	%r0,%r1
	mtsp	%r1,%sr1
	.endm

	/*
	 * unsigned long lclear_user(void *to, unsigned long n)
	 *
	 * Returns 0 for success.
	 * Otherwise, returns the number of bytes not transferred.
	 */

ENTRY_CFI(lclear_user)
	comib,=,n	0,%r25,$lclu_done
	get_sr
$lclu_loop:
	addib,<>	-1,%r25,$lclu_loop
1:	stbs,ma		%r0,1(%sr1,%r26)

$lclu_done:
	bv		%r0(%r2)
	copy		%r25,%r28

2:	b		$lclu_done
	ldo		1(%r25),%r25

	ASM_EXCEPTIONTABLE_ENTRY(1b,2b)
ENDPROC_CFI(lclear_user)


	/*
	 * long lstrnlen_user(char *s, long n)
	 *
	 * Returns 0 if exception before zero byte or reaching N,
	 *         N+1 if N would be exceeded,
	 *         else strlen + 1 (i.e. includes zero byte).
	 */

ENTRY_CFI(lstrnlen_user)
	comib,=		0,%r25,$lslen_nzero
	copy		%r26,%r24
	get_sr
1:	ldbs,ma		1(%sr1,%r26),%r1
$lslen_loop:
	comib,=,n	0,%r1,$lslen_done
	addib,<>	-1,%r25,$lslen_loop
2:	ldbs,ma		1(%sr1,%r26),%r1
$lslen_done:
	bv		%r0(%r2)
	sub		%r26,%r24,%r28

$lslen_nzero:
	b		$lslen_done
	ldo		1(%r26),%r26	/* special case for N == 0 */

3:	b		$lslen_done
	copy		%r24,%r26	/* reset r26 so 0 is returned on fault */

	ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
	ASM_EXCEPTIONTABLE_ENTRY(2b,3b)

ENDPROC_CFI(lstrnlen_user)


/*
 * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
 *
 * Inputs:
 * - sr1 already contains space of source region
 * - sr2 already contains space of destination region
 *
 * Returns:
 * - number of bytes that could not be copied.
 *   On success, this will be zero.
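 *
 *   A rough caller-side sketch of this return convention (illustrative
 *   only; the variable names below exist only in this comment):
 *
 *	unsigned long not_copied = pa_memcpy(dst, src, len);
 *	unsigned long copied = len - not_copied;   // bytes actually written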
 *
 * This code is based on a C implementation of a copy routine written by
 * Randolph Chung, which in turn was derived from glibc.
 *
 * Several strategies are tried to get the best performance for various
 * conditions. In the optimal case, we copy in loops of 32 or 16 bytes at a
 * time using general registers. Unaligned copies are handled either by
 * aligning the destination and then using a shift-and-write method, or in a
 * few cases by falling back to a byte-at-a-time copy.
 *
 * Testing with various alignments and buffer sizes shows that this code is
 * often >10x faster than a simple byte-at-a-time copy, even for strangely
 * aligned operands. It is interesting to note that the glibc version of memcpy
 * (written in C) is actually quite fast already. This routine is able to beat
 * it by 30-40% for aligned copies because of the loop unrolling, but in some
 * cases the glibc version is still slightly faster. This lends more
 * credibility to the idea that gcc can generate very good code as long as we
 * are careful.
 *
 * Possible optimizations:
 * - add cache prefetching
 * - try not to use the post-increment address modifiers; they may create
 *   additional interlocks. The assumption is that those were only efficient
 *   on old machines (pre-PA8000 processors)
 */

	dst = arg0
	src = arg1
	len = arg2
	end = arg3
	t1  = r19
	t2  = r20
	t3  = r21
	t4  = r22
	srcspc = sr1
	dstspc = sr2

	t0 = r1
	a1 = t1
	a2 = t2
	a3 = t3
	a0 = t4

	save_src = ret0
	save_dst = ret1
	save_len = r31

ENTRY_CFI(pa_memcpy)
	/* Last destination address */
	add	dst,len,end

	/* short copy with less than 16 bytes? */
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

	/* same alignment? */
	xor	src,dst,t0
	extru	t0,31,2,t1
	cmpib,<>,n  0,t1,.Lunaligned_copy

#ifdef CONFIG_64BIT
	/* only do 64-bit copies if we can get aligned. */
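	/*
	 * t0 still holds src^dst from the check above; its low three bits
	 * tell us whether src and dst can reach the same 8-byte alignment.
	 * If they cannot, fall back to the 32-bit copy loops below.
	 */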
	extru	t0,31,3,t1
	cmpib,<>,n  0,t1,.Lalign_loop32

	/* loop until we are 64-bit aligned */
.Lalign_loop64:
	extru	dst,31,3,t1
	cmpib,=,n	0,t1,.Lcopy_loop_16_start
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop64
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_loop_16_start:
	ldi	31,t0
.Lcopy_loop_16:
	cmpb,COND(>>=),n t0,len,.Lword_loop

10:	ldd	0(srcspc,src),t1
11:	ldd	8(srcspc,src),t2
	ldo	16(src),src
12:	std,ma	t1,8(dstspc,dst)
13:	std,ma	t2,8(dstspc,dst)
14:	ldd	0(srcspc,src),t1
15:	ldd	8(srcspc,src),t2
	ldo	16(src),src
16:	std,ma	t1,8(dstspc,dst)
17:	std,ma	t2,8(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_16
	ldo	-32(len),len

.Lword_loop:
	cmpib,COND(>>=),n 3,len,.Lbyte_loop
20:	ldw,ma	4(srcspc,src),t1
21:	stw,ma	t1,4(dstspc,dst)
	b	.Lword_loop
	ldo	-4(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

#endif /* CONFIG_64BIT */

	/* loop until we are 32-bit aligned */
.Lalign_loop32:
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_loop_8
20:	ldb,ma	1(srcspc,src),t1
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lalign_loop32
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)


.Lcopy_loop_8:
	cmpib,COND(>>=),n 15,len,.Lbyte_loop

10:	ldw	0(srcspc,src),t1
11:	ldw	4(srcspc,src),t2
12:	stw,ma	t1,4(dstspc,dst)
13:	stw,ma	t2,4(dstspc,dst)
14:	ldw	8(srcspc,src),t1
15:	ldw	12(srcspc,src),t2
	ldo	16(src),src
16:	stw,ma	t1,4(dstspc,dst)
17:	stw,ma	t2,4(dstspc,dst)

	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)

	b	.Lcopy_loop_8
	ldo	-16(len),len

.Lbyte_loop:
	cmpclr,COND(<>) len,%r0,%r0
	b,n	.Lcopy_done
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lbyte_loop
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_done:
	bv	%r0(%r2)
	sub	end,dst,ret0


	/* src and dst are not aligned the same way. */
	/* need to go the hard way */
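	/*
	 * The idea of the shift-and-merge path below, as a rough C sketch
	 * (illustrative only; the function and variable names here exist
	 * only in this comment).  Once dst is word aligned, src is rounded
	 * down to a word boundary and each destination word is assembled
	 * from two consecutive aligned source words with a funnel shift,
	 * which is what shrpw computes once %sar has been loaded with
	 * 32 - 8*(src & 3).  PA-RISC is big-endian, which the sketch assumes:
	 *
	 *	#include <stdint.h>
	 *
	 *	static void shift_merge_copy(uint32_t *dst,
	 *				     const uint32_t *src_al,
	 *				     unsigned int ofs,	// src & 3, nonzero
	 *				     unsigned long nwords)
	 *	{
	 *		uint32_t a = src_al[0];
	 *
	 *		for (unsigned long i = 0; i < nwords; i++) {
	 *			uint32_t b = src_al[i + 1];
	 *			// big-endian merge: drop the first 'ofs' bytes
	 *			// of a, append the first 'ofs' bytes of b
	 *			dst[i] = (a << (8 * ofs)) | (b >> (32 - 8 * ofs));
	 *			a = b;
	 *		}
	 *	}
	 *
	 * The real code unrolls this four words per iteration (.Ldo4/.Ldo3/
	 * .Ldo2/.Ldo1) and falls back to the byte loop on a read fault.
	 */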
.Lunaligned_copy:
	/* align until dst is 32-bit word aligned */
	extru	dst,31,2,t1
	cmpib,=,n	0,t1,.Lcopy_dstaligned
20:	ldb	0(srcspc,src),t1
	ldo	1(src),src
21:	stb,ma	t1,1(dstspc,dst)
	b	.Lunaligned_copy
	ldo	-1(len),len

	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)

.Lcopy_dstaligned:

	/* store src, dst and len in safe place */
	copy	src,save_src
	copy	dst,save_dst
	copy	len,save_len

	/* len now needs to give the number of words to copy */
	SHRREG	len,2,len

	/*
	 * Copy from a not-aligned src to an aligned dst using shifts.
	 * Handles 4 words per loop.
	 */

	depw,z src,28,2,t0
	subi 32,t0,t0
	mtsar t0
	extru len,31,2,t0
	cmpib,= 2,t0,.Lcase2
	/* Make src aligned by rounding it down. */
	depi 0,31,2,src

	cmpiclr,<> 3,t0,%r0
	b,n .Lcase3
	cmpiclr,<> 1,t0,%r0
	b,n .Lcase1
.Lcase0:
	cmpb,COND(=) %r0,len,.Lcda_finish
	nop

1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b,n .Ldo3
.Lcase1:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	ldo -1(len),len
	cmpb,COND(=),n %r0,len,.Ldo0
.Ldo4:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo3:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a3, a0, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo2:
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a0, a1, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
.Ldo1:
1:	ldw,ma 4(srcspc,src), a3
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	shrpw a1, a2, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
	ldo -4(len),len
	cmpb,COND(<>) %r0,len,.Ldo4
	nop
.Ldo0:
	shrpw a2, a3, %sar, t0
1:	stw,ma t0, 4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)

.Lcda_rdfault:
.Lcda_finish:
	/* calculate new src, dst and len and jump to byte-copy loop */
	sub	dst,save_dst,t0
	add	save_src,t0,src
	b	.Lbyte_loop
	sub	save_len,t0,len

.Lcase3:
1:	ldw,ma 4(srcspc,src), a0
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo2
	ldo 1(len),len
.Lcase2:
1:	ldw,ma 4(srcspc,src), a1
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
1:	ldw,ma 4(srcspc,src), a2
	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
	b .Ldo1
	ldo 2(len),len


	/* fault exception fixup handlers: */
#ifdef CONFIG_64BIT
.Lcopy16_fault:
	b	.Lcopy_done
10:	std,ma	t1,8(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
#endif

.Lcopy8_fault:
	b	.Lcopy_done
10:	stw,ma	t1,4(dstspc,dst)
	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
ENDPROC_CFI(pa_memcpy)

	.end