// SPDX-License-Identifier: GPL-2.0
/*
 * linux/arch/alpha/lib/memcpy.c
 *
 * Copyright (C) 1995 Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

#include <linux/types.h>
#include <linux/export.h>
#include <linux/string.h>

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8_UP(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define ALIGN_DEST_TO8_DN(d,s,n) \
	while (d & 7) { \
		if (n <= 0) return; \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST_UP(d,s,n) \
	while (n > 0) { \
		n--; \
		*(char *) d = *(char *) s; \
		d++; s++; \
	}
#define DO_REST_DN(d,s,n) \
	while (n > 0) { \
		n--; \
		d--; s--; \
		*(char *) d = *(char *) s; \
	}

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid double-reading the unaligned reads.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
					  long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;			/* to avoid compare against 8 in the loop */
	if (n >= 0) {
		unsigned long low_word, high_word;
		__asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
		do {
			unsigned long tmp;
			__asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
			n -= 8;
			__asm__("extql %1,%2,%0"
				:"=r" (low_word)
				:"r" (low_word), "r" (s));
			__asm__("extqh %1,%2,%0"
				:"=r" (tmp)
				:"r" (high_word), "r" (s));
			s += 8;
			*(unsigned long *) d = low_word | tmp;
			d += 8;
			low_word = high_word;
		} while (n >= 0);
	}
	n += 8;
	DO_REST_UP(d,s,n);
}

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
					  long n)
{
	/* I don't understand AXP assembler well enough for this. -Tim */
	s += n;
	d += n;
	while (n--)
		* (char *) --d = * (char *) --s;
}

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but it would seem that using a floating
 * point register for the move seems to slow things down (very small difference,
 * though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
					long n)
{
	ALIGN_DEST_TO8_UP(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		s += 8;
		*(unsigned long *) d = tmp;
		d += 8;
	}
	n += 8;
	DO_REST_ALIGNED_UP(d,s,n);
}
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
					long n)
{
	s += n;
	d += n;
	ALIGN_DEST_TO8_DN(d,s,n);
	n -= 8;
	while (n >= 0) {
		unsigned long tmp;
		s -= 8;
		__asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
		n -= 8;
		d -= 8;
		*(unsigned long *) d = tmp;
	}
	n += 8;
	DO_REST_ALIGNED_DN(d,s,n);
}

#undef memcpy

void * memcpy(void * dest, const void *src, size_t n)
{
	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
		__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				     n);
		return dest;
	}
	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
	return dest;
}
EXPORT_SYMBOL(memcpy);
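
/*
 * Purely illustrative sketch, not part of the original file: a portable-C
 * model of what the ldq_u/extql/extqh/or sequence in __memcpy_unaligned_up()
 * computes.  The helper name below (__memcpy_merge_sketch) and the parameter
 * "k" are hypothetical; "k" stands for s & 7, which is non-zero on this path
 * because the unaligned routine is only reached when dest and src differ in
 * alignment.  On little-endian Alpha, the two ldq_u loads fetch the aligned
 * quadwords surrounding the unaligned source, extql shifts the first right by
 * 8*k bits, extqh shifts the second left by 8*(8-k) bits, and the OR
 * reassembles the eight unaligned source bytes so the store to the aligned
 * destination can be a plain stq.
 */
static inline unsigned long __memcpy_merge_sketch(unsigned long low_word,
						  unsigned long high_word,
						  unsigned long k)
{
	/* valid for k in 1..7, the only values that reach the merge loop */
	return (low_word >> (8 * k)) | (high_word << (8 * (8 - k)));
}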