xref: /linux/arch/microblaze/lib/memcpy.c (revision 132db93572821ec2fdf81e354cc40f558faf7e4f)
1 /*
2  * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
3  * Copyright (C) 2008-2009 PetaLogix
4  * Copyright (C) 2007 John Williams
5  *
6  * Reasonably optimised generic C-code for memcpy on Microblaze
7  * This is generic C code to do efficient, alignment-aware memcpy.
8  *
9  * It is based on demo code originally Copyright 2001 by Intel Corp, taken from
10  * http://www.embedded.com/showArticle.jhtml?articleID=19205567
11  *
12  * Attempts were made, unsuccessfully, to contact the original
13  * author of this code (Michael Morrow, Intel).  Below is the original
14  * copyright notice.
15  *
16  * This software has been developed by Intel Corporation.
17  * Intel specifically disclaims all warranties, express or
18  * implied, and all liability, including consequential and
19  * other indirect damages, for the use of this program, including
20  * liability for infringement of any proprietary rights,
21  * and including the warranties of merchantability and fitness
22  * for a particular purpose. Intel does not assume any
23  * responsibility for and errors which may appear in this program
24  * not any responsibility to update it.
25  */
26 
27 #include <linux/export.h>
28 #include <linux/types.h>
29 #include <linux/stddef.h>
30 #include <linux/compiler.h>
31 
32 #include <linux/string.h>
33 
34 #ifdef __HAVE_ARCH_MEMCPY
35 #ifndef CONFIG_OPT_LIB_FUNCTION
/*
 * memcpy - plain byte-at-a-time copy, built when CONFIG_OPT_LIB_FUNCTION
 * is not set.
 * @v_dst: destination buffer
 * @v_src: source buffer
 * @c: number of bytes to copy
 *
 * No alignment tricks are attempted: bytes are transferred one by one in
 * ascending address order.
 *
 * Returns @v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *from = v_src;
	char *to = v_dst;
	__kernel_size_t i;

	for (i = 0; i < c; i++)
		to[i] = from[i];

	return v_dst;
}
47 #else /* CONFIG_OPT_LIB_FUNCTION */
/*
 * memcpy - word-oriented, alignment-aware copy, built when
 * CONFIG_OPT_LIB_FUNCTION is set.
 * @v_dst: destination buffer
 * @v_src: source buffer
 * @c: number of bytes to copy
 *
 * Copies of four bytes or more first advance byte-wise until the destination
 * is 32-bit aligned and then store whole words.  If the source then sits on
 * a word boundary as well, words are loaded directly.  Otherwise the source
 * is read as aligned words and every output word is spliced together from
 * two neighbouring loads using shifts; this may perform badly on MicroBlaze
 * systems without a barrel shifter.  Whatever is left (at most three bytes),
 * and any copy shorter than four bytes, is done one byte at a time.
 *
 * The shifted paths load the complete aligned words that contain the first
 * and last source bytes they consume, so up to three bytes on either side of
 * the source range may be read (never written).
 *
 * Returns @v_dst.
 */
void *memcpy(void *v_dst, const void *v_src, __kernel_size_t c)
{
	const char *src = v_src;
	char *dst = v_dst;
	const uint32_t *i_src;
	uint32_t *i_dst;

	if (likely(c >= 4)) {
		/* value: word just loaded; buf_hold: leftover of previous word */
		uint32_t value, buf_hold;

		/*
		 * Align the destination to a word boundary, one byte at a
		 * time so it is endian independent.  Each case deliberately
		 * falls into the next: offset 1 needs three bytes, offset 2
		 * two and offset 3 one.  c >= 4 guarantees they are there.
		 */
		switch ((unsigned long)dst & 3) {
		case 1:
			*dst++ = *src++;
			--c;
			/* fall through */
		case 2:
			*dst++ = *src++;
			--c;
			/* fall through */
		case 3:
			*dst++ = *src++;
			--c;
		}

		i_dst = (void *)dst;

		/*
		 * Choose a copy scheme based on the source alignment
		 * relative to the (now aligned) destination.
		 */
		switch ((unsigned long)src & 3) {
		case 0x0:	/* Both byte offsets are aligned */
			i_src  = (const void *)src;

			for (; c >= 4; c -= 4)
				*i_dst++ = *i_src++;

			src  = (const void *)i_src;
			break;
		case 0x1:	/* Unaligned - Off by 1 */
			/*
			 * Word align the source.  The pointer is converted
			 * through unsigned long (as everywhere else in this
			 * function) so no address bits are lost.
			 */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | (value >> 24);
				buf_hold = value << 8;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFFFF00) >> 8;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFF) << 24);
				buf_hold = (value & 0xFFFFFF00) >> 8;
			}
#endif
			/*
			 * Realign the source: i_src is one word past the
			 * holding word, of which three bytes are still unused.
			 */
			src = (const void *)i_src;
			src -= 3;
			break;
		case 0x2:	/* Unaligned - Off by 2 */
			/* Word align the source */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | (value >> 16);
				buf_hold = value << 16;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFFFF0000) >> 16;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFF) << 16);
				buf_hold = (value & 0xFFFF0000) >> 16;
			}
#endif
			/* Realign the source: two held bytes are still unused */
			src = (const void *)i_src;
			src -= 2;
			break;
		case 0x3:	/* Unaligned - Off by 3 */
			/* Word align the source */
			i_src = (const void *) ((unsigned long)src & ~3UL);
#ifndef __MICROBLAZEEL__
			/* Load the holding buffer */
			buf_hold = *i_src++ << 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | (value >> 8);
				buf_hold = value << 24;
			}
#else
			/* Load the holding buffer */
			buf_hold = (*i_src++ & 0xFF000000) >> 24;

			for (; c >= 4; c -= 4) {
				value = *i_src++;
				*i_dst++ = buf_hold | ((value & 0xFFFFFF) << 8);
				buf_hold = (value & 0xFF000000) >> 24;
			}
#endif
			/* Realign the source: one held byte is still unused */
			src = (const void *)i_src;
			src -= 1;
			break;
		}
		dst = (void *)i_dst;
	}

	/*
	 * Finish off the remaining 0-3 bytes with plain byte copies.  The
	 * cases deliberately fall through so that exactly c bytes are moved.
	 */
	switch (c) {
	case 3:
		*dst++ = *src++;
		/* fall through */
	case 2:
		*dst++ = *src++;
		/* fall through */
	case 1:
		*dst++ = *src++;
	}

	return v_dst;
}
187 #endif /* CONFIG_OPT_LIB_FUNCTION */
188 EXPORT_SYMBOL(memcpy);
189 #endif /* __HAVE_ARCH_MEMCPY */
190