xref: /freebsd/sys/contrib/ncsw/etc/memcpy.c (revision dd41de95a84d979615a2ef11df6850622bf6184e)
1 /*
2  * Copyright 2008-2012 Freescale Semiconductor Inc.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *     * Redistributions of source code must retain the above copyright
7  *       notice, this list of conditions and the following disclaimer.
8  *     * Redistributions in binary form must reproduce the above copyright
9  *       notice, this list of conditions and the following disclaimer in the
10  *       documentation and/or other materials provided with the distribution.
11  *     * Neither the name of Freescale Semiconductor nor the
12  *       names of its contributors may be used to endorse or promote products
13  *       derived from this software without specific prior written permission.
14  *
15  *
16  * ALTERNATIVELY, this software may be distributed under the terms of the
17  * GNU General Public License ("GPL") as published by the Free Software
18  * Foundation, either version 2 of that License or (at your option) any
19  * later version.
20  *
21  * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
22  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24  * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
25  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 
34 
35 #include "std_ext.h"
36 #include "xx_ext.h"
37 #include "memcpy_ext.h"
38 
/*
 * Copy 'size' bytes from pSrc to pDst one byte at a time, walking both
 * buffers in ascending address order.
 *
 * pDst - destination buffer
 * pSrc - source buffer
 * size - number of bytes to copy
 *
 * Returns pDst.
 */
void * MemCpy8(void* pDst, void* pSrc, uint32_t size)
{
    uint8_t     *p_Out = (uint8_t*)pDst;
    uint8_t     *p_In  = (uint8_t*)pSrc;
    uint32_t    remaining;

    for (remaining = size; remaining != 0; remaining--)
        *p_Out++ = *p_In++;

    return pDst;
}
48 
/*
 * Fill 'size' bytes starting at pDst with the low 8 bits of 'c', one byte
 * at a time.
 *
 * pDst - destination buffer
 * c    - fill value; only (uint8_t)c is stored
 * size - number of bytes to fill
 *
 * Returns pDst.
 */
void * MemSet8(void* pDst, int c, uint32_t size)
{
    const uint8_t   fill = (uint8_t)c;
    uint8_t         *p_Out = (uint8_t*)pDst;
    uint32_t        remaining;

    for (remaining = size; remaining != 0; remaining--)
        *p_Out++ = fill;

    return pDst;
}
58 
/*
 * Copy 'size' bytes from pSrc to pDst, using 32-bit accesses where possible.
 *
 * The copy runs in ascending address order:
 *   1. single bytes until the source is 4-byte aligned;
 *   2. single bytes until the destination is 4-byte aligned (this may
 *      leave the source misaligned again);
 *   3. whole words - copied directly when the source is aligned as well,
 *      otherwise each destination word is assembled from two consecutive
 *      aligned source words;
 *   4. single bytes for whatever remains.
 *
 * NOTE: the shift/merge used for a misaligned source reproduces the source
 * byte order only on a big-endian CPU.
 *
 * pDst - destination buffer
 * pSrc - source buffer
 * size - number of bytes to copy
 *
 * Returns pDst.
 */
void * MemCpy32(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint32_t lastWord;
    uint32_t currWord;
    uint32_t *p_Src32;
    uint32_t *p_Dst32;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 3) && size) /* (pSrc mod 4) > 0 and size > 0 */
    {
        *p_Dst8++ = *p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 3) && size) /* (pDst mod 4) > 0 and size > 0 */
    {
        *p_Dst8++ = *p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 3) << 3); /* leftAlign = (pSrc mod 4)*8 */
    rightAlign = 32 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        p_Src32 = (uint32_t*)(p_Src8);
        p_Dst32 = (uint32_t*)(p_Dst8);
        while (size >> 2) /* size >= 4 */
        {
            *p_Dst32++ = *p_Src32++;
            size -= 4;
        }
        p_Src8 = (uint8_t*)(p_Src32);
        p_Dst8 = (uint8_t*)(p_Dst32);
    }
    else if (size >> 3) /* size >= 8 */
    {
        /* source is not aligned (destination is aligned).
         * Entered only when at least one word will really be merged, so the
         * preload below never touches a source word that is not needed
         * (in particular nothing is read when size is 0).
         */
        p_Src32 = (uint32_t*)(p_Src8 - (leftAlign >> 3));
        p_Dst32 = (uint32_t*)(p_Dst8);
        lastWord = *p_Src32++;
        while(size >> 3) /* size >= 8: the next aligned source word lies inside the buffer */
        {
            currWord = *p_Src32;
            /* tail of the previous word followed by head of the current one */
            *p_Dst32 = (lastWord << leftAlign) | (currWord >> rightAlign);
            lastWord = currWord;
            p_Src32++;
            p_Dst32++;
            size -= 4;
        }
        p_Dst8 = (uint8_t*)(p_Dst32);
        /* step back to the first source byte that has not been stored yet */
        p_Src8 = (uint8_t*)(p_Src32) - 4 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
        *p_Dst8++ = *p_Src8++;

    return pDst;
}
132 
/*
 * Copy 'size' bytes from pSrc to pDst, using 32-bit accesses where possible.
 * Every source access goes through GET_UINT8/GET_UINT32 and every
 * destination access through WRITE_UINT8/WRITE_UINT32.
 *
 * The copy runs in ascending address order:
 *   1. single bytes until the source is 4-byte aligned;
 *   2. single bytes until the destination is 4-byte aligned (this may
 *      leave the source misaligned again);
 *   3. whole words - copied directly when the source is aligned as well,
 *      otherwise each destination word is assembled from two consecutive
 *      aligned source words;
 *   4. single bytes for whatever remains.
 *
 * NOTE(review): the shift/merge used for a misaligned source yields the
 * source byte order only when words are big-endian - assumes
 * GET_UINT32/WRITE_UINT32 transfer words in native order; confirm.
 *
 * pDst - destination buffer
 * pSrc - source buffer
 * size - number of bytes to copy
 *
 * Returns pDst.
 */
void * IO2IOCpy32(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint32_t lastWord;
    uint32_t currWord;
    uint32_t *p_Src32;
    uint32_t *p_Dst32;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 3) && size) /* (pSrc mod 4) > 0 and size > 0 */
    {
        WRITE_UINT8(*p_Dst8, GET_UINT8(*p_Src8));
        p_Dst8++;p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 3) && size) /* (pDst mod 4) > 0 and size > 0 */
    {
        WRITE_UINT8(*p_Dst8, GET_UINT8(*p_Src8));
        p_Dst8++;p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 3) << 3); /* leftAlign = (pSrc mod 4)*8 */
    rightAlign = 32 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        p_Src32 = (uint32_t*)(p_Src8);
        p_Dst32 = (uint32_t*)(p_Dst8);
        while (size >> 2) /* size >= 4 */
        {
            WRITE_UINT32(*p_Dst32, GET_UINT32(*p_Src32));
            p_Dst32++;p_Src32++;
            size -= 4;
        }
        p_Src8 = (uint8_t*)(p_Src32);
        p_Dst8 = (uint8_t*)(p_Dst32);
    }
    else if (size >> 3) /* size >= 8 */
    {
        /* source is not aligned (destination is aligned).
         * Entered only when at least one word will really be merged, so the
         * preload below never issues a source read that is not needed
         * (in particular nothing is read when size is 0).
         */
        p_Src32 = (uint32_t*)(p_Src8 - (leftAlign >> 3));
        p_Dst32 = (uint32_t*)(p_Dst8);
        lastWord = GET_UINT32(*p_Src32);
        p_Src32++;
        while(size >> 3) /* size >= 8: the next aligned source word lies inside the buffer */
        {
            currWord = GET_UINT32(*p_Src32);
            /* tail of the previous word followed by head of the current one */
            WRITE_UINT32(*p_Dst32, (lastWord << leftAlign) | (currWord >> rightAlign));
            lastWord = currWord;
            p_Src32++;p_Dst32++;
            size -= 4;
        }
        p_Dst8 = (uint8_t*)(p_Dst32);
        /* step back to the first source byte that has not been stored yet */
        p_Src8 = (uint8_t*)(p_Src32) - 4 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
    {
        WRITE_UINT8(*p_Dst8, GET_UINT8(*p_Src8));
        p_Dst8++;p_Src8++;
    }

    return pDst;
}
211 
/*
 * Copy 'size' bytes from pSrc to pDst, using 32-bit accesses where possible.
 * The source is read with plain loads; every destination access goes
 * through WRITE_UINT8/WRITE_UINT32.
 *
 * The copy runs in ascending address order:
 *   1. single bytes until the source is 4-byte aligned;
 *   2. single bytes until the destination is 4-byte aligned (this may
 *      leave the source misaligned again);
 *   3. whole words - copied directly when the source is aligned as well,
 *      otherwise each destination word is assembled from two consecutive
 *      aligned source words;
 *   4. single bytes for whatever remains.
 *
 * NOTE(review): the shift/merge used for a misaligned source yields the
 * source byte order only when words are big-endian - assumes WRITE_UINT32
 * stores words in native order; confirm.
 *
 * pDst - destination buffer
 * pSrc - source buffer
 * size - number of bytes to copy
 *
 * Returns pDst.
 */
void * Mem2IOCpy32(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint32_t lastWord;
    uint32_t currWord;
    uint32_t *p_Src32;
    uint32_t *p_Dst32;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 3) && size) /* (pSrc mod 4) > 0 and size > 0 */
    {
        WRITE_UINT8(*p_Dst8, *p_Src8);
        p_Dst8++;p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 3) && size) /* (pDst mod 4) > 0 and size > 0 */
    {
        WRITE_UINT8(*p_Dst8, *p_Src8);
        p_Dst8++;p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 3) << 3); /* leftAlign = (pSrc mod 4)*8 */
    rightAlign = 32 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        p_Src32 = (uint32_t*)(p_Src8);
        p_Dst32 = (uint32_t*)(p_Dst8);
        while (size >> 2) /* size >= 4 */
        {
            WRITE_UINT32(*p_Dst32, *p_Src32);
            p_Dst32++;p_Src32++;
            size -= 4;
        }
        p_Src8 = (uint8_t*)(p_Src32);
        p_Dst8 = (uint8_t*)(p_Dst32);
    }
    else if (size >> 3) /* size >= 8 */
    {
        /* source is not aligned (destination is aligned).
         * Entered only when at least one word will really be merged, so the
         * preload below never touches a source word that is not needed
         * (in particular nothing is read when size is 0).
         */
        p_Src32 = (uint32_t*)(p_Src8 - (leftAlign >> 3));
        p_Dst32 = (uint32_t*)(p_Dst8);
        lastWord = *p_Src32++;
        while(size >> 3) /* size >= 8: the next aligned source word lies inside the buffer */
        {
            currWord = *p_Src32;
            /* tail of the previous word followed by head of the current one */
            WRITE_UINT32(*p_Dst32, (lastWord << leftAlign) | (currWord >> rightAlign));
            lastWord = currWord;
            p_Src32++;p_Dst32++;
            size -= 4;
        }
        p_Dst8 = (uint8_t*)(p_Dst32);
        /* step back to the first source byte that has not been stored yet */
        p_Src8 = (uint8_t*)(p_Src32) - 4 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
    {
        WRITE_UINT8(*p_Dst8, *p_Src8);
        p_Dst8++;p_Src8++;
    }

    return pDst;
}
289 
/*
 * Copy 'size' bytes from pSrc to pDst, using 32-bit accesses where possible.
 * Every source access goes through GET_UINT8/GET_UINT32; the destination
 * is written with plain stores.
 *
 * The copy runs in ascending address order:
 *   1. single bytes until the source is 4-byte aligned;
 *   2. single bytes until the destination is 4-byte aligned (this may
 *      leave the source misaligned again);
 *   3. whole words - copied directly when the source is aligned as well,
 *      otherwise each destination word is assembled from two consecutive
 *      aligned source words;
 *   4. single bytes for whatever remains.
 *
 * NOTE(review): the shift/merge used for a misaligned source yields the
 * source byte order only when words are big-endian - assumes GET_UINT32
 * returns words in native order; confirm.
 *
 * pDst - destination buffer
 * pSrc - source buffer
 * size - number of bytes to copy
 *
 * Returns pDst.
 */
void * IO2MemCpy32(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint32_t lastWord;
    uint32_t currWord;
    uint32_t *p_Src32;
    uint32_t *p_Dst32;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 3) && size) /* (pSrc mod 4) > 0 and size > 0 */
    {
        *p_Dst8 = GET_UINT8(*p_Src8);
        p_Dst8++;p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 3) && size) /* (pDst mod 4) > 0 and size > 0 */
    {
        *p_Dst8 = GET_UINT8(*p_Src8);
        p_Dst8++;p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 3) << 3); /* leftAlign = (pSrc mod 4)*8 */
    rightAlign = 32 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        p_Src32 = (uint32_t*)(p_Src8);
        p_Dst32 = (uint32_t*)(p_Dst8);
        while (size >> 2) /* size >= 4 */
        {
            *p_Dst32 = GET_UINT32(*p_Src32);
            p_Dst32++;p_Src32++;
            size -= 4;
        }
        p_Src8 = (uint8_t*)(p_Src32);
        p_Dst8 = (uint8_t*)(p_Dst32);
    }
    else if (size >> 3) /* size >= 8 */
    {
        /* source is not aligned (destination is aligned).
         * Entered only when at least one word will really be merged, so the
         * preload below never issues a source read that is not needed
         * (in particular nothing is read when size is 0).
         */
        p_Src32 = (uint32_t*)(p_Src8 - (leftAlign >> 3));
        p_Dst32 = (uint32_t*)(p_Dst8);
        lastWord = GET_UINT32(*p_Src32);
        p_Src32++;
        while(size >> 3) /* size >= 8: the next aligned source word lies inside the buffer */
        {
            currWord = GET_UINT32(*p_Src32);
            /* tail of the previous word followed by head of the current one */
            *p_Dst32 = (lastWord << leftAlign) | (currWord >> rightAlign);
            lastWord = currWord;
            p_Src32++;p_Dst32++;
            size -= 4;
        }
        p_Dst8 = (uint8_t*)(p_Dst32);
        /* step back to the first source byte that has not been stored yet */
        p_Src8 = (uint8_t*)(p_Src32) - 4 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
    {
        *p_Dst8 = GET_UINT8(*p_Src8);
        p_Dst8++;p_Src8++;
    }

    return pDst;
}
368 
/*
 * Copy 'size' bytes from pSrc to pDst, using 64-bit accesses where possible.
 *
 * The copy runs in ascending address order:
 *   1. single bytes until the source is 8-byte aligned;
 *   2. single bytes until the destination is 8-byte aligned (this may
 *      leave the source misaligned again);
 *   3. whole 64-bit words - copied directly when the source is aligned as
 *      well, otherwise each destination word is assembled from two
 *      consecutive aligned source words;
 *   4. single bytes for whatever remains.
 *
 * NOTE: the shift/merge used for a misaligned source reproduces the source
 * byte order only on a big-endian CPU.
 *
 * pDst - destination buffer
 * pSrc - source buffer
 * size - number of bytes to copy
 *
 * Returns pDst.
 */
void * MemCpy64(void* pDst,void* pSrc, uint32_t size)
{
    uint32_t leftAlign;
    uint32_t rightAlign;
    uint64_t lastWord;
    uint64_t currWord;
    uint64_t *pSrc64;
    uint64_t *pDst64;
    uint8_t  *p_Src8;
    uint8_t  *p_Dst8;

    p_Src8 = (uint8_t*)(pSrc);
    p_Dst8 = (uint8_t*)(pDst);
    /* first copy byte by byte till the source first alignment
     * this step is necessary to ensure we do not even try to access
     * data which is before the source buffer, hence it is not ours.
     */
    while((PTR_TO_UINT(p_Src8) & 7) && size) /* (pSrc mod 8) > 0 and size > 0 */
    {
        *p_Dst8++ = *p_Src8++;
        size--;
    }

    /* align destination (possibly disaligning source)*/
    while((PTR_TO_UINT(p_Dst8) & 7) && size) /* (pDst mod 8) > 0 and size > 0 */
    {
        *p_Dst8++ = *p_Src8++;
        size--;
    }

    /* dest is aligned and source is not necessarily aligned */
    leftAlign = (uint32_t)((PTR_TO_UINT(p_Src8) & 7) << 3); /* leftAlign = (pSrc mod 8)*8 */
    rightAlign = 64 - leftAlign;

    if (leftAlign == 0)
    {
        /* source is also aligned */
        pSrc64 = (uint64_t*)(p_Src8);
        pDst64 = (uint64_t*)(p_Dst8);
        while (size >> 3) /* size >= 8 */
        {
            *pDst64++ = *pSrc64++;
            size -= 8;
        }
        p_Src8 = (uint8_t*)(pSrc64);
        p_Dst8 = (uint8_t*)(pDst64);
    }
    else if (size >> 4) /* size >= 16 */
    {
        /* source is not aligned (destination is aligned).
         * Entered only when at least one word will really be merged, so the
         * preload below never touches a source word that is not needed
         * (in particular nothing is read when size is 0).
         */
        pSrc64 = (uint64_t*)(p_Src8 - (leftAlign >> 3));
        pDst64 = (uint64_t*)(p_Dst8);
        lastWord = *pSrc64++;
        while(size >> 4) /* size >= 16: the next aligned source word lies inside the buffer */
        {
            currWord = *pSrc64;
            /* tail of the previous word followed by head of the current one */
            *pDst64 = (lastWord << leftAlign) | (currWord >> rightAlign);
            lastWord = currWord;
            pSrc64++;
            pDst64++;
            size -= 8;
        }
        p_Dst8 = (uint8_t*)(pDst64);
        /* step back to the first source byte that has not been stored yet */
        p_Src8 = (uint8_t*)(pSrc64) - 8 + (leftAlign >> 3);
    }

    /* complete the left overs */
    while (size--)
        *p_Dst8++ = *p_Src8++;

    return pDst;
}
442 
/*
 * Fill 'size' bytes starting at pDst with 'val', using 32-bit stores for
 * the 4-byte-aligned middle part of the range.
 *
 * pDst - destination buffer
 * val  - byte value to store
 * size - number of bytes to fill
 *
 * Returns pDst.
 */
void * MemSet32(void* pDst, uint8_t val, uint32_t size)
{
    uint8_t     *p_Byte = (uint8_t*)pDst;
    uint32_t    *p_Word;
    uint32_t    pattern;

    /* replicate the byte into all four lanes of a 32-bit word */
    pattern = ((uint32_t)val << 8) | (uint32_t)val;
    pattern = (pattern << 16) | pattern;

    /* head: single bytes until the destination is 4-byte aligned */
    for (; (PTR_TO_UINT(p_Byte) & 3) && size; size--)
        *p_Byte++ = val;

    /* body: whole aligned words */
    p_Word = (uint32_t*)p_Byte;
    for (; size >= 4; size -= 4)
        *p_Word++ = pattern;

    /* tail: fewer than four bytes are left */
    p_Byte = (uint8_t*)p_Word;
    for (; size != 0; size--)
        *p_Byte++ = val;

    return pDst;
}
478 
/*
 * Fill 'size' bytes starting at pDst with 'val', using 32-bit stores for
 * the 4-byte-aligned middle part of the range. Every store is issued
 * through WRITE_UINT8/WRITE_UINT32.
 *
 * pDst - destination buffer
 * val  - byte value to store
 * size - number of bytes to fill
 *
 * Returns pDst.
 */
void * IOMemSet32(void* pDst, uint8_t val, uint32_t size)
{
    uint8_t     *p_Byte = (uint8_t*)pDst;
    uint32_t    *p_Word;
    uint32_t    pattern;

    /* replicate the byte into all four lanes of a 32-bit word */
    pattern = ((uint32_t)val << 8) | (uint32_t)val;
    pattern = (pattern << 16) | pattern;

    /* head: single bytes until the destination is 4-byte aligned */
    for (; (PTR_TO_UINT(p_Byte) & 3) && size; size--)
    {
        WRITE_UINT8(*p_Byte, val);
        p_Byte++;
    }

    /* body: whole aligned words */
    p_Word = (uint32_t*)p_Byte;
    for (; size >= 4; size -= 4)
    {
        WRITE_UINT32(*p_Word, pattern);
        p_Word++;
    }

    /* tail: fewer than four bytes are left */
    p_Byte = (uint8_t*)p_Word;
    for (; size != 0; size--)
    {
        WRITE_UINT8(*p_Byte, val);
        p_Byte++;
    }

    return pDst;
}
519 
/*
 * Fill 'size' bytes starting at pDst with 'val', using 64-bit stores for
 * the 8-byte-aligned middle part of the range.
 *
 * pDst - destination buffer
 * val  - byte value to store
 * size - number of bytes to fill
 *
 * Returns pDst.
 */
void * MemSet64(void* pDst, uint8_t val, uint32_t size)
{
    uint64_t val64;
    uint64_t *pDst64;
    uint8_t  *p_Dst8;

    p_Dst8 = (uint8_t*)(pDst);

    /* generate eight 8-bit val's in 64-bit container (1 -> 2 -> 4 -> 8 copies) */
    val64  = (uint64_t) val;
    val64 |= (val64 <<  8);
    val64 |= (val64 << 16);
    val64 |= (val64 << 32);

    /* align destination to 64 */
    while((PTR_TO_UINT(p_Dst8) & 7) && size) /* (pDst mod 8) > 0 and size > 0 */
    {
        *p_Dst8++ = val;
        size--;
    }

    /* 64-bit chunks; every remaining run of 8 bytes is stored as one word */
    pDst64 = (uint64_t*)(p_Dst8);
    while (size >> 3) /* size >= 8 */
    {
        *pDst64++ = val64;
        size -= 8;
    }

    /* complete the leftovers (fewer than 8 bytes) */
    p_Dst8 = (uint8_t*)(pDst64);
    while (size--)
        *p_Dst8++ = val;

    return pDst;
}
557 
/*
 * Hex-dump 'size' bytes starting at p through XX_Print.
 *
 * Each output line starts with the address of its first 32-bit word and
 * holds up to four words. When p is not 4-byte aligned, the first line
 * starts at the preceding aligned address and prints "--" for every byte
 * that lies before p. Trailing bytes that do not fill a whole word are
 * printed individually.
 *
 * p    - first byte to display
 * size - number of bytes to display; a negative value prints nothing
 */
void MemDisp(uint8_t *p, int size)
{
    uint32_t    space = (uint32_t)(PTR_TO_UINT(p) & 0x3);
    uint8_t     *p_Limit;

    /* A negative size can never count down to zero, so the byte loops
     * below would run far past the buffer; reject it up front.
     */
    if (size < 0)
        return;

    if (space)
    {
        /* end of the aligned word that contains p */
        p_Limit = (p - space + 4);

        /* addresses are passed as unsigned long so the argument type
         * matches the conversion specifier
         */
        XX_Print("0x%08lX: ", (unsigned long)PTR_TO_UINT(p - space));

        while (space--)
        {
            XX_Print("--");
        }
        while (size  && (p < p_Limit))
        {
            XX_Print("%02x", *(uint8_t*)p);
            size--;
            p++;
        }

        XX_Print(" ");
        /* the first line may hold three more words */
        p_Limit += 12;

        while ((size > 3) && (p < p_Limit))
        {
            XX_Print("%08x ", *(uint32_t*)p);
            size -= 4;
            p += 4;
        }
        XX_Print("\r\n");
    }

    /* full lines of four words */
    while (size > 15)
    {
        XX_Print("0x%08lX: %08x %08x %08x %08x\r\n",
                 (unsigned long)PTR_TO_UINT(p), *(uint32_t *)p, *(uint32_t *)(p + 4),
                 *(uint32_t *)(p + 8), *(uint32_t *)(p + 12));
        size -= 16;
        p += 16;
    }

    /* last, partial line: whole words first, then single bytes */
    if (size)
    {
        XX_Print("0x%08lX: ", (unsigned long)PTR_TO_UINT(p));

        while (size > 3)
        {
            XX_Print("%08x ", *(uint32_t *)p);
            size -= 4;
            p += 4;
        }
        while (size)
        {
            XX_Print("%02x", *(uint8_t *)p);
            size--;
            p++;
        }

        XX_Print("\r\n");
    }
}
621