xref: /linux/arch/arc/lib/memset-archs.S (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
1/*
2 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/linkage.h>
10#include <asm/cache.h>
11
12/*
13 * The memset implementation below is optimized to use the prefetchw and prealloc
14 * instructions on CPUs with a 64B L1 data cache line (L1_CACHE_SHIFT == 6).
15 * If you want to implement an optimized memset for the other possible L1 data
16 * cache line lengths (32B and 128B), rewrite the code carefully, checking that
17 * no prefetchw/prealloc instruction is issued for an L1 cache line which
18 * does not belong to the memset area.
19 */
20
/*
 * Cache-hint wrappers used by memset.
 *
 *   PREALLOC_INSTR  reg, off  - prealloc  [reg, off]
 *   PREFETCHW_INSTR reg, off  - prefetchw [reg, off]
 *
 * The real instructions are only emitted when the L1 data cache line is
 * 64 bytes (L1_CACHE_SHIFT == 6). For any other line size both macros
 * expand to nothing.
 *
 * The empty variants must still declare the "reg, off" parameters: the
 * call sites always pass two arguments, and the assembler rejects
 * arguments handed to a macro that was defined without parameters, which
 * broke the build for L1_CACHE_SHIFT != 6.
 */
#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif
40
/*
 * memset: fill r2 bytes starting at r0 with the low byte of r1.
 *
 * Register use, as read from the code below:
 *   r0       - destination; never written, so it is still intact on return
 *   r1       - fill value; masked to 8 bits once the destination is aligned
 *   r2       - byte count still to be written
 *   r3       - running write pointer (starts as a copy of r0)
 *   r4, r5   - fill byte replicated into all four bytes of each register
 *   lp_count - trip count of the zero-overhead loops (lpnz)
 *
 * Stages: early return for zero length; plain byte loop for lengths <= 8;
 * otherwise byte stores up to a 4-byte boundary, then 64-byte chunks,
 * then 32-byte chunks and finally single bytes.
 *
 * The instruction following a ".d" branch/jump sits in its delay slot and
 * is executed whether or not the branch is taken.
 */
41ENTRY_CFI(memset)
42	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
43	mov.f	0, r2		; only sets the flags from the length
44;;; if size is zero
45	jz.d	[blink]
46	mov	r3, r0		; don't clobber ret val
47
48;;; if length <= 8 (brls = unsigned lower-or-same): bytes only, no alignment
49	brls.d.nt	r2, 8, .Lsmallchunk
50	mov.f	lp_count,r2	; delay slot: lp_count = length, Z clear (length != 0)
51
52	and.f	r4, r0, 0x03	; r4 = dst & 3, Z set when dst is 4-byte aligned
53	rsub	lp_count, r4, 4	; lp_count = 4 - r4 (flags untouched)
54	lpnz	@.Laligndestination	; loop skipped when Z is set
55	;; LOOP BEGIN
56	stb.ab	r1, [r3,1]	; store one byte, then r3 += 1
57	sub	r2, r2, 1
58.Laligndestination:
59
60;;; Destination is aligned
	;; Replicate the fill byte: r4 = r5 = byte * 0x01010101
61	and	r1, r1, 0xFF
62	asl	r4, r1, 8
63	or	r4, r4, r1
64	asl	r5, r4, 16
65	or	r5, r5, r4
66	mov	r4, r5
67
	;; Split the length between the 64-byte loop and the tail loops.
	;;   len <= 64: lp_count = 0, r2 unchanged (everything goes to the tail)
	;;   len >  64: lp_count = len - 64, r2 = (len & 63) + 64
	;; so 64 bytes are always held back from the 64-byte loop. NOTE(review):
	;; presumably this keeps the line hinted by PREALLOC_INSTR at r3 + 64
	;; inside the buffer (see the note at the top of the file) - confirm.
68	sub3	lp_count, r2, 8	; lp_count = r2 - (8 << 3)
69	cmp     r2, 64
70	bmsk.hi	r2, r2, 5	; keep bits 5..0 of r2
71	mov.ls	lp_count, 0
72	add3.hi	r2, r2, 8	; r2 += (8 << 3)
73
74;;; Convert len to 64-byte chunks (loop body is unrolled to 64 bytes)
75	lsr.f	lp_count, lp_count, 6	; Z set when there is no full chunk
76
77	lpnz	@.Lset64bytes
78	;; LOOP START
79	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching
80
	;; 64 bytes per iteration: 8 x 8-byte std (r4/r5 pair) or 16 x 4-byte st
81#ifdef CONFIG_ARC_HAS_LL64
82	std.ab	r4, [r3, 8]
83	std.ab	r4, [r3, 8]
84	std.ab	r4, [r3, 8]
85	std.ab	r4, [r3, 8]
86	std.ab	r4, [r3, 8]
87	std.ab	r4, [r3, 8]
88	std.ab	r4, [r3, 8]
89	std.ab	r4, [r3, 8]
90#else
91	st.ab	r4, [r3, 4]
92	st.ab	r4, [r3, 4]
93	st.ab	r4, [r3, 4]
94	st.ab	r4, [r3, 4]
95	st.ab	r4, [r3, 4]
96	st.ab	r4, [r3, 4]
97	st.ab	r4, [r3, 4]
98	st.ab	r4, [r3, 4]
99	st.ab	r4, [r3, 4]
100	st.ab	r4, [r3, 4]
101	st.ab	r4, [r3, 4]
102	st.ab	r4, [r3, 4]
103	st.ab	r4, [r3, 4]
104	st.ab	r4, [r3, 4]
105	st.ab	r4, [r3, 4]
106	st.ab	r4, [r3, 4]
107#endif
108.Lset64bytes:
109
110	lsr.f	lp_count, r2, 5 ;Remaining tail is at most 127 bytes; count 32-byte chunks
111	lpnz	.Lset32bytes
112	;; LOOP START
	;; 32 bytes per iteration: 4 x 8-byte std or 8 x 4-byte st
113#ifdef CONFIG_ARC_HAS_LL64
114	std.ab	r4, [r3, 8]
115	std.ab	r4, [r3, 8]
116	std.ab	r4, [r3, 8]
117	std.ab	r4, [r3, 8]
118#else
119	st.ab	r4, [r3, 4]
120	st.ab	r4, [r3, 4]
121	st.ab	r4, [r3, 4]
122	st.ab	r4, [r3, 4]
123	st.ab	r4, [r3, 4]
124	st.ab	r4, [r3, 4]
125	st.ab	r4, [r3, 4]
126	st.ab	r4, [r3, 4]
127#endif
128.Lset32bytes:
129
130	and.f	lp_count, r2, 0x1F ;At most 31 bytes left, stored one at a time
	;; Also the entry point for lengths <= 8, with lp_count = length
131.Lsmallchunk:
132	lpnz	.Lcopy3bytes
133	;; LOOP START
134	stb.ab	r1, [r3, 1]
135.Lcopy3bytes:
136
137	j	[blink]		; return; r0 was never modified
138
139END_CFI(memset)
140
/*
 * memzero: zero a buffer by handing over to memset.
 *
 * In:  r0 = buffer (passed through untouched), r1 = byte count
 *
 * The count is moved into r2 and the fill value in r1 is set to 0, which is
 * the register layout memset reads. Control is transferred with a plain
 * branch rather than a branch-and-link, so blink still holds the return
 * address of the memzero caller and the "j [blink]" at the end of memset
 * returns straight to it.
 */
ENTRY_CFI(memzero)
	mov	r2, r1		; byte count moves to the third argument slot
	mov	r1, 0		; fill value is zero
	b	memset		; tail call, so no need to tinker with blink
END_CFI(memzero)
147