xref: /linux/arch/arc/lib/strchr-700.S (revision 24bce201d79807b668bf9d9e0aca801c5c0d5f78)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
4 */
5
6/* ARC700 has a relatively long pipeline and branch prediction, so we want
7   to avoid branches that are hard to predict.  On the other hand, the
8   presence of the norm instruction makes it easier to operate on whole
9   words branch-free.  */
10
11#include <linux/linkage.h>
12
; strchr: scan a NUL-terminated byte string one 32-bit word at a time and
; return the address of the first byte equal to the search character, or 0
; when a NUL byte is reached before any match.  When the search character
; is itself 0, r6 equals the loaded word, so the NUL lane counts as a match
; and its address is returned.
;
; Register roles as read from the code below (that r0/r1 carry the two
; arguments and r0 the result is presumably the ARC calling convention -
; confirm against the ABI document):
;   r0   in: string pointer; out: result pointer or 0
;   r1   in: search character (only the low 8 bits are used)
;   r2   word currently under inspection, later scratch
;   r3   0x01010101 (one per byte lane)
;   r4   0x80808080 once set up (r3 rotated right by one bit)
;   r5   search byte replicated into all four byte lanes
;   r6   word ^ r5: a lane is zero exactly where the word holds the char
;   r7   first word: r3 with the lanes in front of the string cleared;
;        afterwards scratch for per-lane flags
;   r12  scratch for per-lane flags
; Control returns through blink.  Instructions following a ".d" branch or
; jump sit in its delay slot and execute with it.
13ENTRY_CFI(strchr)
14	extb_s	r1,r1	; r1 = low byte of the search character
15	asl	r5,r1,8
16	bmsk	r2,r0,1	; r2 = r0 & 3, byte offset of r0 inside its word
17	or	r5,r5,r1	; r5 = char in the two low byte lanes
18	mov_s	r3,0x01010101
; NOTE(review): the branch below compares the low two address bits (r2)
; with the whole pointer (r0), so it is taken only when r0 < 4.  A
; word-aligned non-zero pointer falls through; with r2 = 0 the fall-through
; code leaves r0 unchanged and makes r7 equal to r3, so the result is the
; same as on the .Laligned path.  If a test of r2 against 0 was intended,
; confirm before changing: the code addresses below are deliberate.
19	breq.d	r2,r0,.Laligned
20	asl	r4,r5,16	; delay slot: r4 = r5 << 16 on both paths
; First word, general path: round r0 down to a word boundary and build in
; r7 a copy of r3 whose lanes in front of the string start are zero, so
; those lanes cannot raise a flag on their own.
21	sub_s	r0,r0,r2
22	asl	r7,r2,3	; r7 = offset in bits
23	ld_s	r2,[r0]	; r2 = first (aligned) word
24#ifdef __LITTLE_ENDIAN__
25	asl	r7,r3,r7
26#else
27	lsr	r7,r3,r7
28#endif
29	or	r5,r5,r4	; r5 = char replicated in all four lanes
30	ror	r4,r3	; r4 = 0x80808080
; NUL test on the first word: r12 = (word - r7) & ~word & r4.
31	sub	r12,r2,r7
32	bic_s	r12,r12,r2
33	and	r12,r12,r4
34	brne.d	r12,0,.Lfound0_ua
35	xor	r6,r2,r5	; delay slot: r6 lane is 0 where word byte == char
36	ld.a	r2,[r0,4]	; r0 += 4, r2 = next word
; Char test on the first word: same pattern applied to r6.
37	sub	r12,r6,r7
38	bic	r12,r12,r6
39#ifdef __LITTLE_ENDIAN__
40	and	r7,r12,r4
41	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
42	b	.Lfound_char ; Likewise this one.
43#else
44	and	r12,r12,r4
45	breq	r12,0,.Loop ; For speed, we want this branch to be unaligned.
; r2 = (flags >> 7) & r7 & ~r6: flags moved to bit 0 of each lane, kept
; only in lanes that belong to the string and where bit 0 of r6 is clear.
46	lsr_s	r12,r12,7
47	bic 	r2,r7,r6
48	b.d	.Lfound_char_b
49	and_s	r2,r2,r12	; delay slot
50#endif
51; /* We require this code address to be unaligned for speed...  */
52.Laligned:
53	ld_s	r2,[r0]	; r2 = first word
54	or	r5,r5,r4	; r5 = char replicated in all four lanes
55	ror	r4,r3	; r4 = 0x80808080
56; /* ... so that this code address is aligned, for itself and ...  */
; Main loop, one word per iteration.  On entry r2 holds the word at [r0].
57.Loop:
; NUL test: r12 = (word - r3) & ~word & r4.
58	sub	r12,r2,r3
59	bic_s	r12,r12,r2
60	and	r12,r12,r4
61	brne.d	r12,0,.Lfound0
62	xor	r6,r2,r5	; delay slot: r6 lane is 0 where word byte == char
63	ld.a	r2,[r0,4]	; r0 += 4, r2 = next word (loaded before the char test)
; Char test on the previous word: r7 = (r6 - r3) & ~r6 & r4.
64	sub	r12,r6,r3
65	bic	r12,r12,r6
66	and	r7,r12,r4
67	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
68	; Found searched-for character.  r0 has already advanced to next word.
69#ifdef __LITTLE_ENDIAN__
70/* We only need the information about the first matching byte
71   (i.e. the least significant matching byte) to be exact,
72   hence there is no problem with carry effects.  */
; In: r7 = match flags (bit 7 of each flagged lane), r0 = word address + 4.
73.Lfound_char:
74	sub	r3,r7,1
75	bic	r3,r3,r7	; r3 = all bits below the lowest set flag
76	norm	r2,r3	; 24, 16, 8 or 0 for a first match in byte 0..3
77	sub_s	r0,r0,1	; r0 = address of byte 3 of the tested word
78	asr_s	r2,r2,3	; r2 = 3 - index of the matching byte
79	j.d	[blink]
80	sub_s	r0,r0,r2	; delay slot: r0 = address of the matching byte
81
82	.balign	4
; A NUL flag was raised.  In: r0 = address of the word, r12 = NUL flags,
; r6 = word ^ r5.  The _ua entry first swaps in the lane-masked constant
; used for the first word.
83.Lfound0_ua:
84	mov	r3,r7
85.Lfound0:
86	sub	r3,r6,r3
87	bic	r3,r3,r6
88	and	r2,r3,r4	; r2 = match flags for this word
89	or_s	r12,r12,r2	; r12 = NUL flags | match flags
90	sub_s	r3,r12,1
91	bic_s	r3,r3,r12	; r3 = all bits below the lowest flag
92	norm	r3,r3
93	add_s	r0,r0,3
94	asr_s	r12,r3,3	; r12 = 3 - index of the first flagged byte
95	asl.f	0,r2,r3	; flags only: negative if that byte is a match
96	sub_s	r0,r0,r12	; r0 = address of the first flagged byte
97	j_s.d	[blink]
98	mov.pl	r0,0	; delay slot: not a match, so return 0
99#else /* BIG ENDIAN */
; In: r7 = match flags (bit 7 of each flagged lane), r6 = word ^ r5,
; r0 = word address + 4.
100.Lfound_char:
101	lsr	r7,r7,7	; move each flag to bit 0 of its lane
102
; Keep a flag only where bit 0 of the r6 lane is clear (presumably this
; removes flags raised by borrow from a less significant lane - confirm).
103	bic	r2,r7,r6
104.Lfound_char_b:
105	norm	r2,r2	; 6, 14, 22 or 30 for a first match in byte 0..3
106	sub_s	r0,r0,4	; r0 = address of the tested word
107	asr_s	r2,r2,3	; r2 = index of the matching byte
108	j.d	[blink]
109	add_s	r0,r0,r2	; delay slot: r0 = address of the matching byte
110
; A NUL flag was raised.  In: r0 = address of the word, r2 = the word,
; r12 = NUL flags, r6 = word ^ r5.  The _ua entry first swaps in the
; lane-masked constant used for the first word.
111.Lfound0_ua:
112	mov_s	r3,r7
113.Lfound0:
114	asl_s	r2,r2,7	; bit 0 of each word lane moved to bit 7
115	or	r7,r6,r4	; every lane >= 0x80, so the sub below cannot borrow across lanes
116	bic_s	r12,r12,r2	; drop NUL flags where the word byte has bit 0 set
117	sub	r2,r7,r3
118	or	r2,r2,r6	; bit 7 of a lane is clear only where r6 lane is 0 and r3 lane is 0x01
119	bic	r12,r2,r12	; bit 7 set where the lane is neither match nor NUL
120	bic.f	r3,r4,r12	; r3 = 0x80 in lanes that are match or NUL; sets flags
121	norm	r3,r3
122
123	add.pl	r3,r3,1	; bit 31 of r3 was clear: r3 becomes 8, 16 or 24; otherwise it stays 0
124	asr_s	r12,r3,3	; r12 = index of the first flagged byte
125	asl.f	0,r2,r3	; flags only: negative if that byte is not a match
126	add_s	r0,r0,r12	; r0 = address of the first flagged byte
127	j_s.d	[blink]
128	mov.mi	r0,0	; delay slot: not a match, so return 0
129#endif /* ENDIAN */
130END_CFI(strchr)
131