xref: /linux/arch/arc/lib/strchr-700.S (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1/*
2 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9/* ARC700 has a relatively long pipeline and branch prediction, so we want
10   to avoid branches that are hard to predict.  On the other hand, the
11   presence of the norm instruction makes it easier to operate on whole
12   words branch-free.  */
13
14#include <linux/linkage.h>
15
/*
 * strchr - find the first occurrence of a byte in a NUL-terminated string,
 * examining one 32-bit word per step.
 *
 * In:   r0 = string address (used as the load address)
 *       r1 = byte to search for (only the low 8 bits are used)
 * Out:  r0 = address of the first matching byte, or 0 when a NUL byte is
 *            reached first; searching for 0 yields the address of the NUL
 * Returns through blink.  Besides r0/r1 the code writes r2-r7, r12 and the
 * status flags.
 *
 * Register roles once set-up is complete:
 *   r2   data word under test
 *   r3   0x01010101
 *   r4   0x80808080
 *   r5   search byte replicated into all four byte lanes
 *   r6   data word XOR r5: a lane is zero exactly where the byte matches
 *   r7   lane mask for the first word, later the match flags
 *   r12  NUL flags / scratch
 *
 * Zero-lane test used throughout: (x - 0x01010101) & ~x & 0x80808080 is
 * non-zero if and only if x contains a zero byte.  Borrows can raise extra
 * flags in lanes above a real zero lane, so only the least significant flag
 * is exact; the two endian variants deal with that differently.
 *
 * A ".d" suffix on a branch or jump means the following instruction sits in
 * its delay slot and is executed as well.
 */
16ENTRY(strchr)
	/* r1 = search byte, zero-extended from its low 8 bits. */
17	extb_s	r1,r1
18	asl	r5,r1,8
	/* r2 = r0 & 3: byte offset of the string start within its word. */
19	bmsk	r2,r0,1
	/* r5 = search byte in the two low lanes. */
20	or	r5,r5,r1
21	mov_s	r3,0x01010101
	/*
	 * NOTE(review): this compares r2 (= r0 & 3) with r0 itself rather than
	 * with 0, so as written the branch is taken only when r0 < 4.  A
	 * word-aligned string falls through to the code below, which copes with
	 * it because the shift count is then 0.  Confirm whether
	 * "breq.d r2,0,.Laligned" was intended.
	 */
22	breq.d	r2,r0,.Laligned
	/* Delay slot: r4 = search byte in the two high lanes. */
23	asl	r4,r5,16
	/* Round r0 down to the containing word; r7 = start offset in bits. */
24	sub_s	r0,r0,r2
25	asl	r7,r2,3
26	ld_s	r2,[r0]
	/*
	 * r7 = 0x01010101 shifted by the start offset, so that the lanes in
	 * front of the string start are 0 and cannot raise a flag below.
	 */
27#ifdef __LITTLE_ENDIAN__
28	asl	r7,r3,r7
29#else
30	lsr	r7,r3,r7
31#endif
	/* r5 = search byte in all four lanes; r4 = 0x01010101 ror 1 = 0x80808080. */
32	or	r5,r5,r4
33	ror	r4,r3
	/* r12 = NUL flags of the first word, restricted by the lane mask r7. */
34	sub	r12,r2,r7
35	bic_s	r12,r12,r2
36	and	r12,r12,r4
37	brne.d	r12,0,.Lfound0_ua
	/* Delay slot: r6 = word XOR replicated search byte. */
38	xor	r6,r2,r5
	/* No NUL in the first word: advance r0 by 4 and load the next word ... */
39	ld.a	r2,[r0,4]
	/* ... then test the first word (through r6) for the search byte. */
40	sub	r12,r6,r7
41	bic	r12,r12,r6
42#ifdef __LITTLE_ENDIAN__
	/* r7 = match flags; none set means continue with the main loop. */
43	and	r7,r12,r4
44	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
45	b	.Lfound_char ; Likewise this one.
46#else
47	and	r12,r12,r4
48	breq	r12,0,.Loop ; For speed, we want this branch to be unaligned.
	/*
	 * Move the flags down to bit 0 of each lane and keep only lanes that
	 * are inside the string (r7) and whose r6 lane has bit 0 clear.
	 * NOTE(review): the bit-0 filter presumably discards flags raised by a
	 * borrow on lanes holding 0x01 - confirm.
	 */
49	lsr_s	r12,r12,7
50	bic 	r2,r7,r6
51	b.d	.Lfound_char_b
	/* Delay slot: r2 = filtered match flags, as .Lfound_char_b expects. */
52	and_s	r2,r2,r12
53#endif
54; /* We require this code address to be unaligned for speed...  */
55.Laligned:
	/* Load the first word and finish building r5 and r4 (see register roles). */
56	ld_s	r2,[r0]
57	or	r5,r5,r4
58	ror	r4,r3
59; /* ... so that this code address is aligned, for itself and ...  */
60.Loop:
	/* r12 = NUL flags of the current word. */
61	sub	r12,r2,r3
62	bic_s	r12,r12,r2
63	and	r12,r12,r4
64	brne.d	r12,0,.Lfound0
	/* Delay slot: r6 = word XOR replicated search byte. */
65	xor	r6,r2,r5
	/* Advance r0 by 4 and preload the next word while r6 is still being tested. */
66	ld.a	r2,[r0,4]
	/* r7 = match flags of the word that was just cleared of NULs. */
67	sub	r12,r6,r3
68	bic	r12,r12,r6
69	and	r7,r12,r4
70	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
71	; Found searched-for character.  r0 has already advanced to next word.
72#ifdef __LITTLE_ENDIAN__
73/* We only need the information about the first matching byte
74   (i.e. the least significant matching byte) to be exact,
75   hence there is no problem with carry effects.  */
76.Lfound_char:
	/* r3 = all bits below the lowest match flag in r7. */
77	sub	r3,r7,1
78	bic	r3,r3,r7
	/*
	 * norm of that mask, shifted right by 3, is 3 minus the lane index of
	 * the match.  r0 is one word ahead, so (r0 - 1) - that value is the
	 * address of the matching byte.
	 */
79	norm	r2,r3
80	sub_s	r0,r0,1
81	asr_s	r2,r2,3
82	j.d	[blink]
	/* Delay slot: final address adjustment. */
83	sub_s	r0,r0,r2
84
85	.balign	4
86.Lfound0_ua:
	/* First (partial) word: use the start lane mask in place of 0x01010101. */
87	mov	r3,r7
88.Lfound0:
	/*
	 * r12 holds NUL flags for the word at r0 (r0 has not been advanced).
	 * r2 = match flags for the search byte in the same word.
	 */
89	sub	r3,r6,r3
90	bic	r3,r3,r6
91	and	r2,r3,r4
	/* r12 = NUL flags | match flags; r3 = all bits below the lowest of them. */
92	or_s	r12,r12,r2
93	sub_s	r3,r12,1
94	bic_s	r3,r3,r12
95	norm	r3,r3
	/* r0 + 3 - (norm >> 3) = address of the first flagged byte. */
96	add_s	r0,r0,3
97	asr_s	r12,r3,3
	/*
	 * Shift the match flags so that the first flagged lane lands in bit 31:
	 * negative means that lane is a match, positive means the NUL came first.
	 */
98	asl.f	0,r2,r3
99	sub_s	r0,r0,r12
100	j_s.d	[blink]
	/* Delay slot: return 0 when the NUL came before any match. */
101	mov.pl	r0,0
102#else /* BIG ENDIAN */
103.Lfound_char:
	/*
	 * Move the match flags to bit 0 of each lane and drop lanes whose r6
	 * lane has bit 0 set (such a lane is non-zero, hence not a match).
	 */
104	lsr	r7,r7,7
105
106	bic	r2,r7,r6
107.Lfound_char_b:
	/*
	 * norm >> 3 is the index of the first flagged lane counted from the
	 * most significant one; r0 is one word ahead, hence the -4.
	 */
108	norm	r2,r2
109	sub_s	r0,r0,4
110	asr_s	r2,r2,3
111	j.d	[blink]
	/* Delay slot: r0 = address of the matching byte. */
112	add_s	r0,r0,r2
113
114.Lfound0_ua:
	/* First (partial) word: use the start lane mask in place of 0x01010101. */
115	mov_s	r3,r7
116.Lfound0:
	/*
	 * Two computations are interleaved here:
	 *  - r12 &= ~(data word << 7): drop NUL flags on lanes whose data byte
	 *    has bit 0 set;
	 *  - r2 = ((r6 | 0x80808080) - r3) | r6: bit 7 of a lane ends up clear
	 *    only where the r6 lane is zero, i.e. where the byte matches.
	 */
117	asl_s	r2,r2,7
118	or	r7,r6,r4
119	bic_s	r12,r12,r2
120	sub	r2,r7,r3
121	or	r2,r2,r6
	/* r3 = 0x80 in every lane flagged as match or NUL; flags are set from it. */
122	bic	r12,r2,r12
123	bic.f	r3,r4,r12
124	norm	r3,r3
125
	/*
	 * If the most significant lane was not flagged (pl), bump the count so
	 * that count >> 3 is the index of the first flagged lane.
	 */
126	add.pl	r3,r3,1
127	asr_s	r12,r3,3
	/*
	 * Bring bit 7 of the first flagged lane of r2 to bit 31: set (mi) means
	 * that lane is the NUL rather than a match.
	 */
128	asl.f	0,r2,r3
129	add_s	r0,r0,r12
130	j_s.d	[blink]
	/* Delay slot: return 0 when the NUL came before any match. */
131	mov.mi	r0,0
132#endif /* ENDIAN */
133END(strchr)
134