xref: /linux/arch/arm64/lib/strcmp.S (revision 722ecdbce68a87de2d9296f91308f44ea900a039)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright (c) 2012-2022, Arm Limited.
4 *
5 * Adapted from the original at:
6 * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S
7 */
8
9#include <linux/linkage.h>
10#include <asm/assembler.h>
11
12/* Assumptions:
13 *
14 * ARMv8-a, AArch64.
15 * MTE compatible.
16 */
17
/* L(name) expands to .Lname by token pasting; the labels of the routine
   below are all written through it.  */
18#define L(label) .L ## label
19
/* 0x01 and 0x7f repeated in each of the eight bytes of a 64-bit value.
   Both are operands of the NUL-detection expression described in the
   comment ahead of the function.  */
20#define REP8_01 0x0101010101010101
21#define REP8_7f 0x7f7f7f7f7f7f7f7f
22
/* String pointers and return value.  result shares x0 with src1.  */
23#define src1		x0
24#define src2		x1
25#define result		x0
26
/* Working registers.  data1w/data2w are the 32-bit views of data1/data2.
   Two pairs of names share one register each and therefore cannot be
   live at the same time: diff/off1 (x5) and syndrome/tmp (x6).  */
27#define data1		x2
28#define data1w		w2
29#define data2		x3
30#define data2w		w3
31#define has_nul		x4
32#define diff		x5
33#define off1		x5
34#define syndrome	x6
35#define tmp		x6
36#define data3		x7
37#define zeroones	x8
38#define shift		x9
39#define off2		x10
40
41/* On big-endian early bytes are at MSB and on little-endian LSB.
42   LS_FW means shifting towards early bytes.  */
43#ifdef __AARCH64EB__
44# define LS_FW lsl
45#else
46# define LS_FW lsr
47#endif
48
49/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
50   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
51   can be done in parallel across the entire word.
52   Since carry propagation makes 0x1 bytes before a NUL byte appear
53   NUL too in big-endian, byte-reverse the data before the NUL check.  */
54
55
/*
 * __pi_strcmp: compare two NUL-terminated byte strings.
 *
 * In:   src1 (x0), src2 (x1) hold the addresses of the two strings.
 * Out:  result (x0) is zero when the strings match up to and including
 *       the NUL terminator.  Otherwise its sign is that of the first
 *       differing byte pair compared as unsigned values (only the
 *       byte-loop exit at L(done) returns the exact byte difference).
 * Uses: x2-x10 under the names #defined above, plus the condition
 *       flags.  sp is not referenced and nothing is called.
 *
 * Dispatch on the low three bits of the two addresses:
 *   - both 8-byte aligned               -> L(loop_aligned)
 *   - same non-zero offset in a dword   -> L(mutual_align)
 *   - different offsets                 -> L(misaligned8)
 */
56SYM_FUNC_START(__pi_strcmp)
	/* off2 = src2 - src1, so src2's data can be read as [src1, off2]
	   while only src1 is advanced.  */
57	sub	off2, src2, src1
58	mov	zeroones, REP8_01
59	and	tmp, src1, 7		/* src1's byte offset within its dword */
60	tst	off2, 7
	/* Distance is not a multiple of 8: the two addresses never share
	   an alignment.  */
61	b.ne	L(misaligned8)
	/* Same offset within a dword, but that offset is not zero.  */
62	cbnz	tmp, L(mutual_align)
63
64	.p2align 4
65
	/* Aligned loop: one dword from each string per iteration.  src1 is
	   post-incremented; src2's dword is fetched through off2.  */
66L(loop_aligned):
67	ldr	data2, [src1, off2]
68	ldr	data1, [src1], 8
69L(start_realigned):
	/* has_nul = (X - 0x01..01) & ~(X | 0x7f..7f), where X is data1,
	   byte-reversed first on big-endian.  */
70#ifdef __AARCH64EB__
71	rev	tmp, data1
72	sub	has_nul, tmp, zeroones
73	orr	tmp, tmp, REP8_7f
74#else
75	sub	has_nul, data1, zeroones
76	orr	tmp, data1, REP8_7f
77#endif
78	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* No NUL (Z set): compare the two dwords.  NUL seen: load NZCV with
	   0 instead, so the b.eq below is not taken.  */
79	ccmp	data1, data2, 0, eq
80	b.eq	L(loop_aligned)
	/* Left the loop: data1 contains a NUL and/or data1 != data2.  */
81#ifdef __AARCH64EB__
82	rev	has_nul, has_nul	/* back into data1's byte order */
83#endif
84	eor	diff, data1, data2
85	orr	syndrome, diff, has_nul
	/* Common exit for the dword paths.  data1/data2 hold the dwords being
	   compared and syndrome has bits set where they differ or where a
	   NUL was flagged.  */
86L(end):
	/* Little-endian: move the earliest byte to the most significant end
	   so that clz locates the first set syndrome bit in string order.  */
87#ifndef __AARCH64EB__
88	rev	syndrome, syndrome
89	rev	data1, data1
90	rev	data2, data2
91#endif
92	clz	shift, syndrome
93	/* The most-significant-non-zero bit of the syndrome marks either the
94	   first bit that is different, or the top bit of the first zero byte.
95	   Shifting left now will bring the critical information into the
96	   top bits.  */
97	lsl	data1, data1, shift
98	lsl	data2, data2, shift
99	/* But we need to zero-extend (char is unsigned) the value and then
100	   perform a signed 32-bit subtraction.  */
101	lsr	data1, data1, 56
102	sub	result, data1, data2, lsr 56
103	ret
104
105	.p2align 4
106
107L(mutual_align):
108	/* Sources are mutually aligned, but are not currently at an
109	   alignment boundary.  Round down the addresses and then mask off
110	   the bytes that precede the start point.  */
111	bic	src1, src1, 7
	/* off2 is unchanged, so this reads src2's rounded-down dword.  */
112	ldr	data2, [src1, off2]
113	ldr	data1, [src1], 8
114	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
115	mov	tmp, -1
	/* All-ones shifted towards the early bytes leaves 0xff in the bytes
	   ahead of the start point.  ORing that into both dwords makes those
	   bytes equal and non-NUL.  */
116	LS_FW	tmp, tmp, shift
117	orr	data1, data1, tmp
118	orr	data2, data2, tmp
119	b	L(start_realigned)
120
121L(misaligned8):
122	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
123	   checking to make sure that we don't access beyond the end of SRC2.  */
124	cbz	tmp, L(src1_aligned)	/* tmp is still src1 & 7 from entry */
	/* Byte-at-a-time loop, repeated until src1 reaches an 8-byte
	   boundary.  */
125L(do_misaligned):
126	ldrb	data1w, [src1], 1
127	ldrb	data2w, [src2], 1
128	cmp	data1w, 0
129	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	/* Taken when the src1 byte is NUL or the two bytes differ.  */
130	b.ne	L(done)
131	tst	src1, 7
132	b.ne	L(do_misaligned)
133
	/* src1 is 8-byte aligned here; src2 is not (off2 is not a multiple
	   of 8).  */
134L(src1_aligned):
135	neg	shift, src2, lsl 3	/* shift = -(src2 * 8) */
136	bic	src2, src2, 7		/* round src2 down to its dword */
137	ldr	data3, [src2], 8	/* src2 now addresses the next dword */
138#ifdef __AARCH64EB__
139	rev	data3, data3
140#endif
	/* Set bit 0 in the bytes of data3 that lie before the current src2
	   position, so that they cannot be flagged as a NUL.  */
141	lsr	tmp, zeroones, shift
142	orr	data3, data3, tmp
143	sub	has_nul, data3, zeroones
144	orr	tmp, data3, REP8_7f
145	bics	has_nul, has_nul, tmp
	/* A NUL lies in the remaining bytes of this first src2 dword.  */
146	b.ne	L(tail)
147
	/* off1 reaches, from src1, the aligned src2 dword after the one just
	   checked.  off1 shares x5 with diff.  */
148	sub	off1, src2, src1
149
150	.p2align 4
151
	/* Per iteration:
	     data1 = next dword of src1 (aligned load, src1 post-incremented)
	     data2 = the matching eight bytes of src2 (unaligned load)
	     data3 = the aligned src2 dword in which data2 ends; only this
		     one is tested for a NUL.  */
152L(loop_unaligned):
153	ldr	data3, [src1, off1]
154	ldr	data2, [src1, off2]
155#ifdef __AARCH64EB__
156	rev	data3, data3
157#endif
158	sub	has_nul, data3, zeroones
159	orr	tmp, data3, REP8_7f
160	ldr	data1, [src1], 8
161	bics	has_nul, has_nul, tmp
	/* Continue only while data3 has no NUL and data1 == data2.  */
162	ccmp	data1, data2, 0, eq
163	b.eq	L(loop_unaligned)
164
	/* Keep only the NUL flags for the data3 bytes that data2 also covers,
	   moved to the positions those bytes occupy in data2.  */
165	lsl	tmp, has_nul, shift
166#ifdef __AARCH64EB__
167	rev	tmp, tmp
168#endif
	/* diff overwrites off1 (same register); the loop is not re-entered.  */
169	eor	diff, data1, data2
170	orr	syndrome, diff, tmp
	/* syndrome == 0 means data1 == data2 and the NUL sits in the part of
	   data3 beyond data2: fall through into the tail.  */
171	cbnz	syndrome, L(end)
	/* Final compare.  The src2 side is rebuilt from data3, which is
	   already in a register, so src2 is not loaded again.  */
172L(tail):
173	ldr	data1, [src1]
174	neg	shift, shift
	/* Discard the bytes of data3 (and their NUL flags) that lie before
	   the position now being compared.  */
175	lsr	data2, data3, shift
176	lsr	has_nul, has_nul, shift
177#ifdef __AARCH64EB__
178	rev     data2, data2
179	rev	has_nul, has_nul
180#endif
181	eor	diff, data1, data2
182	orr	syndrome, diff, has_nul
183	b	L(end)
184
	/* Exit from the byte loop: data1/data2 each hold one byte,
	   zero-extended by ldrb.  */
185L(done):
186	sub	result, data1, data2
187	ret
188SYM_FUNC_END(__pi_strcmp)
/* NOTE(review): the two macros below are defined outside this file.  Going
   by their names, strcmp becomes a weak alias of __pi_strcmp and is then
   exported; confirm against the linkage/assembler headers.  */
189SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp)
190EXPORT_SYMBOL_NOKASAN(strcmp)
191