/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT
 */


/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include <sys/elf_common.h>

#include <machine/asm.h>

#define L(l) .L ## l

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

#define src1		x0
#define src2		x1
#define result		x0

#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif
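/* For example, on little-endian the first string byte occupies bits 7:0 of a
   loaded word, so LS_FW is lsr (shift towards bit 0); on big-endian it sits
   in bits 63:56, so LS_FW is lsl.  */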

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */
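/* A single-byte check makes the trick concrete: X = 0x00 gives
   0xff & 0xff & 0x80 = 0x80 (non-zero), X = 0x01 gives 0x00 & 0xfe & 0x80 = 0
   and X = 0x80 gives 0x7f & 0x7f & 0x80 = 0.  Applied to a 64-bit word with
   REP8_01 and REP8_7f it flags any zero byte in the word.  */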


ENTRY (strcmp)
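	/* Dispatch on alignment: if src1 and src2 differ in their low three
	   address bits, take the byte-at-a-time/unaligned path at
	   L(misaligned8); if they are mutually aligned but not on an 8-byte
	   boundary, fix up via L(mutual_align) before the aligned loop.  */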
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

L(loop_aligned):
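	/* Load 8 bytes from each string; data2 is fetched via the constant
	   offset off2 so only src1 needs to be advanced.  Test data1 for a NUL
	   byte and compare the two words, with the ccmp folding both checks
	   into the single b.eq below.  */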
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
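	/* For example, with both strings starting 3 bytes past an 8-byte
	   boundary the low 6 bits of shift are 64 - 24 = 40, so LS_FW turns
	   tmp = -1 into a mask covering the 3 bytes before the start; ORing it
	   into data1 and data2 forces those bytes to 0xff, so they compare
	   equal and cannot be mistaken for a NUL.  */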
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
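	/* Compare byte by byte until src1 reaches an 8-byte boundary.  The
	   ccmp forces a not-equal result when src1's byte is NUL, so the
	   branch to L(done) covers both a mismatch and the end of the
	   strings.  */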
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
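	/* SRC1 is now 8-byte aligned.  Load the aligned SRC2 word containing
	   the current position, force the bytes that precede it to be non-zero
	   by ORing in 0x01 (zeroones shifted into place), and test the word
	   for a NUL terminator.  */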
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
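	/* data3 is the next aligned SRC2 word and is the one checked for a
	   NUL byte; data2 is the unaligned SRC2 word lining up with the
	   aligned SRC1 word in data1.  Since each aligned SRC2 word is
	   NUL-checked before its bytes are needed, the loads never stray
	   beyond the end of SRC2.  Loop while the words are equal and data3
	   contains no NUL.  */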
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
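	/* A NUL was found in the aligned SRC2 word while the strings matched
	   so far.  Shift data3 and its NUL flags down so they line up with the
	   SRC1 word in data1, then build the final syndrome for L(end).  */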
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	sub	result, data1, data2
	ret

END (strcmp)

GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)