xref: /linux/arch/arm64/lib/strlen.S (revision 6fdcba32711044c35c0e1b094cbd8f3f0b4472c9)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright (C) 2013 ARM Ltd.
4 * Copyright (C) 2013 Linaro.
5 *
6 * This code is based on glibc cortex strings work originally authored by
7 * Linaro; the original code can be found @
8 *
9 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
10 * files/head:/src/aarch64/
11 */
12
13#include <linux/linkage.h>
14#include <asm/assembler.h>
15
16/*
17 * calculate the length of a string
18 *
19 * Parameters:
20 *	x0 - const string pointer
21 * Returns:
22 *	x0 - the length of the string, not counting the terminating NUL
23 */
24
25/* Arguments and results.  */
/*
 * srcin and len are two names for the same register (x0): the pointer
 * argument is overwritten once the length is being computed.
 */
26srcin		.req	x0
27len		.req	x0
28
29/* Locals and temporaries.  */
30src		.req	x1	/* Read cursor; advanced by 16 on every ldp. */
31data1		.req	x2	/* First dword of the current 16-byte chunk. */
32data2		.req	x3	/* Second dword of the current 16-byte chunk. */
33data2a		.req	x4	/* data2 with the alignment mask applied (misaligned entry only). */
34has_nul1	.req	x5	/* NUL syndrome for data1: non-zero iff data1 holds a zero byte. */
35has_nul2	.req	x6	/* NUL syndrome for data2; also the syndrome fed to rev/clz. */
36tmp1		.req	x7	/* Scratch. */
37tmp2		.req	x8	/* Scratch. */
38tmp3		.req	x9	/* Scratch. */
39tmp4		.req	x10	/* Scratch. */
40zeroones	.req	x11	/* Holds REP8_01 for the whole function. */
41pos		.req	x12	/* Leading-zero count of the byte-swapped syndrome. */
42
/*
 * Byte-replicated 64-bit constants: the same byte value (0x01, 0x7f or
 * 0x80) repeated in each of the eight byte lanes.  REP8_01 and REP8_7f
 * are the operands of the "(X - 1) & ~(X | 0x7f)" zero-byte test;
 * REP8_80 is not referenced by the code visible in this file.
 */
43#define REP8_01 0x0101010101010101
44#define REP8_7f 0x7f7f7f7f7f7f7f7f
45#define REP8_80 0x8080808080808080
46
/*
 * strlen: return in len (x0) the number of bytes that precede the first
 * NUL byte found at or after srcin (x0).
 *
 * The string is read 16 bytes at a time with ldp from src, which is
 * srcin rounded down to a 16-byte boundary.  When srcin is not 16-byte
 * aligned, the bytes of the first chunk that lie before srcin are forced
 * to 0xff so that they can never be mistaken for the terminator.
 *
 * Registers written: x0 (result) and x1-x12 through the aliases declared
 * earlier in this file, plus the NZCV flags.  The stack is not used.
 *
 * NOTE(review): WEAK, ENDPIPROC, CPU_BE, CPU_LE and EXPORT_SYMBOL_NOKASAN
 * are macros defined outside this file; going by the comments below,
 * CPU_BE()/CPU_LE() emit their argument only for the matching endianness
 * - confirm against the included headers.
 */
47WEAK(strlen)
48	mov	zeroones, #REP8_01
49	bic	src, srcin, #15		/* src = srcin rounded down to 16 bytes. */
50	ands	tmp1, srcin, #15	/* tmp1 = offset within the chunk; Z set if aligned. */
51	b.ne	.Lmisaligned
52	/*
53	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
54	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
55	* can be done in parallel across the entire word.
56	*/
57	/*
58	* The inner loop deals with two Dwords at a time. This has a
59	* slightly higher start-up cost, but we should win quite quickly,
60	* especially on cores with a high number of issue slots per
61	* cycle, as we get much better parallelism out of the operations.
62	*/
63.Lloop:
64	ldp	data1, data2, [src], #16	/* Load 16 bytes, then src += 16. */
65.Lrealigned:
	/* Also entered from .Lmisaligned with data1/data2 already masked. */
66	sub	tmp1, data1, zeroones
67	orr	tmp2, data1, #REP8_7f
68	sub	tmp3, data2, zeroones
69	orr	tmp4, data2, #REP8_7f
70	bic	has_nul1, tmp1, tmp2	/* Syndrome for data1. */
71	bics	has_nul2, tmp3, tmp4	/* Syndrome for data2; Z set if it is zero. */
	/*
	* If has_nul2 == 0 (eq), compare has_nul1 with 0; otherwise force
	* NZCV = 0000, which reads as "ne".  Z therefore ends up set only
	* when both syndromes are zero, i.e. this chunk holds no NUL.
	*/
72	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
73	b.eq	.Lloop
74
	/*
	* src already points past the chunk that contains the NUL, so this
	* value is too large by 16 (NUL in data1) or by 8 (NUL in data2);
	* the two "sub ..., #8" below remove the excess.  len aliases srcin,
	* so the pointer argument is gone after this instruction.
	*/
75	sub	len, src, srcin
76	cbz	has_nul1, .Lnul_in_data2
	/* NUL is in data1: continue as if data1 were the second dword. */
77CPU_BE(	mov	data2, data1 )	/*prepare data to re-calculate the syndrome*/
78	sub	len, len, #8
79	mov	has_nul2, has_nul1
80.Lnul_in_data2:
81	/*
82	* For big-endian, carry propagation (if the final byte in the
83	* string is 0x01) means we cannot use has_nul directly.  The
84	* easiest way to get the correct byte is to byte-swap the data
85	* and calculate the syndrome a second time.
86	*/
87CPU_BE( rev	data2, data2 )
88CPU_BE( sub	tmp1, data2, zeroones )
89CPU_BE( orr	tmp2, data2, #REP8_7f )
90CPU_BE( bic	has_nul2, tmp1, tmp2 )
91
92	sub	len, len, #8		/* len = offset of the dword holding the NUL. */
	/*
	* Byte-swap so that the marker (bit 7) of the lowest-addressed zero
	* byte becomes the most significant set bit; clz then returns eight
	* times the index of that byte within the dword.
	*/
93	rev	has_nul2, has_nul2
94	clz	pos, has_nul2
95	add	len, len, pos, lsr #3		/* Bits to bytes.  */
96	ret
97
98.Lmisaligned:
	/*
	* On entry tmp1 = srcin & 15, in the range 1..15.  The flags set by
	* the cmp are consumed by the csinv/csel at the end of this block;
	* none of the instructions in between modifies NZCV.
	*/
99	cmp	tmp1, #8
100	neg	tmp1, tmp1
101	ldp	data1, data2, [src], #16
102	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
103	mov	tmp2, #~0
104	/* Big-endian.  Early bytes are at MSB.  */
105CPU_BE( lsl	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
106	/* Little-endian.  Early bytes are at LSB.  */
107CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
108
	/*
	* tmp2 is now 0xff in the earliest (srcin & 7) bytes of a dword, or
	* in all eight bytes when srcin & 7 == 0 (the shift count is then 0).
	*
	* le (offset <= 8): the string starts inside data1, or exactly at
	*     data2: OR the mask into data1 and keep data2 as loaded.
	* gt (offset > 8): all of data1 precedes the string: data1 becomes
	*     all ones (~xzr) and data2 is replaced by its masked copy.
	*/
109	orr	data1, data1, tmp2
110	orr	data2a, data2, tmp2
111	csinv	data1, data1, xzr, le
112	csel	data2, data2, data2a, le
113	b	.Lrealigned
114ENDPIPROC(strlen)
115EXPORT_SYMBOL_NOKASAN(strlen)
116