xref: /freebsd/lib/libc/amd64/string/strlen.S (revision dd41de95a84d979615a2ef11df6850622bf6184e)
1/*
2 * Written by Mateusz Guzik <mjg@freebsd.org>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7__FBSDID("$FreeBSD$");
8
9/*
10 * Note: this routine was written with kernel use in mind (read: no simd),
11 * it is only present in userspace as a temporary measure until something
12 * better gets imported.
13 */
14
15#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */
16
17/*
18 * strlen(string)
19 *	  %rdi
20 *
21 * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
22 *
23 * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
24 * with leaq.
25 *
26 * For a description see either:
27 * - "Hacker's Delight" by Henry S. Warren, Jr.
28 * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
29 *   by Agner Fog
30 *
31 * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
32 */
/*
 * size_t strlen(const char *s)
 *
 * ABI:      System V AMD64
 * In:       %rdi = s
 * Out:      %rax = number of bytes before the first NUL
 * Clobbers: %rcx, %rdx, %rdi, %r8-%r11, flags
 *
 * Register roles:
 *   %r8  - original value of s, needed for the final subtraction
 *   %r9  - the 8-byte word currently being examined
 *   %r10 - 0x80 in every byte
 *   %r11 - 0 - 0x0101010101010101, added via leaq instead of subtracting
 *   %rdi - 8-byte aligned read cursor
 *   %rax - zero-byte detection bits, then the result
 *
 * All loads are aligned 8-byte loads; the string is never read through
 * an unaligned pointer.
 */
ENTRY(strlen)
	movq	%rdi,%r8
	movabsq	$0x8080808080808080,%r10
	movabsq	$0xfefefefefefefeff,%r11

	testb	$7,%dil
	jz	.Lstrlen_scan		/* s is already 8-byte aligned */

	/*
	 * Unaligned start: round the cursor down to the enclosing
	 * aligned word and force every byte that precedes s to 0xff
	 * so that it cannot be mistaken for the terminator.
	 */
	leal	(,%rdi,8),%ecx		/* shlq masks %cl to 6 bits: 8 * (s & 7) */
	andq	$~7,%rdi
	movl	$1,%edx
	shlq	%cl,%rdx
	decq	%rdx			/* 0xff in each byte below s, 0 above */
	movq	(%rdi),%r9
	orq	%rdx,%r9

	leaq	(%r9,%r11),%rax		/* x - 0x01....01 */
	notq	%r9			/* ~x */
	andq	%r9,%rax
	andq	%r10,%rax		/* non-zero iff the word holds a NUL */
	jnz	.Lstrlen_found

	/*
	 * Word-at-a-time scan.  The aligned case enters at
	 * .Lstrlen_scan so that the first word is not skipped.
	 */
	ALIGN_TEXT
.Lstrlen_advance:
	addq	$8,%rdi
.Lstrlen_scan:
	movq	(%rdi),%r9
	leaq	(%r9,%r11),%rax		/* x - 0x01....01 */
	notq	%r9			/* ~x */
	andq	%r9,%rax
	andq	%r10,%rax		/* non-zero iff the word holds a NUL */
	jz	.Lstrlen_advance
.Lstrlen_found:
	/*
	 * The lowest set bit of %rax is bit 7 of the first NUL byte;
	 * dividing its position by 8 yields the byte index in the word.
	 */
	bsfq	%rax,%rax
	shrq	$3,%rax
	subq	%r8,%rdi		/* cursor - s (wraps if cursor < s) */
	addq	%rdi,%rax		/* + byte index = length */
	ret
END(strlen)
80
81	.section .note.GNU-stack,"",%progbits
82