xref: /linux/tools/arch/x86/lib/memset_64.S (revision bd5c6b81dd6025bd4c6ca7800a580b217d9899b9)
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */
37d7d1bf1SArnaldo Carvalho de Melo
47d7d1bf1SArnaldo Carvalho de Melo#include <linux/linkage.h>
57d7d1bf1SArnaldo Carvalho de Melo#include <asm/cpufeatures.h>
67d7d1bf1SArnaldo Carvalho de Melo#include <asm/alternative-asm.h>
77d7d1bf1SArnaldo Carvalho de Melo
87d7d1bf1SArnaldo Carvalho de Melo.weak memset
97d7d1bf1SArnaldo Carvalho de Melo
/*
 * ISO C memset - set a memory block to a byte value, using the "fast string"
 * REP STOS instructions: whole quadwords first, then the leftover bytes.
 *
 * In:
 *   rdi   destination
 *   rsi   value (char; only the low byte, %sil, is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Also written: rcx, rdx, rsi, rdi, r9 and the flags.
 */
SYM_FUNC_START_ALIAS(memset)
SYM_FUNC_START(__memset)
	/*
	 * Pick one of three implementations:
	 *   - default instruction:     jmp memset_orig (open-coded store loops)
	 *   - X86_FEATURE_REP_GOOD:    empty replacement, i.e. fall through to
	 *                              the fast string code below
	 *   - X86_FEATURE_ERMS:        jmp memset_erms (enhanced REP STOSB,
	 *                              recommended whenever it is available)
	 * The selection itself is done by ALTERNATIVE_2, which is defined in
	 * <asm/alternative-asm.h>.
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

	movq	%rdi,%r9		/* REP STOS advances rdi; keep dst for the return */

	/* split the count: rcx = count / 8 quadwords, edx = count % 8 bytes */
	movq	%rdx,%rcx
	andl	$7,%edx
	shrq	$3,%rcx

	/* expand byte value: rax = the fill byte repeated in all eight bytes */
	movzbl	%sil,%esi
	movabs	$0x0101010101010101,%rax
	imulq	%rsi,%rax

	rep stosq			/* store rcx quadwords */
	movl	%edx,%ecx		/* remaining 0..7 bytes */
	rep stosb
	movq	%r9,%rax		/* return the original destination */
	ret
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
477d7d1bf1SArnaldo Carvalho de Melo
/*
 * ISO C memset - set a memory block to a byte value. This variant uses
 * enhanced REP STOSB instead of the fast string (REP STOSQ) code, which makes
 * it simpler and shorter: one byte-granular REP STOSB covers the whole count,
 * with no quadword/remainder split.
 *
 * In:
 *   rdi   destination
 *   rsi   value (char; only the low byte, %sil, is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Also written: rcx, rdi, r9.
 */
SYM_FUNC_START(memset_erms)
	movq	%rdi,%r9		/* REP STOSB advances rdi; keep dst for the return */
	movb	%sil,%al		/* STOSB stores al, so only the low byte is needed */
	movq	%rdx,%rcx		/* rcx = number of bytes to store */
	rep stosb
	movq	%r9,%rax		/* return the original destination */
	ret
SYM_FUNC_END(memset_erms)
677d7d1bf1SArnaldo Carvalho de Melo
/*
 * memset_orig - memset built from plain MOV stores, without REP STOS.
 * This is the default target of the dispatch at the top of __memset.
 *
 * In:
 *   rdi   destination
 *   rsi   value (char; only the low byte, %sil, is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Register roles inside the function:
 *   rax   fill byte repeated in all eight bytes
 *   rcx   loop counter (64-byte blocks, then quadwords)
 *   rdx   bytes still to be written
 *   rdi   current write position
 *   r8    8 - (dst & 7), used on the misaligned path only
 *   r9    dst & 7
 *   r10   saved destination for the return value
 * The flags are written as well.
 */
SYM_FUNC_START(memset_orig)
	movq	%rdi,%r10

	/* expand byte value  */
	movzbl	%sil,%ecx
	movabs	$0x0101010101010101,%rax
	imulq	%rcx,%rax

	/* align dst: r9d = dst & 7; non-zero means dst is not 8-byte aligned */
	movl	%edi,%r9d
	andl	$7,%r9d
	jnz	.Lbad_alignment
.Lafter_bad_alignment:

	/* rcx = number of whole 64-byte blocks */
	movq	%rdx,%rcx
	shrq	$6,%rcx
	jz	.Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * The MOV stores and the LEA leave the flags alone, so the JNZ at the
	 * bottom still tests the result of this DECQ.
	 */
	decq	%rcx
	movq	%rax,(%rdi)
	movq	%rax,8(%rdi)
	movq	%rax,16(%rdi)
	movq	%rax,24(%rdi)
	movq	%rax,32(%rdi)
	movq	%rax,40(%rdi)
	movq	%rax,48(%rdi)
	movq	%rax,56(%rdi)
	leaq	64(%rdi),%rdi
	jnz	.Lloop_64

	/* Handle tail in loops. The loops should be faster than hard
	   to predict jump tables. */
	.p2align 4
.Lhandle_tail:
	/* ecx = count & 56: the bytes that still form whole quadwords */
	movl	%edx,%ecx
	andl	$63&(~7),%ecx
	jz	.Lhandle_7
	shrl	$3,%ecx			/* bytes -> quadwords */
	.p2align 4
.Lloop_8:
	decl	%ecx
	movq	%rax,(%rdi)
	leaq	8(%rdi),%rdi
	jnz	.Lloop_8

.Lhandle_7:
	/* edx = count & 7: the last 0..7 bytes, written one at a time */
	andl	$7,%edx
	jz	.Lende
	.p2align 4
.Lloop_1:
	decl	%edx
	movb	%al,(%rdi)
	leaq	1(%rdi),%rdi
	jnz	.Lloop_1

.Lende:
	movq	%r10,%rax
	ret

.Lbad_alignment:
	/*
	 * Misaligned destination. With 7 bytes or fewer the byte loop does
	 * everything. Otherwise one unaligned quadword store covers the head,
	 * and rdi/rdx are then moved forward by r8 = 8 - (dst & 7) so that the
	 * remaining stores start on an 8-byte boundary.
	 */
	cmpq	$7,%rdx
	jbe	.Lhandle_7
	movq	%rax,(%rdi)	/* unaligned store */
	movq	$8,%r8
	subq	%r9,%r8
	addq	%r8,%rdi
	subq	%r8,%rdx
	jmp	.Lafter_bad_alignment
SYM_FUNC_END(memset_orig)
140