xref: /linux/tools/arch/x86/lib/memset_64.S (revision 31d2e6b5d44e436969c15e4613e6b16c4c032d4d)
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */
37d7d1bf1SArnaldo Carvalho de Melo
47d7d1bf1SArnaldo Carvalho de Melo#include <linux/linkage.h>
57d7d1bf1SArnaldo Carvalho de Melo#include <asm/cpufeatures.h>
6fb24e308SArnaldo Carvalho de Melo#include <asm/alternative.h>
7db1a8b97SArnaldo Carvalho de Melo#include <asm/export.h>
87d7d1bf1SArnaldo Carvalho de Melo
.section .noinstr.text, "ax"

/*
 * __memset - ISO C memset: fill a memory block with a byte value.
 *
 * This entry point is the "fast string" variant: whole quadwords are
 * written with REP STOSQ and the 0-7 leftover bytes with REP STOSB.
 *
 * Input:
 *   rdi   destination
 *   rsi   value (only the low byte, sil, is used)
 *   rdx   count (bytes)
 *
 * Output:
 *   rax   original destination
 *
 * The fall-through path below writes rax, rcx, rdx, rsi, rdi and r9.
 */
SYM_FUNC_START(__memset)
	/*
	 * Implementation selection, expressed through ALTERNATIVE_2:
	 *   - X86_FEATURE_ERMS:     jump to memset_erms (enhanced REP STOSB),
	 *                           the recommended choice when available.
	 *   - X86_FEATURE_REP_GOOD: no jump; fall through to the fast string
	 *                           code below.
	 *   - neither:              jump to memset_orig.
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

	movq	%rdi, %r9		/* r9 = dst, kept for the return value */

	/* Split count: rcx = count / 8 quadwords, edx = count % 8 bytes. */
	movq	%rdx, %rcx
	shrq	$3, %rcx
	andl	$7, %edx

	/* rax = fill byte replicated into all eight byte lanes. */
	movzbl	%sil, %esi
	movabs	$0x0101010101010101, %rax
	imulq	%rsi, %rax

	rep stosq			/* bulk: rcx quadwords */
	movl	%edx, %ecx
	rep stosb			/* tail: remaining 0-7 bytes (al = fill byte) */

	movq	%r9, %rax		/* return the original destination */
	RET
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)

SYM_FUNC_ALIAS(memset, __memset)
EXPORT_SYMBOL(memset)
507be2e319SMark Rutland
/*
 * memset_erms - ISO C memset built on enhanced REP STOSB.
 *
 * __memset jumps here instead of running its fast string code when
 * X86_FEATURE_ERMS is set. The whole block is written by a single
 * REP STOSB, so no byte replication or count splitting is needed.
 *
 * Input:
 *   rdi   destination
 *   rsi   value (only the low byte, sil, is used)
 *   rdx   count (bytes)
 *
 * Output:
 *   rax   original destination
 *
 * Writes rax, rcx, rdi and r9.
 */
SYM_FUNC_START_LOCAL(memset_erms)
	movq	%rdx, %rcx		/* rcx = byte count for REP */
	movb	%sil, %al		/* al  = fill byte */
	movq	%rdi, %r9		/* r9  = dst, kept for the return value */
	rep stosb
	movq	%r9, %rax		/* return the original destination */
	RET
SYM_FUNC_END(memset_erms)
707d7d1bf1SArnaldo Carvalho de Melo
/*
 * memset_orig - memset using plain stores instead of string instructions.
 *
 * Reached from __memset through its default "jmp memset_orig" alternative.
 *
 * Strategy:
 *   1. If dst is not 8-byte aligned and count > 7, do one unaligned 8-byte
 *      store and advance dst to the next 8-byte boundary.
 *   2. Store 64 bytes per iteration while at least 64 bytes remain.
 *   3. Store the remaining whole quadwords (0-7 of them) one at a time.
 *   4. Store the last 0-7 bytes one at a time.
 *
 * Input:
 *   rdi   destination
 *   rsi   value (only the low byte, sil, is used)
 *   rdx   count (bytes)
 *
 * Output:
 *   rax   original destination
 *
 * Writes rax, rcx, rdx, rdi, r8, r9 and r10.
 */
SYM_FUNC_START_LOCAL(memset_orig)
	/* rax = fill byte replicated into all eight byte lanes. */
	movzbl	%sil, %ecx
	movabs	$0x0101010101010101, %rax
	imulq	%rcx, %rax

	movq	%rdi, %r10		/* r10 = dst, kept for the return value */

	/* r9 = dst & 7; non-zero means dst is not 8-byte aligned. */
	movl	%edi, %r9d
	andl	$7, %r9d
	jnz	.Lmisaligned_dst
.Ldst_ready:

	/* rcx = number of full 64-byte chunks in the remaining count. */
	movq	%rdx, %rcx
	shrq	$6, %rcx
	jz	.Ltail

	/*
	 * decq sets ZF for the jnz at the bottom of the loop; the mov and
	 * lea instructions in between do not modify the flags.
	 */
	.p2align 4
.Lfill_64:
	decq	%rcx
	movq	%rax, (%rdi)
	movq	%rax, 8(%rdi)
	movq	%rax, 16(%rdi)
	movq	%rax, 24(%rdi)
	movq	%rax, 32(%rdi)
	movq	%rax, 40(%rdi)
	movq	%rax, 48(%rdi)
	movq	%rax, 56(%rdi)
	leaq	64(%rdi), %rdi
	jnz	.Lfill_64

	/*
	 * The tail is handled with loops, which should be faster than
	 * hard-to-predict jump tables.
	 */
	.p2align 4
.Ltail:
	/* ecx = bytes left that form whole quadwords (bits 3-5 of count). */
	movl	%edx, %ecx
	andl	$(63 & ~7), %ecx
	jz	.Ltail_bytes
	shrl	$3, %ecx		/* bytes -> quadwords */
	.p2align 4
.Lfill_8:
	decl	%ecx			/* ZF consumed by jnz below */
	movq	%rax, (%rdi)
	leaq	8(%rdi), %rdi
	jnz	.Lfill_8

.Ltail_bytes:
	/* edx = final 0-7 bytes. */
	andl	$7, %edx
	jz	.Ldone
	.p2align 4
.Lfill_1:
	decl	%edx			/* ZF consumed by jnz below */
	movb	%al, (%rdi)
	leaq	1(%rdi), %rdi
	jnz	.Lfill_1

.Ldone:
	movq	%r10, %rax		/* return the original destination */
	RET

.Lmisaligned_dst:
	/* With 7 bytes or fewer there is no room for an 8-byte store. */
	cmpq	$7, %rdx
	jbe	.Ltail_bytes

	/*
	 * Cover the unaligned head with a single 8-byte store, then step dst
	 * forward by r8 = 8 - (dst & 7) so that it is 8-byte aligned, and
	 * shrink count by the same amount. The bytes of this store that lie
	 * beyond the new dst are written again later with the same value.
	 */
	movq	%rax, (%rdi)		/* unaligned store */
	movq	$8, %r8
	subq	%r9, %r8
	addq	%r8, %rdi
	subq	%r8, %rdx
	jmp	.Ldst_ready
SYM_FUNC_END(memset_orig)
143