xref: /linux/tools/arch/x86/lib/memset_64.S (revision db1a8b97a0a36155171dbb805fbcb276e07559f6)
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */
37d7d1bf1SArnaldo Carvalho de Melo
47d7d1bf1SArnaldo Carvalho de Melo#include <linux/linkage.h>
57d7d1bf1SArnaldo Carvalho de Melo#include <asm/cpufeatures.h>
67d7d1bf1SArnaldo Carvalho de Melo#include <asm/alternative-asm.h>
7*db1a8b97SArnaldo Carvalho de Melo#include <asm/export.h>
87d7d1bf1SArnaldo Carvalho de Melo
/*
 * ISO C memset: fill a block of memory with one byte value.
 *
 * This is the "fast string" variant: the bulk of the block is written with
 * rep stosq and the remaining 0..7 bytes with rep stosb.
 *
 * In:
 *   rdi   destination
 *   rsi   fill value (only the low byte, sil, is used)
 *   rdx   count in bytes
 *
 * Out:
 *   rax   original destination
 *
 * Scratch on the path below: rcx, rdx, rsi, rdi, r9 and the flags.
 */
SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
	/*
	 * Pick an implementation. The default instruction is a jump to
	 * memset_orig. With X86_FEATURE_REP_GOOD the jump is replaced by
	 * nothing, so execution falls through to the fast string code below.
	 * With X86_FEATURE_ERMS (enhanced REP MOVSB/STOSB) it is replaced by
	 * a jump to memset_erms, which is the recommended choice whenever
	 * that feature is available.
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

	/* rax = fill byte copied into each of its eight bytes */
	movzbl	%sil, %esi
	movabs	$0x0101010101010101, %rax
	imulq	%rsi, %rax

	movq	%rdi, %r9		/* keep dst for the return value */

	/* split count: rcx = whole qwords, edx = leftover bytes (0..7) */
	movq	%rdx, %rcx
	shrq	$3, %rcx
	andl	$7, %edx

	rep stosq			/* bulk fill, eight bytes per store */
	movl	%edx, %ecx
	rep stosb			/* remaining tail bytes */

	movq	%r9, %rax		/* return the original destination */
	ret
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
487d7d1bf1SArnaldo Carvalho de Melo
/*
 * ISO C memset: fill a block of memory with one byte value.
 *
 * Enhanced rep stosb variant, selected in place of the fast string code
 * when X86_FEATURE_ERMS is set. The whole block is written by a single
 * rep stosb, which makes this the shortest of the three implementations.
 *
 * In:
 *   rdi   destination
 *   rsi   fill value (only the low byte, sil, is used)
 *   rdx   count in bytes
 *
 * Out:
 *   rax   original destination
 */
SYM_FUNC_START_LOCAL(memset_erms)
	movq	%rdx, %rcx		/* rcx = byte count for rep */
	movb	%sil, %al		/* al  = fill byte */
	movq	%rdi, %r9		/* keep dst; rep stosb advances rdi */
	rep stosb
	movq	%r9, %rax		/* return the original destination */
	ret
SYM_FUNC_END(memset_erms)
687d7d1bf1SArnaldo Carvalho de Melo
/*
 * memset_orig: open-coded fill using plain stores; this is the default
 * target of the dispatch jump at the top of __memset.
 *
 * In:
 *   rdi   destination
 *   rsi   fill value (only the low byte, sil, is used)
 *   rdx   count in bytes
 *
 * Out:
 *   rax   original destination
 *
 * Scratch: rcx, rdx, rdi, r8, r9, r10 and the flags.
 */
SYM_FUNC_START_LOCAL(memset_orig)
	movq	%rdi, %r10		/* r10 = dst, handed back in rax */

	/* rax = fill byte copied into each of its eight bytes */
	movzbl	%sil, %ecx
	movabs	$0x0101010101010101, %rax
	imulq	%rcx, %rax

	/* r9d = dst & 7; non-zero means dst is not 8-byte aligned */
	movl	%edi, %r9d
	andl	$7, %r9d
	jnz	.Lmisaligned_dst
.Ldst_aligned:

	/* rcx = number of whole 64-byte chunks */
	movq	%rdx, %rcx
	shrq	$6, %rcx
	jz	.Ltail_qwords

	.p2align 4
.Lfill_64:
	movq	%rax, (%rdi)
	movq	%rax, 8(%rdi)
	movq	%rax, 16(%rdi)
	movq	%rax, 24(%rdi)
	movq	%rax, 32(%rdi)
	movq	%rax, 40(%rdi)
	movq	%rax, 48(%rdi)
	movq	%rax, 56(%rdi)
	leaq	64(%rdi), %rdi
	decq	%rcx
	jnz	.Lfill_64

	/*
	 * The tail is handled with small loops; they should be faster
	 * than hard to predict jump tables.
	 */
	.p2align 4
.Ltail_qwords:
	movl	%edx, %ecx
	andl	$63&(~7), %ecx		/* bytes left in whole qwords */
	jz	.Ltail_bytes
	shrl	$3, %ecx		/* bytes -> qword count */
	.p2align 4
.Lfill_8:
	movq	%rax, (%rdi)
	leaq	8(%rdi), %rdi
	decl	%ecx
	jnz	.Lfill_8

.Ltail_bytes:
	andl	$7, %edx		/* edx = final 0..7 bytes */
	jz	.Ldone
	.p2align 4
.Lfill_1:
	movb	%al, (%rdi)
	leaq	1(%rdi), %rdi
	decl	%edx
	jnz	.Lfill_1

.Ldone:
	movq	%r10, %rax		/* return the original destination */
	ret

	/*
	 * dst is not 8-byte aligned. With at most 7 bytes to write, go
	 * straight to the byte loop. Otherwise write one unaligned qword at
	 * dst, then advance dst to the next 8-byte boundary and shrink the
	 * count by the same amount; the aligned stores that follow overlap
	 * part of that first qword with the same value.
	 */
.Lmisaligned_dst:
	cmpq	$7, %rdx
	jbe	.Ltail_bytes
	movq	%rax, (%rdi)		/* unaligned store */
	movq	$8, %r8
	subq	%r9, %r8		/* r8 = 8 - (dst & 7) */
	addq	%r8, %rdi
	subq	%r8, %rdx
	jmp	.Ldst_aligned
	/* .Lfinal is not referenced by any code visible in this file */
.Lfinal:
SYM_FUNC_END(memset_orig)
141